gadfly-1.0.0/doc/demo/kjParsing/DLispShort.py

# Grammar generation
# for lisp lists with strings, ints, vars, print, and setq

# set this variable to regenerate the grammar on each load
REGENERATEONLOAD = 1

import string

GRAMMARSTRING ="""
       Value ::  ## indicates Value is the root nonterminal for the grammar
         @R SetqRule :: Value >> ( setq var Value )
         @R ListRule :: Value >> ( ListTail
         @R TailFull :: ListTail >> Value ListTail
         @R TailEmpty :: ListTail >> )
         @R Varrule :: Value >> var
         @R Intrule :: Value >> int
         @R Strrule :: Value >> str
         @R PrintRule :: Value >> ( print Value )
"""
COMPILEDFILENAME = "TESTLispG.py"
MARSHALLEDFILENAME = "TESTLispG.mar"
LISPCOMMENTREGEX = ";.*"
INTREGEX = "["+string.digits+"]+"
STRREGEX = '"[^\n"]*"'
VARREGEX = "["+string.letters+"]["+string.letters+string.digits+"]*"

### declare interpretation functions and regex's for terminals

def intInterp( str ):
    return string.atoi(str)

def stripQuotes( str ):
    return str[1:len(str)-1]

def echo(string):
    return string

def DeclareTerminals(Grammar):
    Grammar.Addterm("int", INTREGEX, intInterp)
    Grammar.Addterm("str", STRREGEX, stripQuotes)
    Grammar.Addterm("var", VARREGEX, echo)

### declare the rule reduction interpretation functions.

def EchoValue( list, Context ):
    return list[0]

def VarValue( list, Context ):
    varName = list[0]
    if Context.has_key(varName):
        return Context[varName]
    else:
        raise NameError, "no such lisp variable in context "+varName

def NilTail( list, Context ):
    return []

def AddToList( list, Context ):
    return [ list[0] ] + list[1]

def MakeList( list, Context ):
    return list[1]

def DoSetq( list, Context):
    Context[ list[2] ] = list[3]
    return list[3]

def DoPrint( list, Context ):
    print list[2]
    return list[2]

def BindRules(Grammar):
    Grammar.Bind( "Intrule", EchoValue )
    Grammar.Bind( "Strrule", EchoValue )
    Grammar.Bind( "Varrule", VarValue )
    Grammar.Bind( "TailEmpty", NilTail )
    Grammar.Bind( "TailFull", AddToList )
    Grammar.Bind( "ListRule", MakeList )
    Grammar.Bind( "SetqRule", DoSetq )
    Grammar.Bind( "PrintRule", DoPrint )

# This function generates the grammar and dumps it to a file.
def GrammarBuild():
    import kjParseBuild
    LispG = kjParseBuild.NullCGrammar()
    LispG.SetCaseSensitivity(0) # grammar is not case sensitive for keywords
    DeclareTerminals(LispG)
    LispG.Keywords("setq print")
    LispG.punct("().")
    LispG.Nonterms("Value ListTail")
    LispG.comments([LISPCOMMENTREGEX])
    LispG.Declarerules(GRAMMARSTRING)
    LispG.Compile()
    print "dumping as python to "+COMPILEDFILENAME
    outfile = open(COMPILEDFILENAME, "w")
    LispG.Reconstruct("LispG",outfile,"GRAMMAR")
    outfile.close()
    print "dumping as binary to "+MARSHALLEDFILENAME
    outfile = open(MARSHALLEDFILENAME, "w")
    LispG.MarshalDump(outfile)
    outfile.close()
    BindRules(LispG)
    return LispG

# this function initializes the compiled grammar from the generated file.
def LoadLispG():
    import TESTLispG
    # reload to make sure we get the most recent version!
    # (only needed when debugging the grammar).
    reload(TESTLispG)
    LispG = TESTLispG.GRAMMAR()
    DeclareTerminals(LispG)
    BindRules(LispG)
    return LispG

def unMarshalLispG():
    import kjParser
    infile = open(MARSHALLEDFILENAME, "r")
    LispG = kjParser.UnMarshalGram(infile)
    infile.close()
    DeclareTerminals(LispG)
    BindRules(LispG)
    return LispG

########## test the grammar generation

if REGENERATEONLOAD:
    print "(re)generating the LispG grammar in file TESTLispG.py"
    Dummy = GrammarBuild()
    print "(re)generation done."

print "loading grammar as python"
LispG = LoadLispG()

### declare an initial context, and do some tests.
Context = { 'x':3 }
test1 = LispG.DoParse1( '()', Context)
test2 = LispG.DoParse1( '(123)', Context)
test3 = LispG.DoParse1( '(x)', Context)
test4 = LispG.DoParse1( '" a string "', Context)
test5 = LispG.DoParse1( '(setq y (1 2 3) )', Context )
test6 = LispG.DoParse1( '(SeTq x ("a string" "another" 0))', Context )
test7str = """
  ; this is a lisp comment
  (setq abc (("a" x)
             ("b" (setq d 12))
             ("c" y) ) ; another lisp comment
  )
"""
test7 = LispG.DoParse1( test7str, Context)
test8 = LispG.DoParse1( '(print (1 x d))', Context)

print "unmarshalling the grammar"
LispG2 = unMarshalLispG()

### declare an initial context, and do some tests.
Context = { 'x':3 }
test1 = LispG2.DoParse1( '()', Context)
test2 = LispG2.DoParse1( '(123)', Context)
test3 = LispG2.DoParse1( '(x)', Context)
test4 = LispG2.DoParse1( '" a string "', Context)
test5 = LispG2.DoParse1( '(setq y (1 2 3) )', Context )
test6 = LispG2.DoParse1( '(SeTq x ("a string" "another" 0))', Context )
test7str = """
  ; this is a lisp comment
  (setq abc (("a" x)
             ("b" (setq d 12))
             ("c" y) ) ; another lisp comment
  )
"""
test7 = LispG2.DoParse1( test7str, Context)
test8 = LispG2.DoParse1( '(print (1 x d))', Context)

gadfly-1.0.0/doc/demo/kjParsing/DumbLispGen.py

#
# test for kjParseBuild module automatic parser generation
#
# lisp lists with strings, ints, vars, and setq

import string

### The string representation for the grammar.
### Since this is used only by GrammarBuild()
### it could be put in a separate file with GrammarBuild()
### to save space/load time after Grammar compilation.
###
GRAMMARSTRING ="""
       Value ::  ## indicates Value is the root nonterminal for the grammar
         @R SetqRule :: Value >> ( setq var Value )
         @R ListRule :: Value >> ( ListTail
         @R TailFull :: ListTail >> Value ListTail
         @R TailEmpty :: ListTail >> )
         @R Varrule :: Value >> var
         @R Intrule :: Value >> int
         @R Strrule :: Value >> str
"""

### the name of the file in which to create the compiled
### grammar declarations
COMPILEDFILENAME = "TESTLispG2.py"

### declare comment form(s) as regular expressions
LISPCOMMENTREGEX = ";.*"

### declare regular expression string constants for terminals

#integer terminal:::::::
INTREGEX = "["+string.digits+"]+"

#string terminal::::::::
STRREGEX = '"[^\n"]*"'

#var terminal::::::::
VARREGEX = "["+string.letters+"]["+string.letters+string.digits+"]*"

### declare interpretation functions for terminals

# int interpretation function: translates string to int:
# Could use string.atoi without the extra level of indirection
# but for demo purposes here it is.
#
def intInterp( str ):
    return string.atoi(str)

# interpretation function for strings strips off the surrounding quotes.
def stripQuotes( str ):
    if len(str)<2:
        raise TypeError, "string too short?"
    return str[1:len(str)-1]

# interpretation function for vars just returns the recognized string
def echo(string):
    return string

# This function declares the nonterminals both in the
# "grammar generation phase" and in loading the compiled
# grammar after generation
#
def DeclareTerminals(Grammar):
    Grammar.Addterm("int", INTREGEX, intInterp)
    Grammar.Addterm("str", STRREGEX, stripQuotes)
    Grammar.Addterm("var", VARREGEX, echo)

### declare the rule reduction interpretation functions.

# EchoValue() serves for Intrule and Strrule, since
# we just want to echo the value returned by the
# respective terminal interpretation functions.
#
# Parser delivers list of form [ interpreted_value ]
def EchoValue( list, Context ):
    if len(list)!=1:
        raise TypeError, "this shouldn't happen! (1)"
    return list[0]

# for Varrule interpreter must try to look up the value
# in the Context dictionary
#
# Parser delivers list of form [ var_name ]
def VarValue( list, Context ):
    if len(list)!=1:
        raise TypeError, "Huh? (2)"
    varName = list[0]
    if Context.has_key(varName):
        return Context[varName]
    else:
        raise NameError, "no such lisp variable in context "+varName

# for an empty tail, return the empty list
#
# Parser delivers list of form [")"]
def NilTail( list, Context ):
    if len(list) != 1 or list[0] != ")":
        raise TypeError, "Bad reduction?"
    return []

# For a full tail, add the new element to the front of the list
#
# Parser delivers list of form [Value, TailValue]
def AddToList( list, Context ):
    if len(list) !=2:
        raise TypeError, "Bad reduction?"
    return [ list[0] ] + list[1]

# For a list, simply return the list determined by the tail
#
# Parser delivers list of form ["(", TailValue ]
def MakeList( list, Context ):
    if len(list)!=2 or list[0]!="(":
        raise TypeError, "Bad reduction? (3)"
    return list[1]

# For a setq, declare a new variable in the Context dictionary
#
# Parser delivers list of form
#   ["(", "setq", varName, Value, ")"]
def DoSetq( list, Context):
    if len(list) != 5\
       or list[0] != "("\
       or list[1] != "setq"\
       or list[4] != ")":
        print list
        raise TypeError, "Bad reduction? (4)"
    VarName = list[2]
    if type(VarName) != type(''):
        raise TypeError, "Bad var name? (5)"
    Value = list[3]
    # add or set the variable in the Context dictionary
    Context[ VarName ] = Value
    return Value

# This function Binds the named rules of the Grammar string to their
# interpretation functions in a Grammar.
#
def BindRules(Grammar):
    Grammar.Bind( "Intrule", EchoValue )
    Grammar.Bind( "Strrule", EchoValue )
    Grammar.Bind( "Varrule", VarValue )
    Grammar.Bind( "TailEmpty", NilTail )
    Grammar.Bind( "TailFull", AddToList )
    Grammar.Bind( "ListRule", MakeList )
    Grammar.Bind( "SetqRule", DoSetq )

# This function generates the grammar and dumps it to a file.
# Since it will be used only once (after debugging),
# it probably should be put in another file to save memory/load-time.
#
# the result returned is a Grammar Object that can be used
# for testing/debugging purposes.
#
# (maybe this should be made into a generic function?)
def GrammarBuild():
    import kjParseBuild

    # initialize a Null compilable grammar to define
    LispG = kjParseBuild.NullCGrammar()

    # declare terminals for the grammar
    DeclareTerminals(LispG)

    # declare the keywords for the grammar
    # defun is not used, included here for demo purposes only
    LispG.Keywords("setq defun")

    # Declare punctuations
    # dot is not used here
    LispG.punct("().")

    # Declare Nonterms
    LispG.Nonterms("Value ListTail")

    # Declare comment forms
    LispG.comments([LISPCOMMENTREGEX])

    # Declare rules
    LispG.Declarerules(GRAMMARSTRING)

    # Compile the grammar
    LispG.Compile()

    # Write the grammar to a file except for
    # the function bindings (which must be rebound)
    outfile = open(COMPILEDFILENAME, "w")
    LispG.Reconstruct("LispG",outfile,"GRAMMAR")
    outfile.close()

    # for debugging purposes only, bind the rules
    # in the generated grammar
    BindRules(LispG)

    # return the generated Grammar
    return LispG

# this function initializes the compiled grammar from
# the generated file.
def LoadLispG():
    import TESTLispG2
    # make sure we have most recent version (during debugging)
    reload(TESTLispG2)
    # evaluate the grammar function from generated file
    LispG = TESTLispG2.GRAMMAR()
    # bind the semantics functions
    DeclareTerminals(LispG)
    BindRules(LispG)
    return LispG

########## test grammar generation

# do generation
Dummy = GrammarBuild()

# load the grammar from the file as LispG
LispG = LoadLispG()

# declare an initial context, and do some tests.
Context = { "x":3 }
test1 = LispG.DoParse1( "()", Context)
test2 = LispG.DoParse1( "(123)", Context)
test3 = LispG.DoParse1( "(x)", Context)
test4 = LispG.DoParse1( '" a string "', Context)
test5 = LispG.DoParse1( "(setq y (1 2 3) )", Context )
test6 = LispG.DoParse1( '(setq x ("a string" "another" 0))', Context )
test7str = """
  ; this is a lisp comment
  (setq abc (("a" x)
             ("b" (setq d 12))
             ("c" y) ) ; another lisp comment
  )
"""
test7 = LispG.DoParse1( test7str, Context)

gadfly-1.0.0/doc/demo/kjParsing/idl.py

# idl grammar
#
# Note, this grammar requires a special hack at the lexical
# level in order to parse the fragment
#
#    ...
#    case abc::def: jjj::www: whatever...
#
# (Yuck!)
# Some would argue this is a language design flaw, but whatever...
# It causes a shift/reduce problem without special handling for ::
# below coloncolon is a 'fake' keyword that parses as two colons.
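#
# Sketch of the intended effect (hand-written token stream, not actual
# lexer output): with the override defined at the bottom of this file,
# the fragment
#
#    case abc::def:
#
# tokenizes as
#
#    case  identifier  coloncolon  identifier  :
#
# so the final ':' that ends the case label can no longer be confused
# with the first half of a '::' scope operator.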
idlgramstring = """ specification :: ## 1 @R r1a :: specification >> definition speclist @R r1b :: speclist >> specification @R r1c :: speclist >> ## 2 punct ; @R r2a :: definition >> type_dcl ; @R r2b :: definition >> const_dcl ; @R r2c :: definition >> except_dcl ; @R r2d :: definition >> interface_nt ; @R r2e :: definition >> module_nt ; ## 3 identifier=term, module=kw puncts {} @R r3 :: module_nt >> module identifier { specification } ## 4 @R r4a :: interface_nt >> interface_dcl @R r4b :: interface_nt >> forward_dcl ## 5 @R r5 :: interface_dcl >> interface_header { interface_body } ## 6 interface=kw @R r6 :: forward_dcl >> interface identifier ## 7 puncts [] @R r7 :: interface_header >> interface identifier [ inheritance_spec ] ## 8 @R r8a :: interface_body >> @R r8b :: interface_body >> export interface_body ## 9 @R r9a :: export >> type_dcl @R r9b :: export >> const_dcl @R r9c :: export >> except_dcl @R r9d :: export >> attr_dcl @R r9e :: export >> op_dcl ## 10 punct ,: @R r10a :: inheritance_spec >> : scoped_name_list @R r10b :: scoped_name_list >> scoped_name @R r10c :: scoped_name_list >> scoped_name_list , scoped_name ## 11 @R r11a :: scoped_name >> identifier @R r11b :: scoped_name >> colon_colon identifier @R r11d :: scoped_name >> scoped_name coloncolon identifier ## 12 const=kw punct = @R r12 :: const_dcl >> const const_type identifier = const_expr ## 13 @R r13a :: const_type >> integer_type @R r13b :: const_type >> char_type @R r13c :: const_type >> boolean_type @R r13d :: const_type >> floating_type @R r13e :: const_type >> string_type @R r13f :: const_type >> scoped_name ## 14 @R r14 :: const_expr >> or_expr ##15 punct | @R r15a :: or_expr >> xor_expr @R r15b :: or_expr >> or_expr | xor_expr ##16 punct ^ @R r16a :: xor_expr >> and_expr @R r16b :: xor_expr >> xor_expr ^ and_expr ##17 punct & @R r17a :: and_expr >> shift_expr @R r17b :: and_expr >> and_expr & shift_expr ##18 punct > < @R r18a :: shift_expr >> add_expr @R r18b :: shift_expr >> shift_expr > > add_expr @R r18c :: shift_expr >> shift_expr < < add_expr ##19 punct +- @R r19a :: add_expr >> mult_expr @R r19b :: add_expr >> add_expr + mult_expr @R r19c :: add_expr >> add_expr - mult_expr ##20 punct */% @R r20a :: mult_expr >> unary_expr @R r20b :: mult_expr >> mult_expr * unary_expr @R r20c :: mult_expr >> mult_expr / unary_expr @R r20d :: mult_expr >> mult_expr % unary_expr ##21 @R r21a :: unary_expr >> unary_operator primary_expr @R r21b :: unary_expr >> primary_expr ##22 @R r22a :: unary_operator >> - @R r22b :: unary_operator >> + @R r22c :: unary_operator >> ~ ##23 punct () @R r23a :: primary_expr >> scoped_name @R r23b :: primary_expr >> literal @R r23c :: primary_expr >> ( const_expr ) ##24 terms = *_literal (?) 
except boolean @R r24a :: literal >> integer_literal @R r24b :: literal >> string_literal @R r24c :: literal >> character_literal @R r24d :: literal >> floating_pt_literal @R r24e :: literal >> boolean_literal ##25 kw TRUE FALSE @R r25a :: boolean_literal >> TRUE @R r25b :: boolean_literal >> FALSE ##26 @R r26 :: positive_int_literal >> const_expr ##27 kw typedef @R r27a :: type_dcl >> typedef type_declarator @R r27b :: type_dcl >> struct_type @R r27c :: type_dcl >> union_type @R r27d :: type_dcl >> enum_type ##28 @R r28 :: type_declarator >> type_spec declarators ##29 @R r29a :: type_spec >> simple_type_spec @R r29b :: type_spec >> constr_type_spec ##30 @R r30a :: simple_type_spec >> base_type_spec @R r30b :: simple_type_spec >> template_type_spec @R r30c :: simple_type_spec >> scoped_name ##31 @R r31a :: base_type_spec >> floating_pt_type @R r31b :: base_type_spec >> integer_type @R r31c :: base_type_spec >> char_type @R r31d :: base_type_spec >> boolean_type @R r31e :: base_type_spec >> octet_type @R r31f :: base_type_spec >> any_type ## 32 @R r32a :: template_type_spec >> sequence_type @R r32b :: template_type_spec >> string_type ##33 @R r33a :: constr_type_spec >> struct_type @R r33b :: constr_type_spec >> union_type @R r33c :: constr_type_spec >> enum_type ##34 @R r34a :: declarators >> declarator @R r34b :: declarators >> declarators , declarator ##35 @R r35a :: declarator >> simple_declarator @R r35b :: declarator >> complex_declarator ##36 @R r36 :: simple_declarator >> identifier ##37 @R r37 :: complex_declarator >> array_declarator ##38 kw float double @R r38a :: floating_pt_type >> float @R r38b :: floating_pt_type >> double ##39 @R r39a :: integer_type >> signed_int @R r39b :: integer_type >> unsigned_int ##40 @R r40 :: signed_int >> signed_long_int @R r40 :: signed_int >> signed_short_int ##41 kw long @R r41 :: signed_long_int >> long ##42 kw short @R r42 :: signed_short_int >> short ##43 @R r43 :: unsigned_int >> unsigned_long_int @R r43 :: unsigned_int >> unsigned_short_int ##44 kw unsigned @R r44 :: unsigned_long_int >> unsigned long ##45 @R r45 :: unsigned_short_int >> unsigned short ##46 kw char @R r46 :: char_type >> char ##47 kw boolean @R r47 :: boolean_type >> boolean ##48 kw octet @R r48 :: octet_type >> octet ##49 kw any @R r49 :: any_type >> any ##50 kw struct @R r50 :: struct_type >> struct identifier { member_list } ##51 @R r51a :: member_list >> member @R r51b :: member_list >> member_list member ##52 @R r52 :: member >> type_spec declarators ; ##53 kw union switch @R r53 :: union_type >> union identifier switch ( switch_type_spec ) { switch_body } ##54 @R r54a :: switch_type_spec >> integer_type @R r54b :: switch_type_spec >> char_type @R r54c :: switch_type_spec >> boolean_type @R r54d :: switch_type_spec >> enum_type @R r54e :: switch_type_spec >> scoped_name ##55 @R r55a :: switch_body >> case_nt @R r55b :: switch_body >> switch_body case_nt ##56 @R r56a :: case_nt >> case_labels element_spec ; @R r56b :: case_labels >> case_label @R r56c :: case_labels >> case_labels case_label ##57 kw default case @R r57a :: case_label >> case const_expr : @R r57b :: case_label >> default : ##58 @R r58 :: element_spec >> type_spec declarator ##59 kw enum @R r59a :: enum_type >> enum identifier { enumerators } @R r59b :: enumerators >> enumerator @R r59c :: enumerators >> enumerators , enumerator ##60 @R r60 :: enumerator >> identifier ##61 kw sequence @R r61 :: sequence_type >> sequence < simple_type_spec , positive_int_const > ##62 kw string @R r62a :: string_type >> 
string < positive_int_const > @R r62b :: string_type >> string ##63 @R r63a :: array_declarator >> identifier fixed_array_sizes @R r63b :: fixed_array_sizes >> fixed_array_size @R r63c :: fixed_array_sizes >> fixed_array_sizes fixed_array_size ##64 @R r64 :: fixed_array_size >> [ positive_int_const ] ##65 kw attribute readonly @R r65a :: attr_dcl >> maybe_readonly attribute param_type_spec simple_declarators @R r65b :: maybe_readonly >> readonly @R r65c :: maybe_readonly >> @R r65d :: simple_declarators >> simple_declarator @R r65e :: simple_declarators >> simple_declarators , simple_declarator ##66 kw exception @R r66a :: except_dcl >> exception identifier { members } @R r66b :: members >> @R r66c :: members >> member_list ##67 @R r67a :: op_dcl >> maybe_op_attribute op_type_spec identifier parameter_dcls maybe_raises_expr maybe_context_expr @R r67b :: maybe_op_attribute >> @R r67c :: maybe_op_attribute >> op_attribute @R r67d :: maybe_raises_expr >> @R r67e :: maybe_raises_expr >> raises_expr @R r67f :: maybe_context_expr >> @R r67g :: maybe_context_expr >> context_expr ##68 kw oneway @R r68a :: op_attribute >> oneway ##69 kw void @R r69a :: op_type_spec >> param_type_spec @R r69b :: op_type_spec >> void ##70 @R r70a :: parameter_dcls >> ( parameterlist ) @R r70b :: parameter_dcls >> ( ) @R r70c :: parameterlist >> param_dcl @R r70d :: parameterlist >> parameterlist , param_dcl ##71 @R r71 :: param_dcl >> param_attribute param_type_spec simple_declarator ##72 kw in out inout @R r72 :: param_attribute >> in @R r72 :: param_attribute >> out @R r72 :: param_attribute >> inout ##73 kw raises @R r73 :: raises_expr >> raises ( scoped_name_list ) ##74 kw context @R r74 :: context_expr >> context ( string_literal_list ) @R r74b :: string_literal_list >> string_literal @R r74c :: string_literal_list >> string_literal_list , string_literal @R r75 :: param_type_spec >> base_type_spec @R r75 :: param_type_spec >> string_type @R r75 :: param_type_spec >> scoped_name """ nonterms = """ colon_colon param_attribute unsigned_long_int unsigned_short_int param_dcl parameterlist string_literal_list members maybe_op_attribute maybe_raises_expr maybe_context_expr op_type_spec parameter_dcls op_attribute raises_expr context_expr maybe_readonly param_type_spec simple_declarators simple_declarator fixed_array_sizes fixed_array_size element_spec enumerator enumerators switch_type_spec switch_body case_nt case_labels case_label member_list member signed_int unsigned_int signed_long_int signed_short_int simple_declarator complex_declarator array_declarator declarator sequence_type string_type floating_pt_type integer_type char_type boolean_type octet_type any_type base_type_spec template_type_spec simple_type_spec constr_type_spec type_spec declarators type_declarator struct_type union_type enum_type literal boolean_literal positive_int_literal mult_expr unary_expr unary_operator primary_expr or_expr xor_expr and_expr shift_expr add_expr integer_type char_type boolean_type floating_type string_type const_type const_expr scoped_name_list scoped_name attr_dcl op_dcl inheritance_spec export interface_header interface_body interface_dcl forward_dcl type_dcl const_dcl except_dcl interface_nt module_nt specification definition speclist """ keywords = """ exception oneway void in out inout raises context interface module const TRUE FALSE typedef float double long unsigned short char boolean octet any struct union switch enum string attribute readonly default case sequence :: """ # NOTE: FOR NECESSARY HACKERY REASONS :: IS 
A KEYWORD!
punctuations = ";{}()[],:|^&<>+-*/%~="

# dummy regexen
identifierre = "identifier"
integer_literalre = "123"
positive_int_constre = "999"
string_literalre = "'string'"
character_literalre= "'c'"
floating_pt_literalre = "1.23"

# dummy interp fun for all terminals
def echo (str):
    return str

def DeclareTerminals(Grammar):
    Grammar.Addterm("identifier", identifierre, echo)
    Grammar.Addterm("integer_literal", integer_literalre, echo)
    Grammar.Addterm("string_literal", string_literalre, echo)
    Grammar.Addterm("character_literal", character_literalre, echo)
    Grammar.Addterm("floating_pt_literal", floating_pt_literalre, echo)
    Grammar.Addterm("positive_int_const", positive_int_constre, echo)

## we need to override LexDictionary to recognize :: as a SINGLE punctuation.
## (not possible using standard kjParsing, requires a special override)
import kjParser

class myLexDictionary(kjParser.LexDictionary):
    def __init__(self):
        kjParser.LexDictionary.__init__(self)
        map = ((kjParser.KEYFLAG, "coloncolon"), "coloncolon")
        self.keywordmap["::"] = map
        self.keywordmap["coloncolon"] = map

    def Token(self, String, StartPosition):
        if String[StartPosition:StartPosition+2] == "::":
            tok = self.keywordmap["::"]
            return (tok, 2)
        # default:
        return kjParser.LexDictionary.Token(self, String, StartPosition)

# default bind all rules
def GrammarBuild():
    import kjParseBuild
    idl = kjParseBuild.NullCGrammar()
    idl.LexD = myLexDictionary()
    #idl.SetCaseSensitivity(0) # grammar is not case sensitive for keywords
    DeclareTerminals(idl)
    idl.Keywords(keywords)
    idl.punct(punctuations)
    idl.Nonterms(nonterms)
    #idl.comments([LISPCOMMENTREGEX])
    idl.Declarerules(idlgramstring)
    print "now compiling"
    idl.Compile()
    return idl

if __name__=="__main__":
    GrammarBuild()

gadfly-1.0.0/doc/demo/kjParsing/pygram.py

# rules for python
# based on grammar given in Programming Python by Mark Lutz

# EDIT THIS: THE DIRECTORY IN WHICH TO MARSHAL THE
# GRAMMAR DATA STRUCTURES.
#
ARCHIVE = "."
marshalfilename = ARCHIVE + "/pygram.mar" pyrules = """ all :: ## input terminates with "fake" dedent (forces read of all file) @R all1 :: all >> file_input DEDENT ## 1 term newline ##@R lead_blank :: file_input >> NEWLINE file_input @R top_stmt :: file_input >> file_input stmt @R file_input :: file_input >> stmt ## 2 @R simple :: stmt >> simple_stmt @R compound :: stmt >> compound_stmt ## 3 punct ; term NEWLINE @R one_small :: simple_stmt >> small_stmt NEWLINE @R more_small :: simple_stmt >> small_stmt ; simple_stmt @R small_semi :: simple_stmt >> small_stmt ; NEWLINE ## 4 kw pass @R smexpr :: small_stmt >> expr_stmt @R smassn :: small_stmt >> assn @R smprint :: small_stmt >> print_stmt @R smdel :: small_stmt >> del_stmt @R smpass :: small_stmt >> pass @R smflow :: small_stmt >> flow_stmt @R smimport :: small_stmt >> import_stmt @R smglobal :: small_stmt >> global_stmt ## access ignored @R smexec :: small_stmt >> exec_stmt ## 5 @R cmif :: compound_stmt >> if_stmt @R cmwhile :: compound_stmt >> while_stmt @R cmfor :: compound_stmt >> for_stmt @R cmtry :: compound_stmt >> try_stmt @R cmdef :: compound_stmt >> funcdef @R cmclass :: compound_stmt >> classdef ##6 @R exprlist :: expr_stmt >> testlist ##@R assignment :: expr_stmt >> assn @R assn1 :: assn >> testlist = testlist @R assnn :: assn >> testlist = assn @R assn1c :: assn >> testlist , = testlist @R assn1c2 :: assn >> testlist , = testlist , @R assnnc :: assn >> testlist , = assn ##testing @R exprassn :: expr_stmt >> expr_stmt = testlist @R exprlistc :: expr_stmt >> testlist , ##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist , ##7 kw print @R rprint0 :: print_stmt >> print @R rprint :: print_stmt >> print testlist @R rprintc :: print_stmt >> print testlist , ##8 kw del @R rdel :: del_stmt >> del exprlist ##9 trivially handled in #4 ##10 kw raise continue break return ## eliminates 11 12 13 14 @R rbreak :: flow_stmt >> break @R rcontinue :: flow_stmt >> continue @R rreturn0 :: flow_stmt >> return @R rreturn :: flow_stmt >> return testlist @R rreturnc :: flow_stmt >> return testlist , @R rraise1 :: flow_stmt >> raise test @R rraise2 :: flow_stmt >> raise test , test @R rraise3 :: flow_stmt >> raise test , test , test ## 11 12 13 14 skipped ## 15 kw import from @R rimport :: import_stmt >> import dotted_name_list @R rimportc :: import_stmt >> import dotted_name_list , @R dnlist1 :: dotted_name_list >> dotted_name @R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name @R rfrom :: import_stmt >> from dotted_name import name_list @R rfroms :: import_stmt >> from dotted_name import * @R rfromc :: import_stmt >> from dotted_name import name_list , @R nlistn :: name_list >> name_list , NAME @R nlist1 :: name_list >> NAME ##16 nt NAME @R dn1 :: dotted_name >> NAME @R dnn :: dotted_name >> dotted_name . 
NAME ##17 kw global @R global1 :: global_stmt >> global NAME @R globaln :: global_stmt >> global_stmt , NAME ## 18 19 ignored ##20 kw exec in @R exec1 :: exec_stmt >> exec expr @R exec2 :: exec_stmt >> exec expr in test @R exec3 :: exec_stmt >> exec expr in test , test ##21 kw if elif else punct : @R ifr :: if_stmt >> if test : suite elifs @R elifs0 :: elifs >> @R relse :: elifs >> else : suite @R elifsn :: elifs >> elif test : suite elifs ##22 kw while @R while1 :: while_stmt >> while test : suite @R while2 :: while_stmt >> while test : suite else : suite ##23 kw for @R for1 :: for_stmt >> for exprlist in testlist : suite @R for2 :: for_stmt >> for exprlist in testlist : suite else : suite ##24 kw try @R tryr :: try_stmt >> try : suite excepts @R excepts1 :: excepts >> except_clause : suite @R excepts2 :: excepts >> except_clause : suite else : suite @R exceptsn :: excepts >> except_clause : suite excepts @R tryf :: try_stmt >> try : suite finally : suite ##25 kw except @R except0 :: except_clause >> except @R except1 :: except_clause >> except test @R except2 :: except_clause >> except test , test ##26 @R class1 :: classdef >> class NAME : suite @R class2 :: classdef >> class NAME ( testlist ) : suite ##27 kw def @R rdef :: funcdef >> def NAME parameters : suite ##28, 29 punct = * ## (modified from grammar presented) @R params1 :: parameters >> ( varargslist ) @R params1c :: parameters >> ( varargslist , ) @R params2 :: varargslist >> ## this is way too permissive: fix at semantic level @R params3 :: varargslist >> arg @R params4 :: varargslist >> varargslist , arg @R argd :: arg >> NAME = test @R arg2 :: arg >> fpdef @R arg3 :: arg >> * NAME @R arg4 :: arg >> ** NAME ## 30 @R fpdef1 :: fpdef >> NAME @R fpdef2 :: fpdef >> ( fplist ) @R fpdef2c :: fpdef >> ( fplist , ) ##31 @R fplist1 :: fplist >> fpdef @R fplistn :: fplist >> fplist , fpdef ##32 t INDENT DEDENT @R ssuite :: suite >> simple_stmt @R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT @R stmtseq1 :: stmtseq >> stmt @R stmtseqn :: stmtseq >> stmtseq stmt ##33 kw or cancels 53 @R testor :: test >> or_test @R testand :: or_test >> and_test @R testor1 :: or_test >> or_test or and_test ## @R testlambda0 :: test >> lambda : test REDUNDANT @R testlambda1 :: test >> lambda varargslist : test ##34 kw and @R andnot :: and_test >> not_test @R andand :: and_test >> and_test and not_test ##35 kw not @R notnot :: not_test >> not not_test @R notcmp :: not_test >> comparison ##36 NOTE KWS == >= <= <> != @R cmpexpr :: comparison >> expr @R cmplt :: comparison >> comparison < expr @R cmpgt :: comparison >> comparison > expr @R cmpeq :: comparison >> comparison == expr @R cmpge :: comparison >> comparison >= expr @R cmple :: comparison >> comparison <= expr @R cmpnep :: comparison >> comparison <> expr @R cmpne :: comparison >> comparison != expr @R cmpin :: comparison >> comparison in expr @R cmpnotin :: comparison >> comparison not in expr @R cmpis :: comparison >> comparison is expr @R cmpisnot :: comparison >> comparison is not expr ##37 kw is not punct > < ! 
(eliminated) ##38 p | @R expr_xor :: expr >> xor_expr @R expr_lor :: expr >> expr | xor_expr ##39 p ^ @R xor_and :: xor_expr >> and_expr @R xor_xor :: xor_expr >> xor_expr ^ and_expr ##40 @R and_shift :: and_expr >> shift_expr @R and_and :: and_expr >> and_expr & shift_expr ##41 note kw's << >x> note goofy x to avoid confusing the grammar @R shift_arith :: shift_expr >> arith_expr @R shift_left :: shift_expr >> shift_expr << arith_expr @R shift_right :: shift_expr >> shift_expr >x> arith_expr ##42 @R arith_term :: arith_expr >> term @R arith_plus :: arith_expr >> arith_expr + term @R arith_minus :: arith_expr >> arith_expr - term ##43 p */% @R termfactor :: term >> factor @R termmul :: term >> term * factor @R termdiv :: term >> term / factor @R termmod :: term >> term % factor ## stuff for power @R factorpower :: factor >> power @R factorexp :: factor >> factor ** power ##44 p ~ @R powera :: power >> atom trailerlist @R trailerlist0 :: trailerlist >> @R trailerlistn :: trailerlist >> trailer trailerlist @R powerp :: power >> + power @R powerm :: power >> - power @R poweri :: power >> ~ power ##45 t NUMBER STRING @R nulltup :: atom >> ( ) @R parens :: atom >> ( testlist ) @R parensc :: atom >> ( testlist , ) @R nulllist :: atom >> [ ] @R list :: atom >> [ testlist ] @R listc :: atom >> [ testlist , ] @R nulldict :: atom >> { } @R dict :: atom >> { dictmaker } @R dictc :: atom >> { dictmaker , } @R repr :: atom >> ` testlist ` ## @R reprc :: atom >> ` testlist , ` doesn't work, apparently @R aname :: atom >> NAME ## note number to be broken out into FLOAT OCTINT HEXINT INT @R anumber :: atom >> NUMBER @R astring :: atom >> stringseq @R stringseq0 :: stringseq >> STRING @R stringseqn :: stringseq >> stringseq STRING ##46 @R nullcall :: trailer >> ( ) @R call :: trailer >> ( arglist ) @R callc :: trailer >> ( arglist , ) @R index :: trailer >> [ subscriptdots ] @R getattr :: trailer >> . NAME ##47 @R arg1 :: arglist >> argument @R argn :: arglist >> arglist , argument ##@R argn1 :: arglist >> arglist , NAME = test ##48 ( !!!! is this wrong in PP?) @R posarg :: argument >> test ## here the left test should be a NAME always, but parser doesn't like it @R namearg :: argument >> test = test ##49 this IS wrong in PP (numeric ext) @R nodots :: subscriptdots >> subscriptseq @R yesdots :: subscriptdots >> subscriptseq , . . . 
, subscriptseq @R subscript1 :: subscriptseq >> subscript @R subscriptn :: subscriptseq >> subscriptseq , subscript @R subscriptt :: subscript >> test @R subscripts0 :: subscript >> : @R subscriptsL :: subscript >> test : @R subscriptsR :: subscript >> : test @R subscripts :: subscript >> test : test ##50 @R exprlist1 :: exprlist >> expr @R exprlistn :: exprlist >> exprlist , expr ##51 @R testlist0 :: testlist >> test @R testlistn :: testlist >> testlist , test ##52 @R dictmaker1 :: dictmaker >> test : test @R dictmaker2 :: dictmaker >> dictmaker , test : test """ nonterms = """ subscriptdots subscript arg argument arglist subscriptseq params trailerlist factor atom trailer dictmaker stringseq power xor_expr and_expr shift_expr arith_expr term and_test or_test not_test comparison comp_op expr fplist stmtseq varargslist assn expr elifs suite excepts parameters pbasic pdefault pspecial testlist exprlist test dotted_name_list dotted_name name_list if_stmt while_stmt for_stmt try_stmt funcdef classdef expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt small_stmt compound_stmt stmt simple_stmt exec_stmt file_input except_clause fpdef cmp_op all """ import string # python needs special handling for the lexical stuff NAMEre = "[" + string.letters + "_][" + string.letters+string.digits +"]*" NUMBERre = "[" + string.digits + "]+" # temporary! STRINGre = '"[^"\n]*"' # to be overridden in lexdict #NEWLINEre = "\n" # to be overridden in lexdict INDENTre = "#" # a fake! to be overridden DEDENTre = "#" # a fake! to be overridden def echo(str): return str def DeclareTerminals(Grammar): Grammar.Addterm("NAME", NAMEre, echo) Grammar.Addterm("NUMBER", NUMBERre, echo) Grammar.Addterm("STRING", STRINGre, echo) #Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw! Grammar.Addterm("INDENT", INDENTre, echo) Grammar.Addterm("DEDENT", DEDENTre, echo) # not >x> is a fake! keywords = """ and break class continue def del elif else except exec finally for from global if import in is lambda not or pass print raise return try while == >= <= <> != >x> << NEWLINE ** """ import kjParser, string, re from kjParser import KEYFLAG, ENDOFFILETERM alphanumunder = string.letters+string.digits+"_" alpha = string.letters + "_" # components that are part of a identifier (cannot be next to kw). id_letters = map(None, alphanumunder) # terminator re for names nametermre = "[^" + alphanumunder + "]" nameterm = re.compile(nametermre) # terminator re for numbers (same as above but allow "." in num). numtermre = "[^" + alphanumunder + "\.]" numterm = re.compile(numtermre) parseerror = "parseerror" pycommentre = r"(#.*)" # whitespace regex outside of brackets # white followed by (comment\n maybe repeated) # DON'T EAT NEWLINE!! pywhiteoutre = r"([ \t\r\014]|[\]\n)*%s?" % pycommentre pywhiteout = re.compile(pywhiteoutre) # whitespace regex inside brackets # white or newline possibly followed by comment, all maybe repeated pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" 
% pycommentre pywhitein = re.compile(pywhiteinre) # totally blank lines (only recognize if next char is newline) #allblankre = "\n" + pywhiteinre #allblank = re.compile(allblankre) # re for indentation (might accept empty string) indentp = re.compile(r"[\t ]*") # two char kws and puncts char2kw = ["if", "or", "in", "is"] punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="] # >two char kws as map of first 3 chars to others char3k_data = """ and break class continue def del elif else except finally for from global import lambda not pass print raise return try while exec """ char3kw = string.split(char3k_data) char3kwdict = {} for x in char3kw: char3kwdict[x[:3]] = x # NOTE: newline is treated same as a punctuation # NOTE: "' ARE NOT PUNCTS punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`" punctlist = map(None, punct) kwmap = {} for x in char2kw + punct2 + char3kw + map(None, punct): # everything parses as length 1 to the outer world. kwmap[x] = (((KEYFLAG, x), x), 1) # special hack kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1) newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1) #finaldedent = (((TERMFLAG, "DEDENT"), ""), 1) # Python lexical dictionary. ### MUST HANDLE WHOLELY BLANK LINES CORRECTLY! def RMATCH(re, key, start=0): group = re.match(key, start) if group is None: return -1 return group.end() - group.start() class pylexdict(kjParser.LexDictionary): def __init__(self): kjParser.LexDictionary.__init__(self) # need to add special map for >> self.brackets = 0 # count of active brackets self.realindex = 0 # where to start self.indents = [""] # stack of indents (start with a fake one) self.lineno = 0 self.atdedent = 0 ### handle multiple dedents correctly!!! ### translate tabs to 8 spaces... from kjParser import TERMFLAG self.NAMEflag = (TERMFLAG, "NAME") self.STRINGflag = (TERMFLAG, "STRING") self.NEWLINEflag = (TERMFLAG, "NEWLINE") self.INDENTflag = (TERMFLAG, "INDENT") self.DEDENTflag = (TERMFLAG, "DEDENT") self.NUMBERflag = (TERMFLAG, "NUMBER") def endoffile(self, String): # pop off all indentations! indents = self.indents #lastresult = self.lastresult self.realindex = len(String) if not indents: # pop indents #print "eof after dedent" result = self.lastresult = (ENDOFFILETERM, 0) else: #print "eof as dedent after", self.lastresult del indents[-1] if indents: dedent = indents[-1] else: dedent = "" result = self.lastresult = ((self.DEDENTflag, dedent), 1) #print "returning eof", result, "after", lastresult return result def Token(self, String, StartPosition): #print "Token", (StartPosition, # `String[self.realindex:self.realindex+20]`, self.lastresult) # HAVE TO FAKE OUT LEXER FOR DEDENTS # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION # STRING POSITION IS MAINTAINED IN LexDict object. 
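        # (clarifying note: StartPosition is the token ordinal the
        # parser asks for, while self.realindex is the character offset
        # into String; keeping the two separate is what lets Token()
        # report synthetic INDENT/DEDENT tokens that consume no
        # characters of the input at all.)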
lastindex = self.lastindex lastresult = self.lastresult if self.laststring is not String: #print "parsing new string" self.laststring = String # special hack: skip lead whitespace cursor = 0 self.lineno = 1 while 1: test = RMATCH(pywhitein,String, cursor) if test<0: break next = cursor + test #print "lead skip:", next, String[cursor:next] if String[next]!="\n": break #skipped = String[cursor:next] #if "\n" in skipped: # self.lineno = ( # self.lineno + len(string.splitfields(skipped, "\n"))) #self.lineno = self.lineno+1 cursor = next + 1 self.realindex = cursor self.saveindex = 0 self.indents = [""] # stack of indents (start with a fake one) # pretend we saw a newline self.lastresult = newlineresult if StartPosition!=0: self.laststring = None raise ValueError, "python lexical parsing must start at zero" lastindex = self.lastindex lastresult = None elif lastindex == StartPosition: #print "returning lastresult ", lastresult return lastresult elif lastindex != StartPosition-1: raise ValueError, "python lexer can't skip tokens" #print "parsing", StartPosition, lastresult # do newline counting here! delta = String[self.saveindex: self.realindex] #print "delta", `delta` if "\n" in delta: #print self.lineno, self.saveindex, self.realindex, `delta` self.lineno = self.lineno + len( string.splitfields(delta, "\n")) - 1 realindex = self.saveindex = self.realindex self.lastindex = StartPosition # skip whitespace (including comments) ### needs to be improved to parse blank lines, count line numbers... # skip all totally blank lines (don't eat last newline) atlineend = (String[realindex:realindex+1] == "\n" or lastresult is newlineresult or self.atdedent) skipnewlines = (lastresult is newlineresult or self.atdedent or self.brackets>0) if atlineend: #String[realindex:realindex+1]=="\n": #print "trying to skip blank lines", String[realindex:realindex+10] while 1: #if String[realindex:realindex+1]=="\n": # start = realindex+1 # move past current newline # self.lineno = self.lineno + 1 #else: # start = realindex start = realindex if skipnewlines: while String[start:start+1]=="\n": start = start+1 #self.lineno = self.lineno+1 #print "matching", `String[start:start+10]` skip = RMATCH(pywhitein,String, start) #print "skip=", skip if skip<0: break rs = skip + realindex + (start-realindex) if rs==realindex: break #print "at", rs, `String[rs]` if (rs0: # rs = rs + 1 #skipped = String[start:rs] #if "\n" in skipped: #self.lineno = self.lineno + len( # string.splitfields(skipped, "\n")) self.realindex = realindex = rs #self.lineno = self.lineno+1 else: if skipnewlines: self.realindex = realindex = start break #print "after skipping blank lines", `String[realindex:realindex+20]` skipto = realindex skip = 0 if self.brackets>0: while 1: #print "skipping white in brackets", skipto if realindex>len(String): break if String[skipto]=="\n": #self.lineno = self.lineno+1 skipto = skipto + 1 self.realindex = realindex = skipto continue skip = RMATCH(pywhiteout,String, skipto) nextskipto = skipto+skip #skipped = String[skipto:nextskipto] #if "\n" in skipped: # self.lineno = self.lineno+len( # string.splitfields(skipped, "\n")) if skip>0: skipto = nextskipto else: break skip = skipto - realindex elif not atlineend: skip = RMATCH(pywhitein,String, realindex) if skip<=0: skip = 0 else: #print "skipping", skip nextri = realindex + skip #skipped = String[realindex:nextri] #if "\n" in skipped: # self.lineno = self.lineno + len( # string.splitfields(skipped, "\n")) realindex = self.realindex = nextri if realindex>=len(String): return 
self.endoffile(String) # now look for a keyword, name, number, punctuation, # INDENT, DEDENT, NEWLINE first = String[realindex] #if last parse was newline and not in brackets: # look for indent/dedent if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent) and first != "\n"): #print "looking for dent", realindex, `String[realindex:realindex+20]` match = RMATCH(indentp,String, realindex) if match>=0: dent = String[realindex: realindex+match] #print "dent match", match, `dent` oldindex = realindex self.realindex = realindex = realindex+match # replace tabs with 8 spaces dent = string.joinfields(string.splitfields(dent, "\t"), " ") dents = self.indents lastdent = dents[-1] ldl = len(lastdent) dl = len(dent) #print "last", ldl, dents if ldldl: self.realindex = oldindex # back up, may have to see it again! self.atdedent = 1 result = self.lastresult = ((self.DEDENTflag, dent), 1) del dents[-1] #print "dedent ", result, dl, dents return result # otherwise, indentation is same, keep looking # might be at eof now: if realindex>=len(String): #print "returning eof" return self.endoffile(String) first = String[realindex] self.atdedent = 0 from string import digits #, letters if (first in punctlist and # special case for .123 numbers (yuck!) (first!="." or String[realindex+1] not in digits)): # is it a 2 char punct? first2 = String[realindex:realindex+2] if first2 in punct2: result = self.lastresult = kwmap[first2] self.realindex = realindex+2 #print "2 digit punct", result return result # otherwise, just return normal punct result = self.lastresult = kwmap[first] self.realindex = self.realindex + 1 ### special bookkeeping if first=="\n": result = newlineresult #print "newline!" #self.lineno = self.lineno+1 elif first in "[{(": #print "bracket!" self.brackets = self.brackets + 1 elif first in "]})": #print "close bracket!" self.brackets = self.brackets - 1 #print "1 digit punct", result return result if first in digits or first==".": # parse a number... skip = numterm.search(String, realindex) if skip<=realindex: raise parseerror, "number length<1 (!)" thenumber = String[realindex:skip] self.realindex = skip ### note don't interpret number here!! result = self.lastresult = ((self.NUMBERflag, thenumber), 1) #print "number", result return result if first in alpha: # try keyword... first2 = String[realindex: realindex+2] if first2 in char2kw: if String[realindex+2:realindex+3] not in id_letters: # parse a 2 char kw first2 result = self.lastresult = kwmap[first2] self.realindex = self.realindex+2 #print "keyword 2", result return result first3 = String[realindex: realindex+3] if char3kwdict.has_key(first3): the_kw = char3kwdict[first3] the_end = realindex+len(the_kw) if ((the_endsanity: raise parseerror, "failed to terminate single quotes" if String[last-1:last]=="\\": # are we at the end of an odd number of backslashes? (yuck!) bplace = last-1 while String[bplace:bplace+1]=="\\": bplace = bplace-1 if (last-bplace)%2==1: break # the end quote is real! place = last+1 else: break the_string = String[start:last] self.realindex = last+1 result = self.lastresult = ((self.STRINGflag, the_string), 1) #print "1q string", result return result #print (String[realindex-20:realindex-1], String[realindex], # String[realindex+1:realindex+20]) raise parseerror, "invalid first: " + `first` # use a modified lexstringwalker class pylexstringwalker(kjParser.LexStringWalker): def DUMP(self): kjParser.DumpStringWindow(self.String, self.LexDict.realindex) ## a HORRIBLE HACK! 
of a hack: override the DoParse of Grammar ## to give Python line numbers. RELIES ON GLOBAL pyg ## def hackDoParse(String, Context=None, DoReductions=1): import sys, kjParser try: # construct the ParserObj # add a newline to front to avoid problem with leading comment #String = "\n%s\n" % String Stream = pylexstringwalker( String, pyg.LexD ) Stack = [] # {-1:0} #Walkers.SimpleStack() ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \ DoReductions, Context ) # do the parse ParseResult = ParseOb.GO() # return final result of reduction and the context return (ParseResult[1], Context) #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions) except: ### for testing!! t, v = sys.exc_type, sys.exc_value v = ("near line", pyg.LexD.lineno, v) raise t, v buildinfo = """ Please edit the ARCHIVE parameter of this module (%s) to place the python grammar archive in a standard directory to prevent the module from rebuilding the python grammar over and over and over... """ % __name__ def GrammarBuild(): global pyg import kjParseBuild pyg = kjParseBuild.NullCGrammar() pyg.DoParse = hackDoParse # override lexical dict here pyg.LexD = pylexdict() DeclareTerminals(pyg) pyg.Keywords(keywords) pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}") pyg.Nonterms(nonterms) pyg.Declarerules(pyrules) print buildinfo print "compiling... this may take a while..." pyg.Compile() print "dumping" outfile = open(marshalfilename, "wb") pyg.MarshalDump(outfile) outfile.close() print "self testing the grammar" test(pyg) print "\n\ndone with regeneration" return pyg def unMarshalpygram(): global pyg import kjParser print "loading" try: infile = open(marshalfilename, "rb") except IOError: print marshalfilename, "not found, attempting creation" pyg = GrammarBuild() else: pyg = kjParser.UnMarshalGram(infile) infile.close() pyg.DoParse = hackDoParse # lexical override pyg.LexD = pylexdict() DeclareTerminals(pyg) # BindRules(pyg) if dotest: print "self testing the grammar" test(pyg) return pyg # not used, commented #### interpretation rules/classes # #def zeroth(list, Context): # return list[0] # eg, for all1, ignore all but first # ## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob #def append(list, Context): # "eg, for top_stmt, conjoin two smt lists" # return list[0] + list[1] # ## file_input >zeroth # ## simple, compound, one_small, small_semi: echol #def echol(list, Context): # return list # ## more_small > seq_sep #def seq_sep(list, Context): # list[0].append(list[2]) # return list[0] # ## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec ## > zeroth # ## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth # # #def BindRules(pyg): # for name in string.split(""" # all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass # smexpr smassn smprint smdel smflow smimport smglobal smexec # """): # pyg.Bind(name, zeroth) # for name in string.split(""" # simple compound one_small small_semi # """): # pyg.Bind(name, echol) # pyg.Bind("top_stmt", append) # pyg.Bind("more_small", seq_sep) teststring = """# # # a test string # from string import join, split ''' import re for a in l: a.attr, a[x], b = c else: d = b ''' class zzz: ''' #doc string ''' ''' global re, join d = {} for i in range(10): d[i] = i ''' def test(c,s): return "this" while not done: print done break list = [1,2,3] # comment return 5 n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8 if x==5: while y: for i in range(6): raise SystemError, "oops" """ #teststring ="""\ ## comment #if x in y: print z #elif 1: print w #""" ''' 
teststring=""" exec "print 1" """ ''' def test(grammar, context=None, teststring=teststring): from time import time now = time() x = grammar.DoParse1(teststring, context) elapsed = time()-now print x print elapsed return x regen = 0 dotest = 0 if __name__ == "__main__" : if regen: GrammarBuild() unMarshalpygram() gadfly-1.0.0/doc/demo/kjParsing/pylint.py0100644000157700012320000003644307466100700017312 0ustar rjonestech#!/usr/local/bin/python """python lint using kwParsing The goal of this module/filter is to help find programming errors in python source files. As a filter use thusly: % python kjpylint.py source_file.py As an internal tool use like this: import kjpylint (pyg, context) = kjpylint.setup() kjpylint.lint(data, pyg, context) where data is the text of a python program. You can build your own context structure by subclassing GlobalContext, and redefining GlobalContext.complain(string) for example. You could do a lot more than that too... Also, to lint all *.py files recursively contained in a directory hierarchy use kjpylint.lintdir("/usr/local/lib/python") # for example FEATURES: Lint expects 1) a newline or two at the end of the data; 2) consistent indenting (and inconsistency may be invisible) [eg " \t" and "\t" are not the same indent to Lint, but Python sees them the same.] If (1) or (2) are not satisfied Lint will raise an exception. Buglets: lambdas and for loops on one line generate extraneous warnings. Notes: ====== The lint process works, in outline, like this. Scan over a python program x = 1 def f(a): a = x d.x, y = b z = w and build annotations like [ set("x", 1), [ get("x", 4) set("a", 4) get("b", 5) get("d", 5) set("y", 5) pop_local() ] get("w", 7) set("z", 7) ] from this stream conclude warning on line 5: b used before set warning on line 5: d used before set warning on line 5: y set, never used etc. using simple one pass approximate flow analysis. 
""" pyg = context = None #import pygram from pygram import newlineresult # reduction rules: # only need to consider # expressions, assignments, def, class, global, import, from, for # # expressions return a list of unqualified names, not known set # qualified names are automatically put in context as refs # # assignments set left names, ref right names # # def sets new name for function and args, # refs other names # # class adds new name for class # refs other names # # global forces global interpretation for name # # import adds FIRST names # from sets names # for sets names # # related rules # ASSIGNMENT REQUIRES SPECIAL TREATMENT #@R assn1 :: assn >> testlist = testlist def assn1(list, context): [t1, e, t2] = list return assn(t1, t2) #@R assnn :: assn >> testlist = assn def assnn(list, context): [t1, e, a1] = list return assn(t1, a1) # @R assn1c :: assn >> testlist , = testlist def assn1c(list, context): [t1, c, e, t2] = list return assn(t1, t2) # @R assn1c2 :: assn >> testlist , = testlist , def assn1c2(list, context): del list[-1] return assn1c(list, context) # @R assnnc :: assn >> testlist , = assn def assnnc(list, context): return assn1c(list, context) def assn(left, right): result = right for x in left: (ln, ri, op, name) = x if op == "ref": result.append( (ln, ri, "set", name) ) else: result.append(x) return result #@R except2 :: except_clause >> except test , test def except2(list, context): [e, t1, c, t2] = list result = t1 for (ln, ri, op, name) in t2: result.append( (ln, ri, "set", name) ) return result #@R smassn :: small_stmt >> assn # ignored #@R rfrom :: import_stmt >> from dotted_name import name_list #@R rfromc :: import_stmt >> from dotted_name import name_list , def rfrom(list, context): #print rfrom, list [f, d, i, n] = list # ignore d return n def rfromc(list, context): return rfrom(list[:-1]) def mark(kind, thing, context): L = context.LexD lineno = L.lineno # are we reducing on a newline? if L.lastresult==newlineresult: lineno = lineno-1 return (lineno, -L.realindex, kind, thing) #@R dn1 :: dotted_name >> NAME def dn1(list, context): #print "dn1", list #L = context.LexD return [ mark("set", list[0], context) ] #return [ (L.lineno, -L.realindex, "set", list[0]) ] # handles import case, make name set local #@R nlistn :: name_list >> name_list , NAME def nlistn(list, context): #print "nlistn", list [nl, c, n] = list #L = context.LexD #nl.append( (L.lineno, -L.realindex, "set", n) ) nl.append( mark("set", n, context) ) return nl #@R nlist1 :: name_list >> NAME def nlist1(list, context): #print "nlist1", list #L = context.LexD #return [ (L.lineno, -L.realindex, "set", list[0]) ] return [ mark("set", list[0], context) ] # ignore lhs in calls with keywords. 
#@R namearg :: argument >> test = test def namearg(list, context): [t1, e, t2] = list return t2 # handles from case, make names set local #@R global1 :: global_stmt >> global NAME def global1(list, context): #print "global1", list #L = context.LexD #return [ (L.lineno, -L.realindex, "global", list[1]) ] return [ mark("global", list[1], context) ] #@R globaln :: global_stmt >> global_stmt , NAME # handles global, make names global (not set or reffed) def globaln(list, context): #print "globaln", list [g, c, n] = list #L = context.LexD #g.append( (L.lineno, -L.realindex, "global", n) ) g.append( mark("global", n, context) ) return g #@R for1 :: for_stmt >> #for exprlist in testlist : # suite def for1(list, context): #print "for1", list [f, e, i, t, c, s] = list refs = t + s return assn(e, refs) #@R for2 :: for_stmt >> #for exprlist in testlist : # suite #else : # suite def for2(list,context): #print "for2", list [f, e, i, t, c1, s1, el, c2, s2] = list refs = t + s1 + s2 return assn(e, refs) ### #@R class1 :: classdef >> class NAME : suite def class1(list, context): [c, n, cl, s] = list return Class(n, [], s, context) #@R class2 :: classdef >> class NAME ( testlist ) : suite def class2(list, context): [c, n, opn, t, cls, cl, s] = list return Class(n, t, s, context) def Class(name, testlist, suite, context): globals = analyse_scope(name, suite, context, unused_ok=1) context.defer_globals(globals) result = testlist L = context.LexD # try to correct lineno lineno = L.lineno realindex = L.realindex for (ln, ri, op, n) in testlist+suite: lineno = min(lineno, ln) result.append((lineno, -realindex, "set", name)) #result.append( mark("set", name, context) ) # supress complaints about unreffed classes result.append((lineno+1, -realindex, "qref", name)) #result.append( mark("qref", name, context) ) return result # vararsglist requires special treatment. # return (innerscope, outerscope) pair of lists # @R params1 :: parameters >> ( varargslist ) def params1(l, c): return l[1] params1c = params1 #@R params2 :: varargslist >> def params2(l, c): return ([], []) #@R params3 :: varargslist >> arg def params3(l, c): return l[0] #@R params4 :: varargslist >> varargslist , arg def params4(l, c): #print "params4", l [v, c, a] = l v[0][0:0] = a[0] v[1][0:0] = a[1] return v #@R argd :: arg >> NAME = test def argd(l, c): [n, e, t] = l #L = c.LexD #return ([(L.lineno, -L.realindex, "set", n)], t) return ([ mark("set", n, c) ], t) #@R arg2 :: arg >> fpdef def arg2(l, c): return l[0] #@R arg3 :: arg >> * NAME def arg3(l, c): del l[0] return fpdef1(l, c) #@R arg4 :: arg >> ** NAME def arg4(l, c): #del l[0] return arg3(l, c) #@R fpdef1 :: fpdef >> NAME def fpdef1(l, c): [n] = l #LexD = c.LexD return ([ mark("set", n, c) ], []) #@R fpdef2 :: fpdef >> ( fplist ) def fpdef2(l, c): return l[1] ## @R fpdef2c :: fpdef >> ( fplist , ) #fpdef2c = fpdef2 ##31 #@R fplist1 :: fplist >> fpdef def fplist1(l, c): #print l return l[0] #@R fplistn :: fplist >> fplist , fpdef fplistn = params4 #@R rdef :: funcdef >> def NAME parameters : suite def rdef(list, context): #print "rdef", list [ddef, name, parameters, c, suite] = list (l, g) = parameters globals = analyse_scope(name, l + suite, context) # for embedded function defs global internal refs must be deferred. 
context.defer_globals(globals) result = g L = context.LexD # try to steal a lineno from other declarations: lineno = L.lineno index = L.realindex for (ln, ri, op, n) in l+g+suite: lineno = min(lineno, ln) if name is not None: result.append((lineno, -index, "set", name)) # Note: this is to prevent complaints about unreffed functions result.append((lineno+1, -index, "qref", name)) return result #@R testlambda1 :: test >> lambda varargslist : test def testlambda1(list, context): [l, v, c, t] = list return rdef(["def", None, v, ":", t], context) def analyse_scope(sname, var_accesses, context, unused_ok=0): var_accesses.sort() result = [] globals = {} locals = {} # scan for globals for x in var_accesses: (ln, ri, op, name) = x if op == "global": globals[name] = ln #result.append(x) (ignore global sets in local context) # scan for locals for (ln, ri, op, name) in var_accesses: if op == "set" and not locals.has_key(name): if globals.has_key(name): context.complain( "Warning: set of global %s in local context %s" % (`name`, `sname`)) result.append( (ln, ri, op, name) ) pass # ignore global set in local context else: locals[name] = [ln, 0] # line assigned, #refs # scan for use before assign, etc. for x in var_accesses: (ln, ri, op, name) = x if locals.has_key(name): if op in ["ref", "qref"]: set = locals[name] set[1] = set[1] + 1 assnln = set[0] if (ln <= assnln): context.complain( "(%s) local %s ref at %s before assign at %s" % ( sname, `name`, ln, `assnln`)) elif op not in ("global", "set"): # ignore global sets in local context. result.append(x) # scan for no use if not unused_ok: for (name, set) in locals.items(): [where, count] = set if count<1: context.complain( "(%s) %s defined before %s not used" % (sname, `name`, where)) return result ### note, need to make special case for qualified names #@R powera :: power >> atom trailerlist def powera(list, context): #print "powera", list [a, (t, full)] = list if a and full: # atom is a qualified name (ln, ri, op, n) = a[0] result = [ (ln, ri, "qref", n) ] else: result = a result = result + t #print "returning", result return result #@R trailerlist0 :: trailerlist >> def trailerlist0(list, context): return ([], 0) # empty trailerlist #@R trailerlistn :: trailerlist >> trailer trailerlist def trailerlistn(list, context): #print "trailerlistn", list result = list[0] + list[1][0] for i in xrange(len(result)): (a, b, op, d) = result[i] result[i] = (a, b, "qref", d) return (result, 1) # make name+parameters set local reduce suite... def default_reduction(list, context): # append all lists from types import ListType #print "defred", list #return result = [] for x in list: if type(x)==ListType: if result == []: if len(x)>0 and type(x[0])==ListType: raise "oops", x result = x else: for y in x: result.append(y) return result def aname(list, context): #print "aname", list, context L = context.LexD # note -L.realindex makes rhs of assignment seem before lhs in sort. return [ (L.lineno, -L.realindex, "ref", list[0]) ] # the highest level reduction! 
# all1 :: all >> file_input DEDENT def all1(list, context): stuff = list[0] context.when_done(stuff) # first test def BindRules(pyg): for name in pyg.RuleNameToIndex.keys(): pyg.Bind(name, default_reduction) pyg.Bind("all1", all1) pyg.Bind("testlambda1", testlambda1) pyg.Bind("except2", except2) pyg.Bind("namearg", namearg) pyg.Bind("rfrom", rfrom) pyg.Bind("rfromc", rfromc) pyg.Bind("class1", class1) pyg.Bind("class2", class2) pyg.Bind("aname", aname) pyg.Bind("assn1", assn1) pyg.Bind("assnn", assnn) pyg.Bind("assn1c", assn1c) pyg.Bind("assn1c2", assn1c2) pyg.Bind("assnnc", assnnc) pyg.Bind("dn1", dn1) pyg.Bind("nlistn", nlistn) pyg.Bind("nlist1", nlist1) pyg.Bind("global1", global1) pyg.Bind("globaln", globaln) pyg.Bind("for1", for1) pyg.Bind("for2", for2) pyg.Bind("powera", powera) pyg.Bind("trailerlist0", trailerlist0) pyg.Bind("trailerlistn", trailerlistn) pyg.Bind("params1", params1) pyg.Bind("params1c", params1c) pyg.Bind("params2", params2) pyg.Bind("params3", params3) pyg.Bind("params4", params4) pyg.Bind("argd", argd) pyg.Bind("arg2", arg2) pyg.Bind("arg3", arg3) pyg.Bind("arg4", arg4) pyg.Bind("fpdef1", fpdef1) pyg.Bind("fpdef2", fpdef2) # pyg.Bind("fpdef2c", fpdef2c) pyg.Bind("fplist1" , fplist1 ) pyg.Bind("fplistn" , fplistn) pyg.Bind("rdef" , rdef) # pyg.Bind( , ) class globalContext: def __init__(self, lexd): self.deferred = [] self.LexD = lexd def complain(self, str): print str def defer_globals(self, globals): self.deferred[0:0] = globals def when_done(self, list): stuff = list + self.deferred + self.patch_globals() globals = analyse_scope("", stuff, self) seen = {} for (ln, ri, op, name) in globals: if not seen.has_key(name) and op!="set": seen[name] = name self.complain( "%s: (%s) %s not defined in module?" % (ln, op, `name`)) self.deferred = [] # reset state. def patch_globals(self): # patch in global names import __builtin__ names = dir(__builtin__) list = names[:] list2 = names[:] for i in xrange(len(list)): list[i] = (-2, -900, "set", names[i]) list2[i] = (-1, -900, "qref", names[i]) return list + list2 teststring = """ class x(y,z): ''' a doc string blah ''' def test(this, that): w = that+this+x, n x = 1 return w """ def go(): import sys try: file = sys.argv[1] except IndexError: print "required input file missing, defaulting to test string" data = teststring else: data = open(file).read() print "setup" (pyg, context) = setup() print "now parsing" lint(data, pyg, context) def setup(): global pyg, context import pygram pyg = pygram.unMarshalpygram() BindRules(pyg) context = globalContext(pyg.LexD) return (pyg, context) def lint(data, pygin=None, contextin=None): if pygin is None: pygin = pyg if contextin is None: contextin = context pygin.DoParse1(data, contextin) def lintdir(directory_name): """lint all files recursively in directory""" from find import find print "\n\nrecursively linting %s\n\n" % directory_name (pyg, context) = setup() python_files = find("*.py", directory_name) for x in python_files: print "\n\n [ %s ]\n\n" % x lint( open(x).read(), pyg, context ) print "\014" if __name__=="__main__": go() gadfly-1.0.0/doc/demo/kjParsing/relalg.py0100644000157700012320000002774207466100701017244 0ustar rjonestech """Simple relational algebra interpreter. usage: To make the grammar python relalg.py make To run some relatoinal algebra expressions python relalg.py < expressions_file """ # EDIT INSTALLDIR TO BE ABLE TO LOAD UNDER ANY CWD INSTALLDIR = "." 
## simple relational algebra using only the equality predicate ## note: string values cannot contain ; ## statement sequencing using ; handled at higher level relalg_rules = """ statement :: @R statementassn :: statement >> assignment @R statementexpr :: statement >> rexpr @R assignment1 :: assignment >> name = rexpr @R assignmentn :: assignment >> name = assignment @R union :: rexpr >> rexpr U rterm @R rterm :: rexpr >> rterm @R minus :: rexpr >> rexpr - rterm @R intersect :: rterm >> rterm intersect rfactor @R join :: rterm >> rterm join rfactor @R rfactor :: rterm >> rfactor @R projection :: rfactor >> projection [ names ] rfactor @R names0 :: names >> @R namesn :: names >> names1 @R names11 :: names1 >> name @R names1n :: names1 >> names1 name @R selection :: rfactor >> selection ( condition ) rfactor @R conditionor :: condition >> condition | boolfactor @R condfactor :: condition >> boolfactor @R factorand :: boolfactor >> boolfactor & boolprimary @R factorprime :: boolfactor >> boolprimary @R notprimary :: boolprimary >> ~ boolprimary @R primarycondition :: boolprimary >> ( condition ) @R primaryeq :: boolprimary >> expression = expression @R expname :: expression >> name @R expvalue :: expression >> value @R rename :: rfactor >> rename [ names ] to [ names ] rfactor @R named :: rfactor >> name @R factorexpr :: rfactor >> ( rexpr ) @R relationval :: rfactor >> [ names ] ( rows ) @R rows0 :: rows >> @R rowsn :: rows >> somerows @R somerows1 :: somerows >> row @R somerowsn :: somerows >> somerows , row @R emptyrow :: row >> NIL @R row1 :: row >> value @R rown :: row >> row value @R valuenum :: value >> number @R valuestr :: value >> string """ keywords = """ selection intersect rename projection to NIL U join """ puncts = """=^~|,-[]()&""" nonterms = """ statement assignment rexpr rterm value rfactor names names1 condition boolfactor boolprimary expression rows somerows row """ try: from kjbuckets import * except ImportError: from kjbuckets0 import * class relation: def __init__(self, names, rows): #print "relation init", names, rows names = self.names = tuple(names) nameset = self.nameset = kjSet(names) for r in rows: if nameset != kjSet(r.keys()): raise ValueError, \ "bad names: "+`(names, r.items())` self.rows = kjSet(rows) def __repr__(self): from string import join names = self.names rows = self.rows.items() if not rows: nns = join(names) replist = [nns, "="*len(nns), " ----"] return join(replist, "\n") #print names, rows nnames = len(names) if nnames==1: replist = [names[0]] else: replist = [names] for r in rows: elt = r.dump(names) replist.append(r.dump(names)) #print replist if nnames==1: replist = maxrep(replist) else: transpose = apply(map, tuple([None] + replist)) adjusted = map(maxrep, transpose) replist = apply(map, tuple([None] + adjusted)) replist = map(join, replist) replist.insert(1, "=" * len(replist[0])) #print replist return join(replist, "\n") def maxrep(list): list = map(str, list) maxlen = max( map(len, list) ) for i in range(len(list)): item = list[i] litem = len(item) list[i] = item + (" " * (maxlen-litem)) return list # context is a simple dictionary of named relations def elt0(l, c): return l[0] statementassn = elt0 def statementexpr(l, c): from string import split, join print print " --- expression result ---" print data = str(l[0]) print " "+ join(split(data, "\n"), "\n ") def assignment1(l, c): [name, eq, val] = l c[name] = val return val assignmentn = assignment1 def check_compat(v1, v2): names1, names2 = v1.names, v2.names if names1 != names2: raise 
ValueError, \ "operands not union compatible "+`(names1, names2)` return names1, v1.rows, v2.rows def union(l, c): [v1, U, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1+r2).items()) rterm = elt0 def minus(l, c): [v1, m, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1-r2).items()) def intersect(l, c): [v1, i, v2] = l names1, r1, r2 = check_compat(v1, v2) return relation(names1, (r1&r2).items()) def join(l, c): [v1, j, v2] = l n1, n2 = v1.names, v2.names r1, r2 = v1.rows.items(), v2.rows.items() n1s, n2s = kjSet(n1), kjSet(n2) common = tuple((n1s&n2s).items()) result = kjSet() if common: # simple hashjoin G = kjGraph() for a in r1: G[a.dump(common)] = a for b in r2: for a in G.neighbors(b.dump(common)): result[a+b] = 1 else: for a in r1: for b in r2: result[a+b] = 1 return relation( (n1s+n2s).items(), result.items() ) rfactor = elt0 def projection(l, c): [p, b1, names, b2, val] = l proj = kjSet(names) result = kjSet() for row in val.rows.items(): result[ proj * row ] = 1 return relation( names, result.items()) def emptylist(l, c): return [] names0 = emptylist namesn = elt0 def names11(l, c): return l def names1n(l, c): [ns, n] = l ns.append(n) return ns def selection(l, c): [sel, p1, cond, p2, val] = l return cond.filter(val) ## conditions are not optimized at all! class conditionor: def __init__(self, l, c): [self.c1, op, self.c2] = l def filter(self, val): v1 = self.c1.filter(val) v2 = self.c2.filter(val) return relation(v1.names, (v1.rows+v2.rows).items()) condfactor = elt0 class factorand(conditionor): def filter(self, val): v1 = self.c1.filter(val) v2 = self.c2.filter(val) return relation(v1.names, (v1.rows&v2.rows).items()) factorprime = elt0 class notprimary: def __init__(self, l, c): [n, self.c1] = l def filter(self, val): v1 = self.c1.filter(val) return relation(v1.names, (val.rows-v1.rows).items()) def elt1(l, c): return l[1] primarycondition = elt1 class primaryeq: def __init__(self, l, c): [self.e1, eq, self.e2] = l def filter(self, val): rows = val.rows.items() e1v = self.e1.value(rows) e2v = self.e2.value(rows) result = kjSet() for (r, v1, v2) in map(None, rows, e1v, e2v): if v1==v2: result[r] = 1 return relation(val.names, result.items()) class expname: def __init__(self, l, c): self.name = l[0] def value(self, rows): name = self.name r = list(rows) for i in xrange(len(r)): r[i] = r[i][name] return r class expvalue(expname): def value(self, rows): return [self.name] * len(rows) def rename(l, c): [ren, b1, names, b2, to, b3, names2, b4, val] = l if len(names)!=len(names2): raise ValueError, "names lengths must match"+`(names1, names2)` remap = kjDict(map(None, names2, names)) oldnames = kjSet(val.names) addnames = kjSet(names2) remnames = kjSet(names) keepnames = oldnames - remnames remap = remap + keepnames if not remnames.subset(oldnames): #print remnames, oldnames raise ValueError, "old names not present"+`(names, val.names)` newnames = keepnames+addnames rows = val.rows.items() for i in range(len(rows)): rows[i] = remap*rows[i] return relation(newnames.items(), rows) def named(l, c): [name] = l return c[name] def relationval(l, c): [b1, names, b2, p1, rows, p2] = l names = tuple(names) ln = len(names) for i in xrange(len(rows)): this = rows[i] lt = len(this) if lt!=ln: raise ValueError, "names, vals don't match"+`(names,this)` if len(this)==1: this = this[0] else: this = tuple(this) rows[i] = kjUndump(names, this) return relation(names, rows) rows0 = emptylist rowsn = elt0 def somerows1(l, c): #print "somerows1", l 
return l def somerowsn(l, c): #print "somerowsn", l [sr, c, r] = l sr.append(r) return sr emptyrow = emptylist row1 = somerows1 def factorexpr(l, c): return l[1] def rown(l, c): #print "rows", l [r, v] = l r.append(v) return r valuenum = valuestr = elt0 ## snarfed from sqlbind # note: all reduction function defs must precede this assign VARS = vars() class punter: def __init__(self, name): self.name = name def __call__(self, list, context): print "punt:", self.name, list return list class tracer: def __init__(self, name, fn): self.name = name self.fn = fn def __call__(self, list, context): print "tracing", self.name, list test = self.fn(list, context) print self.name, "returns", test return test def BindRules(sqlg): for name in sqlg.RuleNameToIndex.keys(): if VARS.has_key(name): #print "binding", name sqlg.Bind(name, VARS[name]) # nondebug #sqlg.Bind(name, tracer(name, VARS[name]) ) # debug else: print "unbound", name sqlg.Bind(name, punter(name)) return sqlg ## snarfed from sqlgen MARSHALFILE = "relalg.mar" import string alphanum = string.letters+string.digits + "_" userdefre = "[%s][%s]*" % (string.letters +"_", alphanum) RACOMMENTREGEX = "COMMENT .*" def userdeffn(str): return str charstre = "'[^\n']*'" def charstfn(str): return str[1:-1] numlitre = "[%s][%s\.]*" % (string.digits, alphanum) # not really... def numlitfn(str): """Note: this is "safe" because regex filters out dangerous things.""" return eval(str) def DeclareTerminals(Grammar): Grammar.Addterm("name", userdefre, userdeffn) Grammar.Addterm("string", charstre, charstfn) Grammar.Addterm("number", numlitre, numlitfn) def Buildrelalg(filename=MARSHALFILE): import kjParseBuild SQLG = kjParseBuild.NullCGrammar() #SQLG.SetCaseSensitivity(0) DeclareTerminals(SQLG) SQLG.Keywords(keywords) SQLG.punct(puncts) SQLG.Nonterms(nonterms) # should add comments SQLG.comments([RACOMMENTREGEX]) SQLG.Declarerules(relalg_rules) print "working..." SQLG.Compile() filename = INSTALLDIR+"/"+filename print "dumping to", filename outfile = open(filename, "wb") SQLG.MarshalDump(outfile) outfile.close() return SQLG def reloadrelalg(filename=MARSHALFILE): import kjParser filename = INSTALLDIR+"/"+filename infile = open(filename, "rb") SQLG = kjParser.UnMarshalGram(infile) infile.close() DeclareTerminals(SQLG) BindRules(SQLG) return SQLG def runfile(f): from string import split, join ragram = reloadrelalg() context = {} #f = open(filename, "r") data = f.read() #f.close() from string import split, strip commands = split(data, ";") for c in commands: if not strip(c): continue print " COMMAND:" data = str(c) pdata = " "+join(split(c, "\n"), "\n ") print pdata test = ragram.DoParse1(c, context) print # c:\python\python relalg.py ratest.txt if __name__=="__main__": try: done = 0 import sys argv = sys.argv if len(argv)>1: command = argv[1] if command=="make": print "building relational algebra grammar" Buildrelalg() done = 1 else: runfile(sys.stdin) done = 1 finally: if not done: print __doc__ gadfly-1.0.0/doc/demo/kjbuckets/0040755000157700012320000000000007512763043015456 5ustar rjonestechgadfly-1.0.0/doc/demo/kjbuckets/kjfactor.py0100644000157700012320000000301607465430476017640 0ustar rjonestech#! /usr/local/bin/python -O # factor a graph G on A x B into # a lists L and list of pairs G2 on int x int # such that (a,b) in G iff (i,j) in G2 where L[i], L[j] == a,b # # got that? 
# # The basic idea is that if the elements of G are (say) # large strings and G is dense, then it saves space to # marshal G to a file as a sequence of indices, rather # than storing G directly. # # for greater space efficiency the list of pairs is spit # into two lists (leftmembers, rightmembers) def factor(G): from kjbuckets import kjSet, kjGraph allnodes = kjSet(G.keys()) + kjSet(G.values()) allnodelist = allnodes.items() allnodemap = map(None, allnodelist, range(len(allnodelist))) nodetoindex = kjGraph(allnodemap) pairs = G.items() left = pairs[:] right = left[:] for i in xrange(len(left)): (l, r) = pairs[i] left[i], right[i] = nodetoindex[l], nodetoindex[r] return (left, right), allnodelist # and back again def unfactor(indexpairs, allnodelist): from kjbuckets import kjGraph from time import time now = time() (left, right) = indexpairs size = len(left) result = kjGraph(size) for i in xrange(size): result[allnodelist[left[i]]] = allnodelist[right[i]] #print time() - now return result def test(): from kjbuckets import kjGraph G = kjGraph( map(None, "pumpernickle", "nicklepumppp") ) print G (iG, l) = factor(G) print iG, l G2 = unfactor(iG, l) print G2 if G!=G2: print "OOPS" if __name__=="__main__": test() gadfly-1.0.0/doc/demo/kjbuckets/kjtest.py0100644000157700012320000002011107466100701017316 0ustar rjonestech#! /usr/local/bin/python -O # silly functions for testing/timing simple table access operations. import profile from kjbuckets import * r = range(5000) r2 = range(1000) def dtest(d): for i in r: d[ (hex(i),oct(i),i) ] = hex(i)+oct(i)+`i` def dtest2(d): global temp for i in r: d[ (i*33) % 1000 ] = i for i in r: temp = d[ (i*31) % 1000 ] for i in r: temp = d[ (i*7) % 1000 ] def dtest3(d): global temp for i in r: d[ (i*33) % 1000 ] = i for i in r: temp = d[ (i*31) % 1000 ] for i in r: temp = d[ (i*7) % 1000 ] for i in r2: del d[i] d[`i`] = `(i*3%1000)` for i in r2: del d[`i`] def dtest4(d): for i in range(10): dtest(d) dtest2(d) dtest3(d) if __name__=="__main__": from kjbuckets import kjDict dtest4(kjDict()) # some profiling done on my ancient sun server # # example stats for Python dict #>>> D = {} #>>> profile.run("dtest4(D)") # 33 function calls in 83.033 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 14.383 1.438 14.383 1.438 kjtest.py:11(dtest2) # 10 20.967 2.097 20.967 2.097 kjtest.py:17(dtest3) # 1 0.083 0.083 83.017 83.017 kjtest.py:28(dtest4) # 10 47.583 4.758 47.583 4.758 kjtest.py:7(dtest) # 1 0.017 0.017 83.033 83.033 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 83.017 83.017 python:0(20520.C.2) # # with gsize of 1 # # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 16.650 1.665 16.650 1.665 kjtest.py:11(dtest2) # 10 24.083 2.408 24.083 2.408 kjtest.py:17(dtest3) # 1 0.050 0.050 84.150 84.150 kjtest.py:28(dtest4) # 10 43.367 4.337 43.367 4.337 kjtest.py:7(dtest) # 1 0.117 0.117 84.267 84.267 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 84.150 84.150 python:0(21460.C.1) # with gsize of 2 #>>> profile.run("dtest4(D)") # 33 function calls in 93.467 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 16.900 1.690 16.900 1.690 kjtest.py:11(dtest2) # 10 24.183 2.418 24.183 2.418 kjtest.py:17(dtest3) # 1 0.083 0.083 93.433 93.433 kjtest.py:28(dtest4) # 10 52.267 5.227 52.267 5.227 kjtest.py:7(dtest) # 1 0.017 0.017 93.467 93.467 
profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.017 0.017 93.450 93.450 python:0(20824.C.3) # # with gsize of 4 #33 function calls in 90.200 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 17.950 1.795 17.950 1.795 kjtest.py:11(dtest2) # 10 26.733 2.673 26.733 2.673 kjtest.py:17(dtest3) # 1 0.033 0.033 90.067 90.067 kjtest.py:28(dtest4) # 10 45.350 4.535 45.350 4.535 kjtest.py:7(dtest) # 1 0.133 0.133 90.200 90.200 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 90.067 90.067 python:0(22100.C.1) # with gsize of 6 # 33 function calls in 98.217 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 17.017 1.702 17.017 1.702 kjtest.py:11(dtest2) # 10 27.033 2.703 27.033 2.703 kjtest.py:17(dtest3) # 1 0.067 0.067 98.200 98.200 kjtest.py:28(dtest4) # 10 54.083 5.408 54.083 5.408 kjtest.py:7(dtest) # 1 0.017 0.017 98.217 98.217 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 98.200 98.200 python:0(22727.C.2) # with Gsize of 8 #>>> D = kjDict() #>>> profile.run("dtest4(D)") # 33 function calls in 106.900 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 18.683 1.868 18.683 1.868 kjtest.py:11(dtest2) # 10 31.433 3.143 31.433 3.143 kjtest.py:17(dtest3) # 1 0.017 0.017 106.883 106.883 kjtest.py:28(dtest4) # 10 56.750 5.675 56.750 5.675 kjtest.py:7(dtest) # 1 0.017 0.017 106.900 106.900 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 106.883 106.883 python:0(20520.C.4) # # with gsize of 16 #>>> D = kjDict() #>>> profile.run("dtest4(D)") # 33 function calls in 118.533 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 22.200 2.220 22.200 2.220 kjtest.py:11(dtest2) # 10 41.233 4.123 41.233 4.123 kjtest.py:17(dtest3) # 1 0.067 0.067 118.483 118.483 kjtest.py:28(dtest4) # 10 54.983 5.498 54.983 5.498 kjtest.py:7(dtest) # 1 0.033 0.033 118.533 118.533 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.017 0.017 118.500 118.500 python:0(20659.C.3) # # with gsize of 32 # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 27.650 2.765 27.650 2.765 kjtest.py:11(dtest2) # 10 55.600 5.560 55.600 5.560 kjtest.py:17(dtest3) # 1 0.067 0.067 129.117 129.117 kjtest.py:28(dtest4) # 10 45.800 4.580 45.800 4.580 kjtest.py:7(dtest) # 1 0.100 0.100 129.217 129.217 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 129.117 129.117 python:0(21213.C.1) # # with gsize of 64 # 33 function calls in 177.017 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 38.983 3.898 38.983 3.898 kjtest.py:11(dtest2) # 10 89.517 8.952 89.517 8.952 kjtest.py:17(dtest3) # 1 0.033 0.033 176.900 176.900 kjtest.py:28(dtest4) # 10 48.367 4.837 48.367 4.837 kjtest.py:7(dtest) # 1 0.117 0.117 177.017 177.017 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 176.900 176.900 python:0(21657.C.1) # # with gsize of 128 # 33 function calls in 278.450 CPU seconds # # Ordered by: standard name # # ncalls tottime percall cumtime percall filename:lineno(function) # 10 63.500 6.350 63.500 6.350 kjtest.py:11(dtest2) # 10 161.283 16.128 161.283 16.128 kjtest.py:17(dtest3) # 1 0.033 0.033 278.333 278.333 kjtest.py:28(dtest4) # 10 53.517 5.352 53.517 
5.352 kjtest.py:7(dtest) # 1 0.117 0.117 278.450 278.450 profile:0(dtest4(D)) # 0 0.000 0.000 profile:0(profiler) # 1 0.000 0.000 278.333 278.333 python:0(22265.C.1) # #Stats = { # total times #gsize: [ dtest, dtest2, dtest3 ] #"py": [ 47.5, 14.3, 20.9 ], # 1: [ 43.3, 16.6, 24.0 ], # better! on dtest(?) # 2: [ 52.2, 16.9, 24.1 ], # 4: [ 45.3, 17.9, 26.7 ], # 6: [ 54.0, 17.0, 27.0 ], # 8: [ 56.7, 18.6, 31.4 ], # 16: [ 54.9, 22.2, 41.2 ], # 32: [ 45.8, 27.6, 55.6 ], # 64: [ 48.3, 38.9, 89.5 ], # 128: [ 53.5, 63.5, 161.2 ] # }# weird increasing increasing # # linear performance decrease seems to start around GSIZE=64 # dtest performance seems to be heavily influenced by more complex # key/value calculations. unreliable. gadfly-1.0.0/doc/demo/kjbuckets/relalg.py0100644000157700012320000001243607466100701017273 0ustar rjonestech#! /usr/local/bin/python -O # A simple implementation of the relational algebra # using kjbuckets from kjbuckets import * def relFromDictSet(schemeseq, dictSet): result = relation(schemeseq, [] ) result.rowset = dictSet return result class relation: def __init__(self, schemeseq, listofrows): self.schemeseq = schemeseq self.scheme = kjSet(schemeseq) rowset = kjSet() for row in listofrows: rowset.add(kjUndump(schemeseq, row)) self.rowset = rowset def pprint(self): print self.schemeseq print "============" for row in self.rowset.items(): print row.dump(self.schemeseq) def addDicts(self, dictseq): # not used... for dict in dictseq: self.rowset.add(dict) def checkUnionCompatible(self,other): if self.scheme != other.scheme: raise ValueError, "operands not union compatible" # relational union def __add__(self, other): self.checkUnionCompatible(other) return relFromDictSet(self.schemeseq, self.rowset + other.rowset) # relational difference def __sub__(self, other): self.checkUnionCompatible(other) return relFromDictSet(self.schemeseq, self.rowset - other.rowset) # natural join (hash based algorithm) def __mul__(self,other): commonatts = self.scheme & other.scheme resultset = kjSet() if commonatts: # do a hash based join dumper = tuple(commonatts.items()) selfgraph = kjGraph() # hash index for self othergraph = kjGraph() # hash index for other for row in self.rowset.items(): selfgraph[row] = row.dump(dumper) for row in other.rowset.items(): othergraph[row.dump(dumper)] = row for (selfrow, otherrow) in (selfgraph * othergraph).items(): resultset.add(selfrow + otherrow) else: # no common attributes: do a cross product otherrows = other.rowset.items() for selfrow in self.rowset.items(): for otherrow in otherrows: resultset.add(selfrow + otherrow) return relFromDictSet( tuple((self.scheme + other.scheme).items()), resultset ) # selection using a att->value pairs (as conjunction) def vSel(pairs, rel): selected = kjSet() selector = kjDict(pairs) if selector.Clean()!=None: for row in rel.rowset.items(): if (row + selector).Clean() != None: selected.add(row) return relFromDictSet(rel.schemeseq, selected) # selection using att = att pairs (as conjunction) def eqSelect(pairs, rel): selected = kjSet() selector = kjGraph(pairs) selector = (selector + ~selector).tclosure() # sym, trans closure for row in rel.rowset.items(): if row.remap(selector) != None: selected.add(row) return relFromDictSet(rel.schemeseq, selected) # projection on attribute sequence (as conjunction) def proj(atts, rel): attset = kjSet(atts) resultset = kjSet() for row in rel.rowset.items(): resultset.add(attset * row) return relFromDictSet(atts, resultset) # renaming using (new,old) pair sequence def rename(pairs, rel): 
renames = kjDict(pairs) untouched = rel.scheme - kjSet(renames.values()) mapper = renames + untouched resultset = kjSet() for row in rel.rowset.items(): resultset.add(mapper * row) return relFromDictSet(tuple(mapper.keys()), resultset) #=========== end of simple.py # #Now let me show you the "simple" module in use. First we need some relations. #I'll steal C.J.Date's canonical/soporific supplier/parts database: # ## database of suppliers, parts and shipments ## from Date, page 79 (2nd ed) or page 92 (3rd ed) */ def test(): #suppliers S = relation( ('snum', 'sname', 'status', 'city'), [ (1, 'Smith', 20, 'London'), (2, 'Jones', 10, 'Paris'), (3, 'Blake', 30, 'Paris'), (4, 'Clark', 20, 'London'), (5, 'Adams', 30, 'Athens') ]) #parts P = relation( ('pnum', 'pname', 'color', 'weight', 'pcity'), [ (1, 'Nut', 'Red', 12, 'London'), (2, 'Bolt', 'Green', 17, 'Paris' ), (3, 'Screw', 'Blue', 17, 'Rome' ), (4, 'Screw', 'Red', 14, 'London'), (5, 'Cam', 'Blue', 12, 'Paris'), (6, 'Cog', 'Red', 19, 'London') ]) # shipments SP = relation( ('snum', 'pnum', 'qty',), [ (1, 1, 300), (1, 2, 200), (1, 3, 400), (1, 4, 200), (1, 5, 100), (1, 6, 100), (2, 1, 300), (2, 2, 400), (3, 2, 200), (4, 2, 200), (4, 4, 300), (4, 5, 400) ]) # names and cities of suppliers proj(("sname","city"),S).pprint() # part names of parts supplied by Blake proj(("pname",),vSel( ( ("sname","Blake"), ), S*SP*P)).pprint() # supplier names and numbers where the supplier doesn't supply screws ( proj( ("sname","snum"), S) - proj( ("sname","snum"), vSel( ( ("pname", "Screw"), ), P*SP*S ) ) ).pprint() if __name__=="__main__": test() gadfly-1.0.0/doc/demo/kjbuckets/trigram.py0100644000157700012320000001471307466100701017472 0ustar rjonestech#! /usr/local/bin/python -O """Trigram indexing of documents. T = TriGram() makes a trigram archive. Add *HASHABLE* documents to the archive, associated with keywords by T.add_doc(document, [keyword1, keyword2, ...]) Get documents that for each substring are associated with a keyword containing that substring with T.getDocs([substring1, substring2,...]) (returns kjSet) OR T.getDocList([keyword1, keyword2,...]) returns list. if you can't hash your documents use some aliasing strategy... performance should be "good" when the graphs are sparse, but may get bad if graphs get extremely dense... """ ### note: this is in the process of improvement ### ### need to vector docs to numbers to save space on loading ### from kjbuckets import * from string import upper, find # note: documents must be hashable! 
class TriGram: def __init__(self): self._trigramtokeyword = kjGraph() self._keywordtodoc = kjGraph() self._tricache = kjDict() # memory optimization def keys(self): return self._keywordtodoc.keys() def trigrams(self): return self._trigramtokeyword.keys() def marshal_to_file(self, file): from kjfactor import factor from marshal import dump dump( (factor(self._trigramtokeyword), factor(self._keywordtodoc)), file ) def unmarshal_from_file(self, file): from marshal import load from kjfactor import unfactor (ttkf, kwdf) = load(file) self._trigramtokeyword = apply(unfactor, ttkf) self._keywordtodoc = apply(unfactor, kwdf) # associate document to all keywords in keyword_list def add_doc(self, document, keyword_list): for keyword in keyword_list: keyword = upper(keyword) self._addlink(keyword, document) # associate keyword to document, assumes keyword upcased def _addlink(self, keyword, document): cache = self._tricache ntrigrams = len(keyword) - 2 if ntrigrams<1: #raise ValueError, "keyword must be length 3 or greater" return # ignore for start in range(ntrigrams): trigram = keyword[start: start+3] try: trigram = cache[trigram] except: cache[trigram]=trigram self._trigramtokeyword[trigram] = keyword self._keywordtodoc[keyword] = document # get set of keywords associated with a substring, assumes substring upcased def _getkeywords(self, substring): TtoK = self._trigramtokeyword ntrigrams = len(substring) - 2 if ntrigrams<1: raise ValueError, \ "substring must be length 3 or greater:" +`substring` keywords = None for start in range(ntrigrams): trigram = substring[start: start+3] thesekeywords = kjSet( TtoK.neighbors(trigram) ) if keywords == None: keywords = thesekeywords else: keywords = keywords & thesekeywords if not keywords: break # now check for false hits (trigrams in wrong order...) for keyword in keywords.items(): if find(keyword, substring)==-1: del keywords[keyword] return keywords # get kjSet of documents # which for each substring of substring_list # is associated with a keyword containing that substring. # (for boolean queries, left as set for easy combination...) def getDocs(self, substring_list): DocSet = None kwToDoc = self._keywordtodoc for substring in substring_list: substring = upper(substring) keywords = self._getkeywords(substring) thesedocs = kjSet((keywords * kwToDoc).values()) if DocSet == None: DocSet = thesedocs else: DocSet = thesedocs & DocSet if not DocSet: break return DocSet # same as above, but returns list not set. def getDocList(self, substring_list): return self.getDocs(substring_list).items() if __name__=="__main__": #### example usage and for testing bigstring = """ Python release 1.1.1 ==================== ==> This is Python version 1.1.1. ==> Python 1.1.1 is a pure bugfix release. It fixes two core dumps related to the changed implementation of (new)getargs, some portability bugs, and some very minor things here and there. If you have 1.1, you only need to install 1.1 if bugs in it are bugging you. ==> If you don't know yet what Python is: it's an interpreted, extensible, embeddable, interactive, object-oriented programming language. For a quick summary of what Python can mean for a UNIX/C programmer, read Misc/BLURB.LUTZ. ==> If you want to start compiling right away (on UNIX): just type "./configure" in the current directory and when it finishes, type "make". See the section Build Instructions below for more details. ==> All documentation is in the subdirectory Doc in the form of LaTeX files. 
In order of importance for new users: Tutorial (tut), Library Reference (lib), Language Reference (ref), Extending (ext). Note that especially the Library Reference is of immense value since much of Python's power (including the built-in data types and functions!) is described there. [NB The ext document has not been updated to reflect this release yet.] .... --Guido van Rossum, CWI, Amsterdam """ import string bigsplit = string.split(bigstring) print "testing loading" TGram = TriGram() # associate each string of bigstring to itself, unless too small... for str in bigsplit: if len(str)>3: TGram.add_doc(str, [str]) print len(TGram._trigramtokeyword), len(TGram._keywordtodoc) print "testing marshalling" f = open("test.mar", "wb") TGram.marshal_to_file(f) f.close() f = open("test.mar", "rb") TGram = TriGram() TGram.unmarshal_from_file(f) f.close() print "testing retrieval" print TGram.getDocList(["thon"]) print TGram.getDocList(["tion"]) print TGram.getDocList(["dire"]) print TGram.getDocList(["here"]) print TGram.getDocList(["ers","sio"]) print TGram.getDocList(["int","era"]) print TGram.getDocList(["htt","url","van"]) print TGram.getDocList(["Nope"]) gadfly-1.0.0/doc/demo/kjbuckets/tsort.py0100644000157700012320000000202607466100701017172 0ustar rjonestech#! /usr/local/bin/python -O # simple implementation of topological sort # using kjbuckets. For very large and very dense # graphs you can do better... from kjbuckets import kjGraph, kjSet LOOPERROR = "LOOPERROR" # topological sort def tsort(list_of_pairs): result = [] Graph = kjGraph(list_of_pairs) notsource = (kjSet(Graph.values()) - kjSet(Graph.keys())).items() while Graph: sources = kjSet(Graph.keys()) dests = kjSet(Graph.values()) startingpoints = sources - dests if not startingpoints: raise LOOPERROR, "loop detected in Graph" for node in startingpoints.items(): result.append(node) del Graph[node] return result + notsource if __name__=="__main__": list = [ (1,2), (3,4), (1,6), (6,3), (3,9), (4,2) ] print tsort(list) try: list = [ (1,2), (3,4), (1,6), (6,3), (3,9), (3,1) ] print tsort(list) print "WHOOPS: loop 1-6-3-1 not detected" except LOOPERROR: print "loop error as expected" gadfly-1.0.0/doc/demo/kjbuckets/tsort2.py0100644000157700012320000000203707466100701017256 0ustar rjonestech#! /usr/local/bin/python -O # more complex implementation of topological sort LOOPERROR = "LOOPERROR" def tsort(pairs): from kjbuckets import kjGraph, kjSet G = kjGraph(pairs) Gt = ~G # transpose sources = kjSet(G.keys()) dests = kjSet(G.values()) all = (sources+dests).items() total = len(all) endpoints = dests - sources for i in xrange(total-1, -1, -1): #print i, endpoints if not endpoints: raise LOOPERROR, "loop detected" choice = endpoints.choose_key() for n in Gt.neighbors(choice): G.delete_arc(n,choice) if not G.has_key(n): endpoints[n] = n del endpoints[choice] all[i] = choice return all if __name__=="__main__": list = [ (1,2), (3,4), (1,6), (6,3), (3,9), (4,2) ] print tsort(list) try: list = [ (1,2), (3,4), (1,6), (6,3), (3,9), (3,1) ] print tsort(list) print "WHOOPS: loop 1-6-3-1 not detected" except LOOPERROR: print "loop error as expected" gadfly-1.0.0/doc/announcement.txt0100644000157700012320000000300507512762514015775 0ustar rjonestech================================================ Gadfly 1.0.0 - SQL Relational Database in Python ================================================ Gadfly is a relational database system implemented in Python based on the SQL Structured Query Language. 
This is the GadflyB5 release - like a NG release only better :) Note: Aaron Watters is not the contact for this project. The contact for this project is richard@users.sourceforge.net. Gadfly requires python 2.1 or later for correct operation. GadflyB5 is mostly the same old Gadfly, but: - updated to use new regular expression engine (regex -> re migration) performed by the fine folk at the Zope Corporation (http://www.zope.com/). - kjbuckets C extension module maintenance and updates (see the kjbuckets documentation for details) - documentation cleanup - cleanup and reorganisation of the gadfly modules, including: - migration to distutils-based installation - cleanup of SQL grammar marshalling - more strict (in places) unit/regression testing - general cleanup of the code itself - cleanup of networking code (note: gfclient argument list has changed!) Please read CHANGES.txt for a complete list of changes since the last release. There is no ongoing support available for usage, unless someone volunteers. If you have found a bug, please submit an issue to the bug tracker at: https://sourceforge.net/tracker/?atid=100662&group_id=662 If you've got a great idea for gadfly, and have the time to work on it, please contact the gadfly project admins. gadfly-1.0.0/doc/build_html.py0100755000157700012320000000141607466100677015252 0ustar rjonestech#!/usr/bin/env python """ :Author: David Goodger :Contact: goodger@users.sourceforge.net :Revision: $Revision: 1.2 $ :Date: $Date: 2002/05/08 00:49:00 $ :Copyright: This module has been placed in the public domain. A minimal front-end to the Docutils Publisher. This module takes advantage of the default values defined in `publish()`. """ import sys, os.path from docutils.core import publish from docutils import utils if len(sys.argv) < 2: print >>sys.stderr, 'I need at least one filename' sys.exit(1) reporter = utils.Reporter(2, 4) for file in sys.argv[1:]: name, ext = os.path.splitext(file) dest = '%s.html'%name print >>sys.stderr, '%s -> %s'%(file, dest) publish(writer_name='html', source=file, destination=dest, reporter=reporter) gadfly-1.0.0/doc/faq.txt0100644000157700012320000004514707465430476014075 0ustar rjonestechGadfly Frequently Asked Questions ================================= :Version: $Revision: 1.1.1.1 $ .. contents:: General information ------------------- The following topics are of a general nature. What is gadfly? ~~~~~~~~~~~~~~~~~ Gadfly is a relational database management system which uses a large subset of very standard SQL as its query language and Python modules and optional Python/C extension modules as its underlying engine. Gadfly stores the active database in memory, with recovery logging to a file system. It supports an optional TCP/IP based client server mode and log based failure recovery for system or software failures (but not for disk failures). What is the current version? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The current version is Gadfly 1.0. To be sure you have the current version, please consult the version number that comes in the documentation. Gadfly 1.0 attempts to fix all known bugs in previous versions and adds a small number of features (hopefully without adding too many new bugs). Where can I get it? ~~~~~~~~~~~~~~~~~~~~~ Please get it from http://www.chordate.com Why SQL and the relational model? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly is based in SQL and the relational model primarily because the SQL query language and the relational model have been highly successful, are highly standard, and are well understood by a large number of programmers. If you understand standard SQL you already know how to use Gadfly -- and this distinguishes Gadfly from many similar database systems out there -- even ones which aren't "really free." Why Python? ~~~~~~~~~~~~~ Gadfly is based in Python because Python made the development of such a relatively sophisticated and standard SQL implementation feasible for one programmer, in his spare time, and also because it automatically made Gadfly portable to almost any computing platform. In addition, Gadfly can be used with Python to develop sophisticated data manipulation applications easily. What is the License? Is it Free? Why? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly can be used and modified in source form for any purpose, including commercial purposes. There is no explicit or implied warrantee for Gadfly, even though it is intended to be useful. Gadfly is free because I wanted people to use it, and I suspected they wouldn't if it wasn't free and highly standard, and furthermore I didn't want to risk the possibility of legal responsibility if people did use it and had problems. You mean I can bundle it into my own product for free? Really? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Yes. Please do. Several organizations have done so already. Where does it run? ~~~~~~~~~~~~~~~~~~~~ To run Gadfly you need (1) a file system and (2) Python. If Python runs on your platform Gadfly should run there too. I have not been able to verify that this version runs on the Macintosh yet (since the Mac apparently has some fairly nonstandard file system conventions), but even if it doesn't it should run after a few trivial adjustments. Please let me know if Gadfly doesn't run on any of your platforms. Gadfly is expected to run "out of the box" on Windows 95/98, Windows NT, all Unices or unix like platforms (Solaris, IX, BSD, Irix, Linux) and anywhere else where Python runs and there is a filesystem (Windows CE, QNX, Mac, etcetera). Furthermore the Gadfly database storage format (based in the Python marshal format) is binary compatible between all these systems -- you can create a database on any one of the systems and use a binary transfer mechanism to any other system and the database should load and run. What are the software/hardware requirements? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly requires a Python installation and sufficient memory and disk on the target machine to store the live databases in question. Since live Gadfly databases are kept in memory during active use Gadfly is not appropriate for databases that approach the size of virtual memory. In practice this means that on modern inexpensive machines Gadfly can store and manipulate databases with tens of thousands of rows without difficulty. Unless your machine is large databases with millions of rows might be infeasible -- and even if memory is sufficient the loading and storing of the database might make startup and shutdown speed an issue. Many interesting database instances never approach this size. Is there a query optimizer? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Yes, Gadfly automatically optimizes queries using some relatively sophisticated methods that attempt to take advantage of any available indices, among other methods. 
Technically all optimization uses only "equality predicates" at this time, so queries involving many inequalities (eg, BETWEEN) and few or no equalities might be slow, or might create very large intermediate results. Most standard database applications use equality constraints extensively (eg, as foriegn keys or relationships) but if your application is, say, a scientific application that requires many inequality comparisons across multiple tables among floating point data, the Gadfly optimizer may not help much -- you may want to load the data into your own structures and hand optimize the access and combination methods. On the other hand, if your application uses equality predicates often enough you may find that the Gadfly optimizer does pretty well, perhaps better than a "hand coded home grown database" would do without a lot of work. Is it fast? ~~~~~~~~~~~~~ You probably want a benchmark comparison. I have none to offer. The query evaluation and load/store facilities for Gadfly seem to be pretty fast for non-huge databases -- particularly if you make intelligent use of indices. At this point the slowest part is the SQL parser itself. If you use a lot of similar SQL statements please consult the documentation on how to avoid reparsing statements by use of multiple cursors and dynamic parameters. Unverified anecdotal reports suggest that Gadfly is not noticably slower than other free or freeish portable database systems; some have suggested it can even be faster, at least for certain types of use. The entire query engine is designed to use the kjbuckets Python C extension module, and use of kjbuckets should speed the performance of Gadfly considerably -- Gadfly will run without it, but noticably slower (2x slower for even small databases, more for larger ones). Are there data size limitations? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are no intrinsic limitations. For example a "varchar" can be of any size. That said, remember that a gadfly database, in the absense of hackery (eg, storing a filename instead of a value) stores all data in memory, so at some point large values may fill up virtual memory and the load/store operations can get slowish. What about recovery after crashes? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly supports a log with deferred updates recovery procedure, together with commits and rollbacks. This means that if your program crashes or if your computer crashes it should be able to restore a Gadfly database to the state of the database at the point of the last "commit". Any remaining bugs in this area are likely to have to do with data definition statements ("create" or "drop") so be careful to commit and checkpoint after modifying the data definitions. Of course, there are no known bugs, but it is possibly that not all possible combinations have been tested (this area is quite subtle, unfortunately :( ). Please see the recovery documentation for more information. What about client/server based access? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly supports a TCP/IP based client/server mode, together with a non-standard but highly flexible security paradigm for the server. Please see the client/server mode documentation. What about concurrency control? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly does not have true concurrency control. The only safe way to allow two concurrent agents to modify a single database is to have both of them communicate with the database via a gadfly server that arbitrates database accesses. 
The server will serve each agent serially -- one at a time -- in separate transactions each committed immediately. Of course, two programs can have read-only access to the same database in separate memory instances without any problems. Is there a mailing list? ~~~~~~~~~~~~~~~~~~~~~~~~~~ Not yet - there may be once sufficient volunteers put their hands up to help maintain the project. Who uses it? ~~~~~~~~~~~~~~ My email suggests that quite a few people have been using Gadfly, including professional and industrial organizations of various sorts. From my vantage point it is not clear how serious all of these uses have been -- but some of them appear quite serious. Gadfly has been included in a number of CD-rom publications, such as Linux distributions. Zope Corporation includes gadfly in Zope. Is support available? ~~~~~~~~~~~~~~~~~~~~~~~ Not at present. There may be once sufficient volunteers put their hands up to help maintain the project. What are the plans for Gadfly? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ At this point the current plan is to maintain the current implementation and fix any bugs that arise. Some time in the distant future (several months maybe) a new release with major new features might arrive, but only if the new features allow existing applications to run. Gadfly will support backwards compatibility of existing databases. How do I contribute some code that uses Gadfly? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Please contact the project maintainers. Installation ------------ This section relates to the installation of Gadfly and connecting Gadfly to other software. Does Gadfly support ODBC or JDBC? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Although the Gadfly SQL subset is based on the ODBC 2.0 SQL definition Gadfly does not support the C-level ODBC calling mechanism. All direct accesses to Gadfly must use Python modules at this time. How can I move a Gadfly database instance from one location/machine to another? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A gadfly database instance as stored in a file system directory may be copied to another directory even on another machine using any binary file copy mechanism. You must copy all files relating to the database instance in the gadfly database directory to the destination, and be sure that if you are copying across different platforms to use a binary copy mechanism (eg, ftp in binary mode). How can I access gadfly from another program? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The only supported API (applications program interface) for gadfly at this writing is the Python Database API either using direct access or via the client/server mechanism. Although you must use Python to access a gadfly database at this time it is possible to embed a Python instance in another program. Please see the Python documentation. What database size limits are there? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As mentioned there are no intrinsic limits (eg, a varchar can be of any size) except for the limitations of memory and the possibility that the load/store mechanism may get too slow if the database grows too large. What are the file types in the gadfly database directory? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - \*.grl -- a relation representation where * is the relation name. - \*.brl -- a back-up relation representation (for possible recovery). - \*.gfd -- a data definitions file where * is the database name. 
- \*.bfd -- a back-up data definitions file (for possible recovery). - \*.gfl -- a log file where * is the database name - \*.glb -- a back-up log file (for possible recovery). Use --- The following section relates to the use of gadfly. Does Gadfly support virtual tables (VIEWS)? Quantifiers? EXISTS? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Aggregates? Groupings? Indices? Of course it does! Gadfly supports a very large SQL subset. See the additional documentation for more detailed information. How standard is the SQL subset supported? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly SQL is based on ODBC 2.0 SQL which in turn is based on an SQL standard draft. This means that Gadfly SQL adheres closely to the SQL you find in many other database products and documented in many books and other documentation. A number of people have implemented Gadfly databases and transferred the SQL with few to no modifications from the implementation directly to Oracle or MS-Access, for example. Are BLOBS (binary large objects) and arbitrary string values supported? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Yes, varchars have no intrinsic size limit and can be used to store marshalled or pickled Python BLOBS -- but the application using gadfly will have to know when to "deserialize" the objects (using marshal or pickle or other mechanisms). For example it is even possible to store Python byte code objects in a gadfly database in this way. In particular gadfly stores all strings, including strings with null bytes and newlines, but for such strings you must use dynamic parameters (or other mechanisms that avoid embedding the string directly into the SQL syntax), like:: s = chr(0)+chr(10) cursor.insert("insert into table x(v) values (?)", (s,)) NOT:: s = chr(0)+chr(10) cursor.insert("insert into table x(v) values ('%s')" % (s,)) Since the SQL parser will choke on the latter. How can I find the column names in a "select \*"? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The nasty answer is "don't use select \*", but if you really want to you can get the order of names for the columns from the description attribute for a cursor:: >>> from gadfly import gadfly >>> g = gadfly("test", "dbtest") >>> c = g.cursor() >>> c.execute("select * from work") >>> c.description (('RATE', None, None, None, None, None, None), ('NAME', None, None, None, None, None, None), ('HOURS', None, None, None, None, None, None)) >>> print c.pp() RATE | NAME | HOURS ======================= 40.2 | sam | 30 10.2 | norm | 45 5.4 | woody | 80 4.4 | diane | 3 12.9 | rebecca | 120 200.0 | cliff | 26 3.5 | carla | 9 IE, c.description[i][0] gives the name of the ith column. The query mechanism essentially randomizes the order of the columns in a select * and you cannot rely on the engine producing columns in any specific order unless you specify the order in the query explicitly. Can I add my own table implementation? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Yes you can, to a limited extent. Use the remote view protocol in gfintrospect.py. The table you add can extract information from any source but must not recursively query the same database instance (using a Python implementation) and place it into a Gadfly database either only once on first usage or once per query. The table added cannot be updated via SQL and you cannot load "only the part of the table you need for this query." 
The table added must be explicitly re-added during the database initialization on each usage, and if you implement the table incorrectly you may cause some gadfly queries to crash. Use with caution: this is an advanced feature. See remotetest.py. Why can't I update my own table implementation via SQL? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Although it is possible to create a protocol which allows non standard table implementations to be updated and optimized via SQL, the implications to the query optimization mechanism and the crash recovery strategy are not clear and may be quite subtle. At this time the implementation opts to avoid possible bugs by not supporting such features, although programmers are welcome to experiment at their own risk, with the understanding that their experimental modifications may not be supported in future releases. It is perfectly possible to change the number of rows or values in no modifications from the implementations without using SQL or gadfly, however and have the updates automatically reflected in the table instance in the database. [You should not, however, change the number or names of the columns.] For example gfintrospect.DictKeyValueView will "wrap" a Python dictionary as a gadfly table and automatically reflect modifications made by an external Python program to the dictionary. How do you define a primary key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ New in 1.0 you can get the effect of a primary key by defining a unique index on the primary key columns:: create unique index pkey on person(firstname, lastname) Effectively will enforce a primary key constraint for (firstname, lastname) on the person table. What about NULLs, Triggers and other missing stuff? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The present release opted not to add missing standard or non-standard features that were likely to cause major modifications to large sections of the implementation, and therefore were likely to introduce bugs. Where is the LIKE predicate? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The LIKE predicate for string matching is still not supported at the SQL level. For what it's worth, it is easy to use Python's string matching (regex, re, string.search, etcetera) facilities on the result of a query. Also, for what it's worth, since the gadfly optimizer won't easily be able to optimize for string matching the "by hand" method would essentially be what gadfly would do anyway, without major modifications to the implementation. After a crash Gadfly won't recover! Help! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This shouldn't happen, but did happen (at least once) for a previous release. If it happens again, you can explicitly delete the log files from the database directory in order to recover the database to a state which may or may not correspond to the state of the database at the second to last commit/checkpoint operation. It is possible, but not likely, that the database state will include some but not all updates from the last commit, but, to repeat, it shouldn't happen. Please report the problem if it occurs again. gadfly-1.0.0/doc/gadfly.txt0100644000157700012320000004462607465610515014567 0ustar rjonestechGadfly: SQL Relational Database in Python ========================================= :Version: $Revision: 1.2 $ .. contents:: What is it? ----------- Gadfly is a collection of python modules that provides relational database functionality entirely implemented in Python. 
It supports a subset of the intergalactic standard RDBMS Structured Query Language SQL. One of the most compelling aspects of Gadfly is that it runs whereever Python runs and supports client/server on any platform that supports the standard Python socket interface. Even the file formats used by Gadfly for storage are cross-platform -- a gadfly database directory can be moved from Win95 to Linux using a binary copying mechanism and gadfly will read and run the database. It supports persistent databases consisting of a collection of structured tables with indices, and a large subset of SQL for accessing and modifying those tables. It supports a log based recovery protocol which allows committed operations of a database to be recovered even if the database was not shut down in a proper manner (ie, in the event of a CPU or software crash, [but not in the event of a disk crash]). It also supports a TCP/IP Client/Server mode where remote clients can access a Gadfly database over a TCP/IP network (such as the Internet) subject to configurable security mechanisms. Because it lacks (at this time) true concurrency control, and file-system based indexing it is not appropriate for very large multiprocess transaction based systems. Since Gadfly depends intimately on the kwParsing package it is distributed as part of the kwParsing package, under the same generous copyright. Why? ---- Gadfly allows Python programs a convenient way to store, retrieve and query tabular data without having to rely on any external database engine or package. That is, Gadfly provides a simple, easy, and relatively efficient in-memory relational database style engine for Python programs, complete with a notion of a "committed, recoverable transaction" and "aborts". Looking at the /etc directory in unix or at the Registry under win32 or at the buzzillions of configuration files one find sitting around file systems it becomes apparent that modern programs depend heavily on tabular data. Furthermore as memory prices continue to drop and inexpensive machines keep growing larger and larger memory capacity it is clear that more and more database-style work can be done on largish data sets in memory, and hence a simple in-memory SQL implementation like Gadfly may be useful for serious work. Gadfly uses relational style representations and the SQL query language primarily because these are widely understood and familiar to many programmers. SQL can't do everything, but it is successful in part because it can do a lot of important things easily and well. (Python can do everything else...) Use --- The main "gadfly" module attempts to faithfully adhere to Greg Stein's Python Database API, as discussed and certified by the Python DB-SIG. That said, some of the API that I didn't really understand is not implemented yet. Please look to gadfly.py to determine exactly what parts of the interface are implemented or stubbed. Concurrent database updates are not supported. The "databases" are currently designed to be written/modified by one process in isolation. Multiple processes can access a Gadfly database when accesses are arbitrated by a tcp/ip Gadfly server process. Creating a new database ~~~~~~~~~~~~~~~~~~~~~~~ Unlike most Python/database-engine interfaces you must create a Gadfly database using Python (whereas with Oracle you'd use other tools, for example). 
To accomplish this use:: import gadfly connection = gadfly.gadfly() with no arguments and then startup a database using the startup method:: connection.startup("mydatabase", "mydirectory") Here "mydirectory" must be a directory which exists and which can be written to in order to store the database files. The startup will create some files in "mydirectory". This will have the effect of clobbering any existing Gadfly database called "mydatabase" in the directory "mydirectory". Gadfly will prevent you from starting up the same connection twice, however. Note that the first "import gadfly" reads in and initializes some rather large data structures used for parsing SQL, and thus may take longer than other module imports. Now with your new database you can create tables, populate them, and commit the result when you are happy:: cursor = connection.cursor() cursor.execute("create table ph (nm varchar, ph varchar)") cursor.execute("insert into ph(nm, ph) values ('arw', '3367')") cursor.execute("select * from ph") for x in cursor.fetchall(): print x # prints ('arw', '3367') connection.commit() Reconnecting to an existing database ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Once a database exists you can reconnect to it as follows:: import gadfly connection = gadfly.gadfly("mydatabase", "mydirectory") This will read in the database tables with the most recently committed values. The initialized database may now be queried and updated:: cursor = connection.cursor() cursor.execute("update ph set nm='aaron' where nm='arw'") cursor.execute("select * from ph") for x in cursor.fetchall(): print x # prints ('aaron', '3367') If you do not wish to commit updates you may simply not execute a commit on the connection object (which writes out the tables). If you wish to restore the old values from the existing database use:: connection.abort() Updates are only stored upon a connection.commit(). [Actually, if autocheckpoint is disabled, updates are only stored to table files on checkpoint -- see the documentation on the recovery mechanism.] Use:: print cursor.pp() to "pretty print" the result of any evaluation (which might be None for a non-select). Features -------- In this version all tables are read into memory upon "connecting" to the database and "touched" tables are written out upon checkpoint. Each table is represented as a separate file in the destination directory, and there is a "data definition" file as well (a list of data definition declarations). During active use a log file appears int the active directory as well, and if the process crashes this log file is used to recover committed operations. The SELECT statement ~~~~~~~~~~~~~~~~~~~~ At this point Gadfly supports quite a lot of the SQL semantics requested in the ODBC 2.0 specification. Please see the SQL constructs page for a more detailed presentation. SQL statements supported include the SELECT:: SELECT [DISTINCT|ALL] expressions or * FROM tables [WHERE condition] [GROUP BY group-expressions] [HAVING aggregate-condition] [union-clause] [ORDER BY columns] This statement is quite powerful. It reads intuitively as follows: 1) Make all combinations of rows from the tables (FROM line) 2) Eliminate those combinations not satisfying condition (WHERE line) 3) (if GROUP present) form aggregate groups that match on group-expressions 4) (if HAVING present) eliminate aggregate groups that don't satisfy the aggregate-condition. 5) compute the columns to keep (SELECT line). 
6) (if union-clause present) combine (union, difference, intersect) the result with the result of another select statement. 7) if DISTINCT, throw out redundant entries. 8) (if ORDER present) order the result by the columns (ascending or descending as specified, with precedence as listed). The actual implementation in gadfly is much more optimal than the intuitive reading, particularly at steps 1 and 2 (which are combined via optimizing transformations and hash join algorithms). Conditions may include equalities, and inequalities of expressions. Conditions may also be combined using AND, OR, NOT. Expressions include column names, constants, and standard arithmetic operations over them. Embedded queries supported include subquery expressions, expr IN (subselect), quantified comparisons, and the EXISTS (subselect) predicate. Aggregate tests and computations can only be applied after the GROUPing and before the columns are selected (steps 3,4,5). Aggregate operations include COUNT(*), COUNT(expression), AVG(expression), SUM(expression), MAX(expression), MIN(expression), and the non-standard MEDIAN(expression). These may be applied to DISTINCT values (throwing out redundancies, as in COUNT(DISTINCT drinker). if no GROUPing is present the aggregate computations apply to the entire result after step 2. There is much more to know about the SELECT statement. The test suite ``test/test_gadfly.py`` gives numerous examples of SELECT statements. Please examine gadfly/grammar.py for a precise definition of the supported syntax. Please find any of the 500 books on SQL for a description of the meaning of these constructs. Please inform me if any of them give the wrong result when executed in Gadfly! Table creation and "data types" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Create tables using the CREATE TABLE statement:: CREATE TABLE name (colname datatype [, colname datatype...]) Data types currently "supported" are integer, float, and varchar. They are ignored by the implementation, anything that is hashable and marshallable can currently go in any column (but that is likely to change). For example:: create table frequents (drinker varchar, bar varchar, perweek integer) At present you can put tuples, complexes, or anything else into a column specified as "varchar". Don't count on that always being true, please. Other supported statements ~~~~~~~~~~~~~~~~~~~~~~~~~~ Gadfly also supports the searched DELETE and UPDATE; INSERT VALUES and INSERT subselect; CREATE/DROP INDEX, and DROP TABLE. These have the informal syntax:: DELETE FROM table WHERE condition UPDATE table SET col=expr [, col=expr...] WHERE condition INSERT INTO table [(column [, column...])] values (value [, value...]) INSERT INTO table [(column [, column...])] subselect CREATE [UNIQUE] INDEX name ON table (column [, column...]) DROP TABLE table DROP INDEX name Again, see the test/test_gadfly.py file for examples like:: delete from templikes where be='rollingrock' update templikes set dr='norman' where dr='norm' insert into ph(nm,ph) values ('nan', '0356') insert into templikes(dr, be) select drinker, beer from likes create index sbb on serves (beer, bar) drop table templikes drop index tdindex Multiple statements may be executed in one cursor.execute(S) by separating the statements with semicolons in S, for example S might have the string value:: drop index tdindex; drop table templikes (no final semicolon please!). Please see gftest.py for examples of most of these. Remember that SQL is case insensitive (capitalization of keywords doesn't matter). 
Please see sqlgram.py for a precise definition of all supported constructs Dynamic Values ~~~~~~~~~~~~~~ Expressions also include the special expression '?' (the ODBC-style dynamic expression) as in:: insertstat = "insert into ph(nm,ph) values (?, ?)" cursor.execute(insertstat, ('nan', "0356")) cursor.execute(insertstat, ('bill', "2356")) cursor.execute(insertstat, ('tom', "4356")) Dynamic values allow the cursor to use the same parsed expression many times for a similar operation. Above the insertstat is parsed and bound to the database only once. Using dynamic attributes should speed up accesses. Thus the above should run much faster than the equivalent:: cursor.execute("insert into ph(nm,ph) values ('nan', '0356')"); cursor.execute("insert into ph(nm,ph) values ('bill', '2356')"); cursor.execute("insert into ph(nm,ph) values ('tom', '4356')"); Dynamic attributes can appear in other statements containing expressions (such as SELECTs, UPDATEs and DELETEs too). For SELECT, UPDATE, and DELETE the dynamic expression substitutions must consist of a single tuple, as in:: stat = "select * from ph where nm=?" cursor.execute(stat, ("nan",)) ... cursor.execute(stat, ("bob",)) ... Since the dynamic substitution eliminates the need for parsing and binding (expensive operations!) the above should run faster than the equivalent:: cursor.execute("select * from ph where nm='nan'") ... cursor.execute("select * from ph where nm='bob'") ... If you repeat several similar queries multiple times, associate each query "template string" with a unique cursor object so that each template must be parsed and bound only once. Note that some relatively complex queries from the test suite run 2 to 3 times faster after they have been parsed and bound, even without the kjbuckets builtin. With kjbuckets the same ran 5 to 10 times faster. Multiple Batch Inserts and Dynamic Values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For the special case of INSERT VALUES a list of substitution tuples allows the query engine to perform the inserts in optimized batch mode. Thus the fastest way to perform the three inserts given earlier is:: data = [('nan', "0356")), ('bill', "2356"), ('tom', "4356")] stat = "insert into ph(nm,ph) values (?, ?)" cursor.execute(stat, data) ...and it would be even faster if the cursor had previously executed the stat with different data (since then no parsing or binding would occur). 
Introspection ~~~~~~~~~~~~~ By default a gadfly database now automatically includes "introspective" tables which allow a gadfly query to "query the shape of the database" -- for example to view table names and names of rows in tables:: >>> g = gadfly() >>> g.startup("dbtest", "dbtest") >>> c = g.cursor() >>> c.execute("select * from __table_names__") >>> print c.pp() IS_VIEW | TABLE_NAME ========================= 1 | __TABLE_NAMES__ 1 | DUAL 1 | __DATADEFS__ 1 | __COLUMNS__ 1 | __INDICES__ 1 | __INDEXCOLS__ Here DUAL is a standard one row/one column test table (from the Oracle tradition) and the other tables provide information about the database schema:: >>> c.execute("create table t1 (a varchar, b varchar)") >>> c.execute("create table t2 (b varchar, c varchar)") >>> c.execute("create unique index t1a on t1(a)") >>> c.execute("create index t1b on t1(b)") >>> c.execute("select * from __table_names__") >>> print c.pp() IS_VIEW | TABLE_NAME ========================= 0 | T1 1 | __DATADEFS__ 1 | __INDICES__ 0 | T2 1 | __TABLE_NAMES__ 1 | __COLUMNS__ 1 | DUAL 1 | __INDEXCOLS__ >>> c.execute("select * from __columns__") >>> print c.pp() COLUMN_NAME | TABLE_NAME ============================= A | T1 B | T1 NAME | __DATADEFS__ DEFN | __DATADEFS__ INDEX_NAME | __INDICES__ TABLE_NAME | __INDICES__ IS_UNIQUE | __INDICES__ TABLE_NAME | __TABLE_NAMES__ IS_VIEW | __TABLE_NAMES__ B | T2 C | T2 COLUMN1 | DUAL TABLE_NAME | __COLUMNS__ COLUMN_NAME | __COLUMNS__ INDEX_NAME | __INDEXCOLS__ COLUMN_NAME | __INDEXCOLS__ >>> c.execute("select * from __indices__") >>> print c.pp() IS_UNIQUE | TABLE_NAME | INDEX_NAME =================================== 0 | T1 | T1B 1 | T1 | T1A >>> c.execute("select * from __indexcols__") >>> print c.pp() COLUMN_NAME | INDEX_NAME ======================== B | T1B A | T1A >>> c.execute("select * from dual") >>> print c.pp() COLUMN1 ======= 0 Interactive testing ~~~~~~~~~~~~~~~~~~~ After installation, you may interactively test the created database from the same directory using the interactive interpreter, for example, as in:: Python 2.1.3 (#1, Apr 30 2002, 19:37:40) [GCC 2.96 20000731 (Red Hat Linux 7.1 2.96-96)] on linux2 Type "copyright", "credits" or "license" for more information. >>> >>> from gadfly import gadfly >>> connection = gadfly("test", "dbtest") >>> cursor = connection.cursor() >>> cursor.execute("select * from frequents") >>> cursor.description (('DRINKER', None, None, None, None, None, None), ('PERWEEK', None, None, None, None, None, None), ('BAR', None, None, None, None, None, None)) >>> print cursor.pp() DRINKER | PERWEEK | BAR ============================ adam | 1 | lolas woody | 5 | cheers sam | 5 | cheers norm | 3 | cheers wilt | 2 | joes norm | 1 | joes lola | 6 | lolas norm | 2 | lolas woody | 1 | lolas pierre | 0 | frankies >>> Architecture ------------ The SQL grammar is described in grammar.py, the binding of the grammar constructs to semantic objects is performed in bindings.py, the semantic objects and their execution strategies is defined in semantics.py. The semantics use a lot of classical and non-classical logic (cylindric logic, to be precise) as well as optimization heuristics to define a relatively efficient and hopefully correct implementation of SQL. I recommend the brave have a look at semantics.py for a look into the 12 years of research into databases, logic, and programming languages that contributed bits to this work. The underlying logic (in a different framework) is given in A. 
Watters, "Interpreting a Reconstructed Relational Calculus", ACM SIGMOD Proceedings, 1993, Washington DC, pp. 367-376. The most basic data structures of the implementation are given in either kjbuckets0.py or the faster kjbucketsmodule.c, which implement the same data type signatures in Python and in a C extension to Python respectively. The database.py module is a simple wrapper that provides a standard DBAPI interface to the system. The test suite test/test_gadfly.py attempts to provide a regression test and a demonstration of the system. The SQL parser also requires the kwParsing parser generation package, which consists of a number of additional python modules. Comments -------- Please find bugs and report them to us at http://gadfly.sf.net/. The query engine should run faster if you have the builtin (C) module kjbuckets installed. Otherwise it will use a "python imitation" kjbuckets0.py. In one test the test suite ran two times faster using kjbuckets. I suspect it will have a higher payoff for larger data sets. gadfly-1.0.0/doc/gfplus.txt0100644000157700012320000000566107467324627014625 0ustar rjonestech=================================================== gfplus -- An interactive shell for the Gadfly RDBMS =================================================== Introduction ------------ gfplus was designed to allow interactive sessions to Gadfly databases. It is molded in the fashion of SQL*Plus, a similar tool for Oracle databases. gfplus allows you to type SQL directly to your database, and interactively modify your data or database structure. gfplus requires gadfly to be installed. Features -------- - Interactively query your database with SQL - Supports both local and Client/Server databases - When avaialble, uses full command line editing, and command recall - Can be used in a "pipeline" or take its input from a script - Full commit/rollback capability - Create new Gadfly databases with ease - Easily access column listings for all relations - Switch between Gadfly databases in one session - Interactive help menu, by typing "help;" Instructions ------------ gfplus runs on any platform where Python can run and Gadfly is installed. To execute it on a standalone database, you can just run:: > python gfplus.py [db] [location] gfplus takes two optional arguments. The first is the name of the database to open. The second is the location of the database, on disk. If you do not provide these parameters, gfplus will prompt you for them. If you give a name and location which gfplus cannot find, it will allow you to create a database with those parameters:: > python gfplus.py bar /u/jeff/gf/ gfplus 1.0 -- Interactive gadfly shell Using: DB: bar Loc: /u/jeff/gf/ Unable to locate database "bar" at location "/u/jeff/gf/". Create? (Yy/Nn) All gfplus statements are terminated with a semicolon (;). Statements can span multiple lines. 
A sample session, where I edit the foo database, stored in the directory /u/jeff/gf/:: gfplus 1.0 -- Interactive gadfly shell Using: DB: foo Loc: /u/jeff/gf/ GF> select * from rush where tracks < 6; TRACKS | ALBUM ==================== 4 | Hemispheres GF> select * from rush; TRACKS | ALBUM ============================ 8 | Fly By Night 6 | Caress of Steel 4 | Hemispheres 6 | A Farewell To Kings 6 | Permanent Waves GF> update rush set tracks = 5 _GF> where album = '2112'; GF> select * from rush where tracks < 6; TRACKS | ALBUM ==================== 5 | 2112 4 | Hemispheres GF> commit; GF> desc rush; COLUMN_NAME =========== ALBUM TRACKS GF> exit; To use gfplus with a Client/Server instance, start gfplus with the same arguments you would pass to gfclient:: > python gfplus.py admin admin localhost 2222 gfplus 1.1 -- Interactive gadfly shell [readline] Tuesday January 19, 1999 09:22 AM Using: Policy: admin Loc: localhost:2222 GF> gadfly-1.0.0/doc/index.txt0100644000157700012320000000630207467344413014420 0ustar rjonestech=========================================== GadflyB5: SQL Relational Database in Python =========================================== Gadfly is a simple relational database system implemented in Python based on the SQL Structured Query Language. The package requires installation before use, please see the section on installation_. In addition to this general documentation, please see the Gadfly `frequently asked questions`_. Contents ======== - Installation_ - Overview_ - gfplus_, the command-line SQL tool - SQL_ details - `Frequently Asked Questions`_ - kwParsing_ (core parsing engine) - kjBuckets_ (core storage types) - `Database Recovery`_ - `Network Setup`_ - `About the New Gadfly Relase, GadflyB5`_ - Contact_ - Acknowledgements_ - License_ - `Gadfly project page`_, including downloads and bug tracker. .. _installation: installation.html .. _overview: gadfly.html .. _gfplus: gfplus.html .. _sql: sql.html .. _`frequently asked questions`: faq.html .. _kjbuckets: kjbuckets.html .. _kwparsing: kwParsing.html .. _`database recovery`: recover.html .. _`network setup`: network.html .. _`gadfly project page`: http://sourceforge.net/projects/gadfly/ About the New Gadfly Relase, GadflyB5 ===================================== Codename: GadflyB5 (a much better series than TNG ;) See the announcement_ text for more info. .. _announcement: announcement.html Contact ======= The primary contact for this project is Richard Jones . There is no ongoing support available for usage, unless someone volunteers. If you have found a bug, please submit an issue to the bug tracker at: https://sourceforge.net/tracker/?atid=100662&group_id=662 Please include information about your system, your python version and the traceback of the error you encountered. If you've got a great idea for gadfly, and have the time to work on it, please contact the gadfly project admins. Acknowledgements ================ Aaron Watters, for writing the whole thing in the first place. You rock! For contributions over time: the `Zope Corporation`_ team, Berthold Hoellmann, Oleg Broytmann, Adnan Merican, spex66, jfarr, Anthony Baxter, Lars M. Garshol, Marc Risney, Jeff Berliner. .. 
_`Zope Corporation`: http://www.zope.com/ License ======= The gadfly and kjbuckets source is copyrighted, but you can freely use and copy it as long as you don't change or remove the copyright: Copyright Aaron Robert Watters, 1994 All Rights Reserved Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appears in all copies and that both that copyright notice and this permission notice appear in supporting documentation. AARON ROBERT WATTERS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL AARON ROBERT WATTERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. gadfly-1.0.0/doc/installation.txt0100644000157700012320000000614507512761366016020 0ustar rjonestechGadfly: SQL Relational Database in Python ========================================= :Version: $Revision: 1.5 $ Installation ------------ To guarantee correct installation, please follow the steps in this document. Of course you must have Python in order to use this package! Python can be obtained from the `Python project page`_ .. _`Python project page`: http://www.python.org/ Note: this package requires Python 2.1 or later. If your python2.1 executable is called something other than "python" then substitute it below (for example, on RedHat 6.x/7.x systems, python version 2+ is called "python2"). On Windows, make sure that the Python directory is in your path. The easiest way to check this is to start a command prompt (or DOS) window and type "python". If you then see some output like;:: Python 2.2 (#28, Dec 21 2001, 12:21:22) [MSC 32 bit (Intel)] on win32 Type "help", "copyright", "credits" or "license" for more information. >>> You have Python on your path and may continue. If you get a message saying "not found" or "'python' is not recognized as an internal or external command" then you may not have the right directory in your path. If you are reading these instructions we presume that you have already found the gadfly module and downloaded it to your machine. If you haven't go and get the appropriate file from the web site (http://sourceforge.net/project/showfiles.php?group_id=662) before proceeding. 0) If you've got a previous version of gadfly installed on your system, you may need to remove it since the packaging of gadfly has changed. Look in the site-packages directory of your python installation's "lib" directory. If there are any directories under there called "Gadfly" or "gadfly" then you have an existing version of the module and should rename or delete that directory. 1) Unpack the package. 2) Test: In the package directory, run the command:: % python run_tests and all tests should pass. 3) Install: In the package directory, run the command:: % python setup.py install This installs the gadfly module, and after this command, you're ready to go. Have a look at the overview_ for details of how to use gadfly. 
4) If you wish to speed up gadfly you can use the kjbuckets_ parser, go to the kjbuckets directory and a) if you have a C compiler - run the command "python setup.py install" - the kjbuckets extenstion could compile and install b) if you're on Windows, - cd to kjbuckets/ - copy the .pyd file to your the library directory appropriate to your python version. This should be something like /usr/lib/python/site-packages on \*nix, c:\\python on Windows (prior to Python2.2) or c:\\python\\Lib\\site-packages if you are on Python2.2 or later .. _overview: gadfly.html .. _kjbuckets: kjbuckets.html If for some reason you are stuck with using Python 2.0, you will need to patch the gadly/semantics.py file with the patch in kjbuckets/2.0. This is risky and we recommend upgrading to Python 2.1 or 2.2 instead. gadfly-1.0.0/doc/kjbuckets.txt0100644000157700012320000007107007465430476015305 0ustar rjonestechSet and Graph Datatypes for Python: kjbuckets Release 2.2 ========================================================= :Author: Aaron Watters :Organization: Computer and Information Sciences, New Jersey Institute of Technology, University Heights, Newark, NJ, 07102 (address obsolescent). :Version: $Revision: 1.1.1.1 $ :Abstract: This is the documentation for the kjbuckets C extension to Python (second release), which defines graph and set datatypes as well as an alternative dictionary data type. These types are tightly coupled at the level of C, allowing fast and powerful algebraic combinations of container objects. .. Note: this text is a conversion from the TeX original source. This is modified version of kjbuckets. Modifications are: - updated for Python 2.0 (Berthold Hoellmann ) - now have Makefile.in/Setup for old-style compilation/configuration - now have setup.py for new-style compilation/configuration (Distutils) (Oleg Broytmann ) - kjbuckets.pyd Windows DLL for Python 2.0 - sqlsem.py patch to use it with kjbuckets.pyd (Adnan Merican ) - kjbuckets.pyd Windows DLL for Python 2.1 - kjbucketsmodule.c patches for Python 2.1 (ActiveState 2.1.1, actually) (spex66 ) - kjbuckets.pyd Windows DLL for Python 2.2 (jfarr" ) .. contents:: Introduction ------------ The kjbuckets module defines three data types for Python: kjSet, kjGraph, and kjDict. These types come with a number of associated methods, including common set theoretical operations such as union, intersection, difference, composition, transposition, reachability sets, and transitive closure. For suitably large compute intensive uses these types should provide up to an order of magnitude speedup versus an implementation that uses analogous operations implemented directly in Python. The following discussion assumes the kjbuckets module has been compiled and installed in the Python executable. For information on how to perform such an installation, see the Python extensions manual that comes with the Python distribution. Release 2.2 contains a number of goodies not documented here. If you want, you can try to figure them out from looking at the code! Release 2.1 had a problem linking under Python 1.2. This has been fixed in 2.2. The Types --------- This module defines three types kjSets are initialized using the function kjbuckets.kjSet(). They are containers for Python hashable objects with no significance to redundancy and no order to members. 
For example _[#]:: >>> from kjbuckets import * >>> X = kjSet([1,2,3,3,5,4]); print X kjSet([1, 4, 3, 2, 5]) >>> Y = kjSet([5,5,3,3,2,1,4,4,4]); print Y kjSet([1, 4, 3, 5, 2]) >>> X == Y 1 kjGraphs are initialized using the function kjbuckets.kjGraph(). They relate Python hashable objects to other objects, with no significance to order or redundancies on the pairings. Technically, kjGraph defines a directed graph abstract data type. For example:: >>> G1 = kjGraph([(1,1),(1,2),(2,4),(9,6),(2,4)]); print G1 kjGraph([(1, 1), (1, 2), (9, 6), (2, 4)]) >>> G1.reachable(1) kjSet([1, 4, 2]) kjDicts are initialized using the function kjbuckets.kjDict(). They map hashable objects to other objects, in a manner similar to the Python builtin Dictionary data type, except that the kjbucket implementation is slower. That is, it is slower if you use it just like another Python dictionary. It's a lot faster if you want to do compositions, intersections, and so forth using dictionaries. And with the new release the speed difference is not so great anymore -- about 20% slower on comparable operations -- and kjDict's tend to use less space than Python dictionaries for the same contents. Example:: >>> D = kjDict([(1,1),(1,2),(2,4),(9,6),(2,4)]); print D kjDict([(1, 2), (9, 6), (2, 4)]) >>> D * D kjDict([(1, 4)]) .. [#] Most of the examples given here use numeric elements for ease of presentation, which is bad because it's boring. It's also bad because it leaves the impression that only simple things can be archived -- which is wrong. Remember that keys may be any hashable type (which even includes user defined classes which have a hash method defined), and for dictionaries and graphs the left members may be any Python object whatsoever. Initialization functions ------------------------ Each of the initialization functions accept four possible argument sequences: No argument: Results in the creation of a smallest empty object of the requested type. For example kjSet(), creates the smallest possible empty kjSet. Contents list or tuple: As illustrated above, the structures may be initialized with a list or tuple of contents, where the elements of the sequence are tuples of form (hashable object, object) pairs for kjDicts and kjGraphs and just hashable objects for kjSets. The examples given here use lists as the top level structure for the sequence initialization form, but you can also use tuples. For example as in:: >>> kjDict( ( (1,2), (2,3), (2,4), (3,4) ) ) kjDict([(1, 2), (2, 4), (3, 4)]) >>> kjSet( (9,2,1,9,8,7,6,4) ) kjSet([9, 6, 1, 7, 4, 2, 8]) In the case of kjDicts if there are key collisions the resulting kjDict may be dirty. Other kjTable: If the initializer argument is another kjTable the result will be the input table "coerced" to the other type (or if the types match you will get "first-level" copy of the table. The new object will be a distinct table which shares object references with the input table. 
For example:: >>> G kjGraph([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 5), (1, 0), (2, 1)]) >>> kjDict(G) kjDict([(0, 5), (1, 0), (2, 1), (3, 3), (4, 4)]) >>> kjSet(G) kjSet([0, 1, 2, 3, 4]) >>> G2=kjGraph(G) >>> G2 kjGraph([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 5), (1, 0), (2, 1)]) >>> G[12]=3 >>> G kjGraph([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 5), (1, 0), (2, 1), (12, 3)]) >>> G2 kjGraph([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (0, 5), (1, 0), (2, 1)]) Coercing a graph to a dictionary where the graph maps the same object to several objects will produce a "dirty" dictionary with key collisions decided arbitrarily. Coercing a set to a graph or dictionary produces an "identity" containing (x,x) for each element x of the Set. Coercing a graph or dictionary to a set produces the set of keys (left members) from the graph or dictionary. To get the "set of arcs" from a graph use kjSet(G.items()) instead of kjSet(G). Number: This option refers to the internal implementation of the types. Internally these types are implemented using arrays. Sometimes these arrays need to be resized to a larger size before an insert can complete. By initializing using a single integer argument n, you request that the structure be large enough that no resize will be needed until after n inserts (in the absense of deletions). For example S = kjSet(1000) initializes a set that will not need to be resized until after 1000 inserts have completed. However, since deletes sometimes trigger the array to resize to a smaller size, deleting an element from S before insert number 1000 may make resizing necessary anyway. Them's the breaks. Using this option may save some time and prevent some unnecessary memory fragmentation, when the programmer can determine (or guess) the expected number of insertions, a priori. There is a peculiar way to initialize a kjDict:: >>> kjUndump(("name","age"), ("aaron",12)) kjDict([('name', 'aaron'), ('age', 12)]) >>> kjUndump(("ssnum",),"123456789") kjDict([('ssnum', '123456789')]) This is a parallel operation to kjDict.dump which together are designed to make it easy to pack and unpack information from kjDicts, in particular for constructing database-style indices. There are two behaviors for this function. Called with arguments of form:: kjUndump( (key,), map ) (ie, the first argument is a tuple of length one and map is any object) the result is the same as:: kjDict( [ (key, map) ] ) Alternatively, called with two tuples of the same length with lengths larger than 1 the invocation:: kjUndump( (k1, k2, ..., kn), (m1, m2, ..., mn) ) produces the same result as:: kjDict( [ (k1,m1), (k2,m2), ..., (kn, mn) ] ) If the same key is mentioned twice in the first argument and the corresponding values in the second argument are not equal the result will be a dirty dictionary. Dirtiness --------- A table which has had a non-monotone update (ie, a deletion or a dictionary overwrite) is said to be "dirty." In particular any deletion makes a table dirty; and coercing a graph to a dictionary, or transposing a dictionary, or unioning a set or dictionary with a dictionary will produce dirty dictionaries if the computation results in any key collisions. To test whether a table is dirty use the X.Clean() method which produces X if X is clean, otherwise None. 
For example:: >>> G = kjGraph([(0, 0), (0, 1), (1, 4), (9, 9), (2, 5)]) >>> D = kjDict(G); print D; print D.Clean() kjDict([(0, 1), (1, 4), (9, 9), (2, 5)]) None >>> D2 = kjDict(D); print D2.Clean() kjDict([(0, 1), (1, 4), (9, 9), (2, 5)]) Here D is dirty because the coercion from a graph resulted in key collisions on 0, but the fresh copy D2 is not dirty. The result of an algebraic expression involving a dirty table will be dirty also, for example:: >>> D3 = D2 * D >>> print D3, D3.Clean() kjDict([(0, 4), (9, 9)]) None Note that, for example kjDict([(1,2),(1,3)]) will be dirty but kjDict([(1,2),(1,2)]) is not, i.e., inserting the same pair twice is not considered a collision. These types have a number of associated methods, operations, and accessors. For the purposes of discussion assume that S is a kjSet, D is a kjDict, and G is a kjGraph in the remainder. Furthermore assume X is an object of any of these types. Methods ------- There are a number of methods associated with each member of these types. S.member(ob), D.member(arg,map), G.member(src,dst) respectively are membership tests for the types. Each returns 1 if the object or pair are members of the structure or 0 otherwise. S.add(ob), D.add(arg,map), G.add(src,dst) respectively add new members to the object. These are equivalent to G[src]=dst, D[arg]=map, S[ob]=1 but the former may be preferrable for graphs and sets since they are less misleading. This is an "in place" mutation operation -- it will raise an error if the object has been hashed. D.delete_arc(arg,map), G.delete_arc(src,dst) respectively delete a pair from the structure or raise an error if the pair is not found. This is an "in place" mutation operation -- it will raise an error if the object has been hashed. X.has_key(key) determines whether a given key value occurs in the structure. In the case of sets this is identical to the membership test. In the case of dictionaries and graphs the function tests whether key occurs as a left member of some pair in the structure and returns 1 if so, otherwise 0. X.choose_key() selects an arbitrary key from the structure. In the case of sets it returns an arbitrary member of the set. In the case of graphs and dictionaries it picks an arbitrary left member of a pair in the structure. This operation is useful for algorithms that begin "pick an arbitrary node of the graph..." This method is "nondeterministic" in the sense that tables with the same members may choose different keys. X.subset(Y) determines whether X is a subset of Y. Returns 1 if so, else 0. X and Y may be of different types but may be confusing if one argument is a set and the other is not. If X is a set and Y is a graph or dictionary then subset will succeed if and only if Y contains (e,e) for each member e of X. If Y is a set and X is a graph or dictionary then subset will succeed if and only if every key of X is a member of Y. G.neighbors(key) returns a list of the objects y where (key, y) is a member of G. For example:: >>> G = kjGraph([(0, 0), (1, 1), (0, 4), (1, 5), (2, 2), (2, 6)]) >>> G.neighbors(1) [1, 5] If the key is absent from the table the result will be the empty list. This method is also defined for dictionaries, where the only possible results are a unary list if the key is present or an empty list if the key is absent. G.reachable(key) returns a kjSet of objects reachable on any path in the graph that begins at key. The key itself will occur in the result only if it lies on a loop of the graph. 
For example:: >>> G = kjGraph([(1, 0), (4, 1), (0, 2), (3, 2), (6, 3), (2, 4), (5, 0)]) >>> G.reachable(5) kjSet([0, 4, 1, 2]) Again this method is also defined for dictionaries. The method returns a kjSet rather than a list because this made sense to me at the time. X.items() returns a list of the members of the structure. For example:: >>> X = kjSet([0, 1, 2, 0, 1]) >>> X.items() [1, 0, 2] >>> X = kjGraph([(3, 0), (2, 2), (1, 2), (2, 0), (2, 0), (3, 0)]) >>> X.items() [(1, 2), (3, 0), (2, 2), (2, 0)] G.keys(), G.values() return the left members and right members of pairs in the graph G respectively. For example:: >>> G = kjGraph([(4, 8), (0, 9), (1, 10), (4, 9), (3, 7), (3, 8), (2, >>> 7)]) >>> G.keys() [4, 0, 1, 3, 2] >>> G.values() [8, 9, 10, 9, 7, 8, 7] Note that keys eliminates redundancies, whereas values does not. These functions are also defined for dictionaries but are not defined for sets. S.ident() generates an "identity dictionary" from the set S, the graph containing exactly those members (x,x) where x is a member of S. For example, the following calculation determines the "self-loop" elements of G:: >>> G kjGraph([(0, 0), (0, 3), (0, 2), (1, 4), (9, 9), (2, 5)]) >>> I = kjSet(G).ident() >>> I & G kjGraph([(0, 0), (9, 9)]) (In the previous release ident produced a graph, but now that the algebraic operators have been generalized I opted to produce the more specific dictionary type. This operation is now redundant since it is the same as kjDict(S).) G.tclosure() generates the transitive closure graph derived from the graph G. For example:: >>> G = kjGraph([(1, 3), (4, 1), (3, 0), (3, 1)]) >>> G.tclosure() kjGraph([(1, 3), (4, 1), (1, 0), (1, 1), (4, 3), (3, 0), (3, 1), (3, 3), (4, 0)] X.Clean() produces None if table X has experienced a non-monotone update (a deletion or a dictionary key collision) or was algebraically derived from a table that had experienced a non-monotone update, in all other cases it returns the table X itself. This is particularly useful for testing whether the unions of dictionaries or the transpose of a dictionary was unambiguous. For example:: >>> D = kjDict([('name', 'A. Watters'), ('ssn', 123)]) >>> D2 = kjDict([('ssn', 999), ('salary', 9000000)]) >>> D3 = D + D2; print D3 kjDict([('name', 'A. Watters'), ('ssn', 999), ('salary', 9000000)]) if D3.Clean() != None: ... print D3["name"], " makes ", D3["salary"] ... else: ... print "ambiguous dictionary union" ... ambiguous dictionary union Relational natural join anyone? X.Wash(), X.Soil() force a table to appear to be clean or dirty respectively, both returning None. Included for completeness. D.remap(X) produces a dictionary that is the result of remapping D by X, but it produces None if the remapping causes a key collision. For example to rename keys l and f to lname and fname respectively, preserving ssn, equating ssn with enum, and disregarding all other keys for D we could write. For example:: >>> D = kjDict([("f","aaron"), ("l","watters"), ("m","robert"), ("ssn",123)] ) >>> G = kjGraph() >>> G["ssn"]="enum" >>> G = (G + ~G).tclosure() # symmetric and transitive closure >>> G["lname"] = "l"; G["fname"] = "f" >>> D.remap(G) kjDict([('enum', 123), ('ssn', 123), ('lname', 'watters'), ('fname', 'aaron')]) This may seem strange, but it can be a very useful way of transforming collections of dictionaries. This operation is exactly the same as kjDict(X*D).Clean() but faster. (I use it a lot, so I optimized it -- it can correspond to projection, equality selection, and renaming in the relational algebra). 
D.dump(X) packs right members of a dictionary into a compact form. This function has two behaviors:: >>> D = kjUndump(("name","age","ssn"), ("aaron",12,12345)) >>> D kjDict([('name', 'aaron'), ('age', 12), ('ssn', 12345)]) >>> D.dump(("ssn",)) 12345 >>> D.dump(("name","ssn")) ('aaron', 12345) Called with an argument of form:: D.dump( (key,) ) (ie, a tuple of length one) it produces the same result as:: D[key] Alternatively, called with an argument of form:: D.undump( (k1, k2, ..., kn) ) (ie, a tuple of length greater than one) it produces that same result as:: ( D[k1], D[k2], ..., D[kn] ) This function is the parallel operation to the dictionary initializer kjUndump, which together are designed to make it easy to pack and unpack information from kjDicts. It is also defined on graphs, in which case the choice of for the resulting mapped items may be arbitrary. len(X) return the number of entries in X (which is the number of pairs in the case of graphs or dictionaries). del X[key] deletes the key from the structure. In the case of sets, this simply removes an element. In the case of dictionaries and graphs this method removes all entries with left member key. For example:: >>> G = kjGraph([(1, 3), (4, 1), (3, 0), (3, 1)]) >>> del G[3] >>> G kjGraph([(1, 3), (4, 1)]) This is an "in place" mutation operation -- it will raise an error if the object has been hashed. Hashing ------- These types are hashable, that is, they may be used as keys in hash structures and you may apply the function hash(X) to them. The kjGraph and kjDict structures also allow hashing even if some of their right members are unhashable. The "down side" of this "hashing unhashables" feature is that if two structures of the same type only differ on their unhashable right members they will hash to the same value -- which can make hash table look-ups slow. A "rule of thumb" is to only use kjDicts and kjGraphs as keys of a hash table structure if the set of keys is expected to nearly always differ on hashable components. However, once a table's hash value has been computed for any reason, that table becomes immutable -- any attempts to mutate the structure in place (using index assignment, del, X.delete_arc, or X.add) will raise a TypeError. Other Properties ---------------- Objects of these types may be compared for equality where X==Y succeeds if and only if X and Y contain the same members. Mixed type equality comparisons between kj-tables are allowed, where if S==D succeeds if and only if D consists of the pairs (e,e) for each element e of S, and similarly for S==G. Objects of these types may also be used as booleans where only an empty structure is equivalent to false. One questionable aspect of the implementation is the use of the indexing notation. Although it may be completely avoided, both kjSets and kjGraphs allow indexing. In the case of sets S[object]=anything inserts the object as a member of the set and disregards anything, and a retrieval S[object] returns 1 if object is a member of the set or raises an key error otherwise. For example:: >>> S kjSet([1, 3, 2]) >>> S["this"] = "that" >>> S kjSet([1, 3, 2, 'this']) >>> S["this"] 1 >>> S["that"] KeyError: that In the case of graphs G[object]=map adds (object, map) as a new arc of the graph, and G[object] retrieves an arbitrary neighbor associated with object, or raises a KeyError if there is none. 
For example:: >>> G kjGraph([(1, 3), (4, 1)]) >>> G[1] = 9 >>> G kjGraph([(1, 3), (4, 1), (1, 9)]) >>> G[1] 3 >>> G[6] KeyError: 6 Some may find this use of indexing notation non-intuitive, but others may find it appealing, as far as I know. Index assignment is an "in place" mutation operation -- it will raise an error if the object has been hashed. Algebraic Operations -------------------- The implementation provides a number of common set theoretical operations over these structures. All the set algebraic operations are side effect free (and they may be applied to tables which have been hashed). These operations may be applied to tables with differing types, except where noted. Except for intersection and difference, a binary operation applied to objects of different types produces an object of the "more general" type, i.e, S+D produces a (possibly dirty) dictionary, S+G produces a graph, D+G produces a graph. Binary operations applied to objects of the same type produces an object of that type. Generally, when a set S is used in permitted mixed-mode algebra with a graph or a dictionary it "acts like" the identity dictionary S.ident(). The built in algebraic operations are as follows. Union produces the union of two structures of the same type, invoked using either the notation X+Y or X|Y. For example:: >>> kjGraph([(1,3), (4,1), (1,9)]) + kjSet([6,7,2]) kjGraph([(1, 3), (4, 1), (1, 9), (6, 6), (7, 7), (2, 2)]) If dictionary D1 contains (key, map1) and dictionary (or set) D2 contains (key, map2) then D1+D2 will be a dirty dictionary containing one of the pairs, but not the other. Difference produces the set difference of two structures of the same type, invoked using the notation X-Y. For example:: >>> kjSet([1,2,5,7]) - kjSet([1,2,4,8]) kjSet([7, 5]) Differences of graphs and dictionaries are allowed, where X-Y produces an object of the same type as X, but mixed differences are not allowed when one of the arguments is a set (yet). Composition with notation G1*G2 produces the graph containing (s1,d2) whenever there is an arc (s1,d1) in G1 and an arc (d1,d2) in G2}. For example:: >>> G1 = kjGraph([(0, 1), (1, 2), (3, 0), (3, 4), (2, 3)]) >>> G2 = kjGraph([(4, 0), (0, 1), (1, 2), (3, 1), (2, 0)]) >>> G1*G2 kjGraph([(0, 2), (1, 0), (3, 1), (3, 0), (2, 1)]) Any two tables can be composed, producing an object of the more general type. Composing two sets is a slower way to compute their intersection. Transposition with notation ~G produces the graph containing (d, s) if and only if G contains (s, d). For example:: >>> G = kjGraph([(0, 0), (3, 2), (6, 4), (20, 1), (23, 3), (26, 5)]) >>> ~G kjGraph([(0, 0), (4, 6), (1, 20), (3, 23), (2, 3), (5, 26)]) Transposition is defined for dictionaries, but if there are key collisions the winning pair will be decided arbitrarily and the resulting table will be dirty. For example:: >>> ~kjDict([("hello","hi"), ("hola","hi"), ("beat it","bye")]) kjDict([('bye', 'beat it'), ('hi', 'hola')]) This operation is not defined for sets. Intersection produces the set intersection of two structures invoked using the notation X&Y. For example:: >>> G = kjGraph([(0,0), (3,2), (6,4), (20,1), (23,3), (26,5), (2,23)]) >>> G & ~G.tclosure() kjGraph([(0, 0), (3, 2), (23, 3), (2, 23)]) Mixed mode intersections between graphs and dictionaries are allowed producing the less general dictionary type. Mixed mode intersections where one of the arguments is a set is not permitted. 
Note: The graph and dictionary operations of composition, reachability, transitive closure, and transposition assume that "right members" (values) are hashable. If any right member is not hashable these functions may raise a TypeError, for example:: >>> X = kjGraph([ (1,{}) ]) >>> ~X TypeError: unhashable type Here the empty Python dictionary is not a hashable type, so it could not be used in the transposed graph as a left member. On performance -------------- These structures use a hash table based representation which should deliver expected good performance for many applications. Nevertheless, as with all hash implementations there is a theoretical possibility of very bad worst case performance. Furthermore, inserts and deletes occasionally cause the internal structure to resize, so although the average speed for inserts and deletes is expected to be "near constant", once in a while an insert or delete may be slow. In addition, since the kjGraph implementation hashes using the left member only from each graph arc, graphs where many nodes have a very large number of neighbors may have poor access times. In this case it may appropriate to use a "set of pairs" or a "dict of sets" representation in place of a kjGraph, if this is possible, or some alternative implementation. The implementation of G.tclosure is "quick and dirty (keep it simple, stupid)" and leaves much room for speed improvements. It may be slow for large and complex graphs. If this is a problem I might be enticed to improve it, let me know. Someday I'd like to make the deletion operations faster (by a constant factor), but I'm not highly motivated here since I personally tend to build up tables without ever deleting anything. Miscellaneous comments ---------------------- Once again I'd like to commend Guido and the other Python contributors on their work. It's a delight to know that Python is nice both at the front end and at the back end. The package is written in C but descends from an ancestor (not suitable for public viewing) which was written exclusively in Python. I wrote this module (1) as an experimented in extending Python using C and (2) as an experiment in migrating a Python implementation to a C implementation. The result is a package which I hope may be useful to someone. This release is about twice as fast as previous releases thanks to permiscuous use of C macros in the implementation. Additionally, mixed-type operations, coercions, and a few additional methods have been added in this release. There is one defined constant in the C code you might want to play with: GSIZE -- the number of elements of the table heaped together in one "lump" (i.e, the size of an unordered subarray of the table). Roughly speaking if GSIZE is large the table will resize less often, and usually use space more efficiently. Generally speaking larger values will also make the accesses slower, but with a value less than around 64 this may not always be true on some machines with fancy memory caching (just guessing here, really). The default value is 6, which works pretty well on my machines. GSIZE also represents the basic size allocated for the smallest possible table, so if you expect to use lots of small sets a large GSIZE may not be advisable. Don't fiddle with the other constants unless you are willing to debug possible problems that may result. Bugs ---- Release 2 had a hole in the initializers that caused undefined behavior. It has been fixed in 2.1. Release 2.1 wouldn't link under Python 1.2. This has been fixed in 2.2. 
The first release would crash on certain graph operations (transpose, reachability, composition, transitive closure) applied to graphs that contained unhashable nodes. Now they raise an error instead. Previous releases also had a serious bug that sometimes corrupted the internal structure of kjSets. I don't know of any remaining "real" bugs -- the rest of this section discusses possibly confusing "features." As mentioned above in several places, structures that have been hashed may not be subsequently modified -- attempts to modify hashed structures will raise TypeError. Mixed mode differences and intersections are not allowed when one of the arguments is a set (as mentioned). Some unions and transposes on dictionaries will produce a dirty dictionary if there are key collisions, and the key collisions will be decided arbitrarily. Similarly, coercing a graph to a dictionary will produce a dirty dictionary if there are key collisions. See the section on Dirtiness above. The kjGraph implementation does not represent nodes with no edges. Programmers may work around this either by wrapping the graph in a class with a node set, or by adopting some appropriate convention that I leave to their infinitely creative imaginations. Please let me know if you find some other bug or confusing feature. At this point I consider the package to be reasonably well tested, but I offer no warrantees. gadfly-1.0.0/doc/kwParsing.txt0100644000157700012320000012055507467324766015276 0ustar rjonestechParser generator in Python: kwParsing ===================================== :Author: Aaron Watters :Version: $Revision: 1.3 $ This is the documentation for the kjParsing package, an experimental parser generator implemented in Python which generates parsers implemented in Python. It won't serve as a complete reference on programming language syntax and interpretation, but it will review terminology for the knowledgable and I hope it will pique the interest of the less experienced. Introduction ------------ The kjParsing package is a parser generator written in Python which generates parsers for use in Python. These modules and their documentation and demo files may be of use for classes on parsing, compiling, or formal languages, and may also be helpful to people who like to create experimental interpreters or translators or compilers. The package consists of three Python modules: kjParser, kjParseBuild, and kjSet. Together these modules are called the kjParsing package. The package also includes some documentation and demo files and a COPYRIGHT file which explains the conditions for copying and propagating this code and the fact that the author assumes no responsibility for any difficulties resulting from the use of this package by anyone (including himself). What a Parser Does ------------------ Parsers can be part of a lot of different things: compilers, interpreters, translators, or code generators, among others. Nevertheless, at an abstract level parsers all translate expressions of a language into computational actions. Parsers generated by the kjParseBuild module may do three different sorts of actions: Value Computation The parser may build a data structure as the result of the expression. For example the silly LispG grammar from the file "DLispShort.py" can construct integers, strings and lists from string representations. 
>>> from DLispShort import LispG, Context >>> LispG.DoParse1( ' ("list with string and int" 23) ', Context) ['list with string and int', 23] >>> Environment Modification The parser may modify the context of the computation. For example the LispG grammar allows the assignment of values to internal variable names. >>> LispG.DoParse1( '(setq Variable (4 5 9))', Context) [4, 5, 9] >>> Context['Variable'] [4, 5, 9] >>> (Here the second result indicates that the string 'Variable' has been associated with the value [4,5,9] in the Context structure, which in this case is a simple python dictionary.) External Side Effects The parser may also perform external actions. For example the LispG grammar has the ability to print values to the terminal. >>> LispG.DoParse1( '( (print Variable) (print "bye bye") )', Context ) [4, 5, 9] bye bye [[4, 5, 9], 'bye bye'] >>> (Here the first two lines are the results of printing and the last is the value of the expression.) More realistic parsers will perform more interesting actions, of course. To implement a parser using kjParseBuild you must define the grammar to parse and associate each rule and terminal of the grammar with an action which defines the computational meaning of each language construct. The grammar generation process consists of two phases Generation During this phase you must define the syntax of the language and function bindings that define the semantics of the language. When you've debugged the syntax and semantics you can dump the grammar object representing the syntax only to a grammar file which can be reloaded without re-analyzing the language syntax. For large grammars each regeneration may require significant time and computational resources. Use During this phase you may load the grammar file without re-analyzing the grammar on each use. However, the semantics functions must still be rebound on each load. The reloaded grammar object augmented with interpretation functions may be used to parse strings of the language. Note that the functions that define the semantics of the language are must be bound in both phases. A function for _`building a simple grammar`:: 1 # from file DLispShort.py (with small differences) 2 def buildSimpleGrammar(): 3 import kjParseBuild 4 LispG = kjParseBuild.NullCGrammar() 5 LispG.SetCaseSensitivity(0) 6 DeclareTerminals(LispG) 7 LispG.Keywords("setq print") 8 LispG.punct("().") 9 LispG.Nonterms("Value ListTail") 10 LispG.comments([LISPCOMMENTREGEX]) 11 LispG.Declarerules(GRAMMARSTRING) 12 LispG.Compile() 13 LispG.MarshalDump('testlisp_mar.py') 14 BindRules(LispG) 15 return LispG Defining a Grammar ------------------ A programming language grammar is conventionally divided into several components: Keywords These are special strings that "highlight" a language construct. Familiar keywords from Python and Pascal and C are "if", "else", and "while". Terminals These are special patterns of characters that indicate a value in the language. For example many programming languages will classify the string 123 as an instance of the integer nonterminal and the string snark (not contained in quotes) as an instance of the nonterminal identifier or variable. Terminals are usually restricted to very simple constructs like identifiers, numbers, and strings. More complex things (such as a "date" data type) might be better handled by nonterminals and rules. Nonterminals These are "place holders" for language constructs of the grammar. They represent parts of the grammar which sometimes expand to great size and complexity. 
For instance the C language grammar presented by Kernigan and Ritchie has a nonterminal translationUnit which represents a complete C language module, a nonterminal conditionalExpression which represents a truth valued expression of the language. Punctuations These are special characters or strings which are recognized as separate entities even if they aren't physically separated from other strings by white space. For example, most languages would "see" the string if0 as a single token (probably an identifier) even if if is a keyword, whereas if(0) would be recognized the same as if ( 0 ) because parentheses are normally considered punctuations. Except for the special treatment at recognition, punctuations are similar to keywords. The syntax of a language describes how to recognize the components of the language. To define a language syntax using kjParseBuild you must create a null compilable grammar object to contain the grammar (in `building a simple grammar`_ this is done on line 3 using the class constructor kjParseBuild.NullCGrammar() creating the grammar object LispG) and define the components of the grammar and the rules for recognizing the components. The component definitions and rule declarations, as well as the specification of case sensitivity and comment patterns, are performed on lines 4 through 10 of `building a simple grammar`_ for the LispG grammar. Declaring Case Sensitivity and Comments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are two nuances to parsing not yet mentioned: case sensitivity and comments. Some grammars are not case sensitive in recognizing keywords or identifiers. For example ANSI standard SQL (which is not case sensitive for keywords or identifiers) recognizes Select, select, SELECT, and SeLect all as the keyword SELECT. To specify the case sensitivity of the grammar for keywords only use:: GRAMMAROBJECT.SetCaseSensitivity(TrueOrFalse) where TrueOrFalse is 0 for no case sensitivity or 1 for case sensitivity. This must be done before any keyword declarations for the grammar. All other syntax declarations may be done in any order before the compilation of the grammar object. In `building a simple grammar`_ the LispG grammar object is declared to be case insensitive on line 4. Comments are patterns in the input string which are ignored (or more precisely interpreted as white space) by the language. To declare a sequence of regular expressions to be interpreted as a comment in a grammar use:: GRAMMAROBJECT.comments(LIST_OF_COMMENT_REGULAR_EXPR_STRINGS) For example, line 9 or `building a simple grammar`_ declares the constant string previously declared as:: LISPCOMMENTREGEX = ";.*" to represent a comment of the grammar LispG. For the syntax of regular expression strings you must look elsewhere, but as a hint ";.*" represents any string commencing with a semicolon, followed by any sequence of characters up to, but not including, a newline. Declaring Keywords, Punctuations, and Terminals ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To declare keywords for your grammar use:: GRAMMAROBJECT.Keywords( STRING ) where STRING is a white space separated string of keywords. Line 6 of `building a simple grammar`_ declares setq and print as keywords of LispG. To declare nonterminals for your grammar, similarly, use:: GRAMMAROBJECT.Nonterms( STRING ) where STRING is a white space separated string of nonterminal names. Line 8 of `building a simple grammar`_ declares Value and ListTail as nonterminals of the LispG. 
Similarly, use:: GRAMMAROBJECT.punct( STRING ) to declare a sequence of punctuations for the grammar, except that in this case the string must not contain any white space. Line 7 of `building a simple grammar`_ declares parentheses and dot to be punctuations of the LispG. If you have a lot of keywords, punctuations, or nonterminals you can make many separate calls to the appropriate declaration methods with different strings. These declarations will cause the grammar to recognize the declared keyword strings (when separated from other strings by white space or punctuations) and punctuations as special tokens of the grammar at the lowest level of parsing. The parsing process derives nonterminals of the grammar at a higher level as discussed below. A small difficulty with kjParseBuild is that the strings ``@R``, ``::``, ``>>``, and ``##`` cannot be used as names of keywords for the grammar because they are used to specify rule syntax in the "metagrammar". If you need these in your grammar they may be implemented as "trivial" terminals. For example:: Grammar.Addterm("poundpound", "##", echo) I'm unsure whether this patch is good enough. Does anyone have any advice for me? If this is a bad problem for some grammar the keywords of the meta grammar can be changed of course, but this is a hack. Declaring Terminals Defining the terminals of a grammar:: # from DLispShort.py def DeclareTerminals(Grammar): Grammar.Addterm("int", INTREGEX, intInterp) Grammar.Addterm("str", STRREGEX, stripQuotes) Grammar.Addterm("var", VARREGEX, echo) This shows the declarations for installing the int, str, and var terminals in the grammar. This is given as a separate function because the declarations define both the syntax and semantics for the terminals, and therefore must be called both during grammar generation and after loading the generated grammar object. To declare a terminal for a grammar use:: GRAMMAROBJECT.Addterm(NAMESTR, REGEXSTR, FUNCTION) This declaration associates both a regular expression string REGEXSTR and an interpretation function FUNCTION to the terminal of the grammar named by the string NAMESTR. The FUNCTION defines the semantics of the terminal as describe below and the REGEXSTR specifies a regular expression for recognizing the string. For example on line 2 of Figure TermDef the var terminal is associated with the regular expression string:: STRREGEX = '"[^\n"]*"' which matches any string starting with double quotes and ending with double quotes which contains neither double quotes nor a newline. Declaring Rules of the Grammar ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _`grammar string`: A grammar definition string:: # from DLispShort.py GRAMMARSTRING =""" Value :: ## indicates Value is the root nonterminal for the grammar @R SetqRule :: Value >> ( setq var Value ) @R ListRule :: Value >> ( ListTail @R TailFull :: ListTail >> Value ListTail @R TailEmpty :: ListTail >> ) @R Varrule :: Value >> var @R Intrule :: Value >> int @R Strrule :: Value >> str @R PrintRule :: Value >> ( print Value ) """ To declare the rules of a grammar use the simple rule definition language which comes with kjParseBuild, for example as shown in Figure GramStr. Line 10 of `building a simple grammar`_ uses the string defined above to associate the rules with the grammar using:: GRAMMAROBJECT.DeclareRules(RULE_DEFINITION_STRING) This declaration does not analyse the string; analysis and syntax/semantics errors are reported by ``*.Compile()`` described below. 
The rule definition language allows you to identify the root nonterminal of your grammar and specify a sequence of named derivation rules for the grammar. It also allows comments which start with ``##`` and end with a newline. An acceptible string for the rule definition language looks like:: RootNonterminalName :: NamedRule1 NamedRule2 ... Here the Root nonterminal name should be the nonterminal that "stands for" any complete string of the language. Furthermore, each named rule looks like:: @R NameString :: GoalNonterm >> RuleBody where the name string for the rule is a string without whitespace, the goal nonterminal is the nonterminal that the rule derives, and the rule body is a sequence of keywords, punctuations and nonterminals separated by white space. Rule names are used for mapping rules to semantic interpretations and should be unique. Note that punctuations for the grammar you are defining are not punctuations for the rule definition language (which has none), so they must be separated from other tokens by white space. The keyword for the rule definition language ``@R``, ``::``, ``>>`` must also be separated from other tokens by whitespace in the rule definition string. Furthermore, all punctuations, keywords, nonterminals, and terminals used in the rules must be declared for the grammar before the grammar is compiled (if one isn't the compilation will fail with an error). As a bit of sugar you may break up the declarations of rules:: LispG.DeclareRules("Value::\n") LispG.DeclareRules(" @R SetqRule :: Value >> ( setq var Value )\n") LispG.DeclareRules(" @R ListRule :: Value >> ( ListTail\n") ... This might be useful for larger grammars. A Brief Discussion of Derivations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The rules for a grammar don't really describe how to parse a string of the language, they actually describe how to derive a string of the grammar. For this reason it is possible to create a grammar which derives the same string in two different ways; such grammars are termed ambiguous. If you try to generate a parser for an ambiguous grammar the parse generation process will cause the parser generation process to complain. For a more precise definition of the derivation of a language string from a grammar see the "further readings" below. For illustrative purposes, and to help explain how to define semantics functions, consider the following derivation of the string:: ( 123 ( setq x "this" ) ) using the rules declared above (`grammar string`_): +------------------------------------+------------+ | Derivation | Rule used | +====================================+============+ | Value1 >> ( ListTail1 | ListRule | +------------------------------------+------------+ | ListTail1 >> Value2 ListTail2 | TailFull | +------------------------------------+------------+ | Value2 >> [int = 123] | Intrule | +------------------------------------+------------+ | ListTail2 >> Value3 ListTail3 | TailFull | +------------------------------------+------------+ | Value3 >> (setq [var='x'] Value4) | SetqRule | +------------------------------------+------------+ | Value4 >> [string='this'] | StrRule | +------------------------------------+------------+ | ListTail3 >> ) | TailEmpty | +------------------------------------+------------+ To obtain the string derived we simply substitute the representations derived for each of the numbered nonterminals and terminals of the derivation. 
So the right-to-left derivation steps for (123 (setq x "this")) are: +-----+------------------------------------+-------------+ | (1) | Value1 | | +-----+------------------------------------+-------------+ | (2) | ( ListTail1 | (ListRule) | +-----+------------------------------------+-------------+ | (3) | ( Value2 ListTail2 | (TailFull) | +-----+------------------------------------+-------------+ | (4) | ( 123 ListTail2 | (Intrule) | +-----+------------------------------------+-------------+ | (5) | ( 123 Value3 ListTail3 | (TailFull) | +-----+------------------------------------+-------------+ | (6) | ( 123 ( setq x Value4 ) ListTail3 | (SetqRule) | +-----+------------------------------------+-------------+ | (7) | ( 123 ( setq x "this" ) ListTail3 | (StrRule) | +-----+------------------------------------+-------------+ | (8) | ( 123 ( setq x "this" ) ) | (TailEmpty) | +-----+------------------------------------+-------------+ Compiling the Grammar Syntax, and Storing the Compilation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Once you have defined all the keywords, comments, terminals, nonterminals, punctuations, and rules of your grammer you may create the datastructures needed for parsing by compiling the grammar using:: GRAMMAROBJECT.Compile() Line 11 of `building a simple grammar`_ performs the compilation for the LispG grammar. If the compilation succeeds you may use:: GRAMMAROBJECT.MarshalDump( OUTPUTFILE ) to store the compiled grammar structure to a file that may be later loaded without recompiling the grammar. Here MarshalDump will create a binary "marshalled" representation for the grammar in the OUTPUTFILE. For example line 13 of `building a simple grammar`_ marshalls a representation for LispG to the file testlisp_mar.py. TESTLisp.GRAMMAR() will then reconstruct the internal structure of LispG as a grammar object and return the grammar object as the result of the function. Nevertheless, compilation of the grammar by itself does not yeild a grammar that will do any useful parsing [Actually, it will do "parsing" using default actions (implemented as a function which simply return the list argument).] Rules must be associated with computational actions before useful parsing can be done. Defining a Semantics ~~~~~~~~~~~~~~~~~~~~ Two sorts of objects require semantic actions that define their meaning: rules and terminals. All semantic actions must be defined as Python functions and bound in the grammar before parsing can be performed. Before you can define the semantics of your language in Python you better have a pretty good idea of what the components of the language are supposed to represent, of course. Using your intuitive understanding of the language you can: Decide what the context of the computation should be and how it should be implemented as a Python structure. If the process of Parsing must modify the context, then then the context structure must be a "mutable" python structure. In the case of LispG the context is simply a structure that maps "internal" variable names to values, implemented as a simple Python dictionary mapping name strings to the appropriate value. Decide what kind of Python value each terminal of the grammar represents. In the case of LispG str should represent a string value corresponding to the string recognized (minus the surrounding quotes). int should represent an integer value corresponding to the string recognized. 
var should represent the string representing the variable name recognized (the name must be translated to a corresponding value at a higher level since the terminal interpretation functions don't have access to the context structure). Decide what kind of Python structure or value each nonterminal represents. In the case of the LispG grammar: Value represents a Python integer, string, or list. ListTail represents a Python list containing the members of the tail of a list. Decide how each rule should derive a structure corresponding to the Goal (left hand side) of the rule based on the values corresponding to the terminals and nonterminals on the right hand side of the rule. In the case of the LispG grammar (refer to Figure GramStr for rule definitions): SetqRule should return whatever the Value terminal in the body represents. ListRule should return the list represented by the ListTail nonterminal of the body. TailFull should return the result of adding the value corresponding to the Value nonterminal of the list to the front of the list corresponding to the Listtail nonterminal of the body. Varrule should return the value from the computational context that corresponds to the variable name represented by the var terminal of the body. Intrule should return the integer corresponding to the int terminal of the body. Strrule should return the string corresponding to the str terminal of the body. PrintRule should return the value represented by the Value nonterminal of the body. Decide what side effects, if any, each rule should have on the computational context or externally. In the case of the LispG grammar: SetqRule should associate the variable name represented by var to the value represented by Value in the body. PrintRule should print the value corresponding to the Value nonterminal to the screen. The other rules of LispG should have no internal or external side effects. More complex languages may require much more complex contexts, values and side effects, including function definitions, modules, database table accesses, user authorization verifications, and/or file creation, among other possibilities. Having determined the intuitive semantics of the language you may now specify implement the semantic functions and bind them in your grammar. Semantics for Terminals ~~~~~~~~~~~~~~~~~~~~~~~ To define the meaning of a terminal you must create a Python function that translates a string (which the parser has recognized as an instance of the terminal) into an appropriate value. For instance, when the LispG grammar recognizes a string:: "this is a string" the interpretation function should translate the recognized string into the Python string it represents: namely, the same string but with the double quotes stripped off. The following "string intepretation function" will perform this simple interpretation. So:: # from DLispShort.py def stripQuotes( str ): return str[1:len(str)-1] Similarly, when the parser recognizes a string as an integer, the associated interpretation function should translate the string into a Python integer. The binding of interpretation functions to terminal names is performed by the Addterm method previously mentioned. For example, line 2 of Figure TermDef associates the stripQuotes function to the nonterminal named str. All functions passed to Addterm should take a single string argument which represents the recognized string, and return a value which represents the semantic interpretation for the input string. 
Semantics for Rules ~~~~~~~~~~~~~~~~~~~ The semantics of rules is more interesting since they may have side effects and require the kind of recursive thinking that gives most people headaches. The semantics for rules are specified by functions. To perform the semantic action associated with a rule, the "reduction function" should perform any side effects (to the computational context or externally) and return a result value that represents the interpretation for the nonterminal at the head of the rule. The reduction functions for the rules:: # from DLispShort.py def EchoValue( list, Context ): return list[0] def VarValue( list, Context ): varName = list[0] if Context.has_key(varName): return Context[varName] else: raise NameError, "no such lisp variable in context "+varName def NilTail( list, Context ): return [] def AddToList( list, Context ): return [ list[0] ] + list[1] def MakeList( list, Context ): return list[1] def DoSetq( list, Context): Context[ list[2] ] = list[3] return list[3] def DoPrint( list, Context ): print list[2] return list[2] Binding named rules to interpretation functions:: # from DLispShort.py def BindRules(LispG): LispG.Bind( "Intrule", EchoValue ) LispG.Bind( "Strrule", EchoValue ) LispG.Bind( "Varrule", VarValue ) LispG.Bind( "TailEmpty", NilTail ) LispG.Bind( "TailFull", AddToList ) LispG.Bind( "ListRule", MakeList ) LispG.Bind( "SetqRule", DoSetq ) LispG.Bind( "PrintRule", DoPrint ) The Python functions that define the semantics of the rules of LispG appear above and the declarations that bind the rule names to the functions in the grammar object LispG appear in Figure ruleBind. Each "reduction function" for a rule must take two arguments: a list representing the body of the rule and a context structure which represents the computational context of the computation. The list argument will have the same length as the body of the rule, counting the keywords and punctuations as well as the terminals and nonterminals. For example the SetqRule has a body with five tokens:: @R SetqRule :: Value >> ( setq var Value ) so the DoSetq function should expect the parser to deliver a Python list argument with five elements of form:: list = [ '(', 'SETQ', VARIABLE_NAME, VALUE_RESULT, ')' ] note that the "names" of keywords and punctuations appear in the appropriate positions (0, 1, and 4) of the list corresponding to their positions in SetqRule. Furthermore, the position occupied by the terminal var in SetqRule has been replaced by a string representing a variable name in the list and the position occupied by the nonterminal Value in SetqRule has been replaced by a Python value. More generally, the parser will call reduction functions for rules with a list representing the "interpreted body of the rule" where keywords and punctuations are interpreted as themselves (i.e., their names), except that letters will be in upper case if the grammar is not case sensitive; terminals are interpreted as values previously returned by a call to the appropriate terminal interpretation function; and nonterminals are interpreted as values previously returned by a reduction function for a rule that derived this terminal. Although, the occurrence of the keyword names in the list may seem useless, it may have its purposes. For example, a careful programmer might check them during debugging to make sure the right function was bound to the right rule. To determine how to implement the semantics of a rule you must refer to the semantic decisions you made earlier. 
For example, above we specified that the setq construct should bind the variable name recieved ( list[2]) to the value ( list[3]) in the Context, and return the value ( list[3]) as the result of the expression. Translated into the more concise language of Python this is exactly what DoSetq shown in Figure RedFun does. To bind a rule name to a (previously declared) reduction function use:: GRAMMAROBJECT.Bind( RULENAME, FUNCTION ) where RULENAME is the string name for the rule previously declared for the grammar GRAMMAROBJECT and FUNCTION is the appropriate reduction function for the rule. These bindings for LispG are shown in Figure ruleBind. A Bit on the Parsing Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following is not a precise definition of the actions of a Parser, but it may help you understand how the parsing process works and the order in which rules are recognized and functions are evaluated. Parsing ``(123 (setq x "this"))``: +-+---------------------------+----------------------+----------------------+ | |Tokens seen S | input remaining | rule R and | | | | | function call | +=+===========================+======================+======================+ |0| |(123 (setq x "this")) | | +-+---------------------------+----------------------+----------------------+ |1| ( 123 |(setq x "this")) |Intrule | +-+---------------------------+----------------------+----------------------+ | | | |Value2 = | | | | |EchoValue([123],C)) | +-+---------------------------+----------------------+----------------------+ |2|( Value2 ( setq x "this" |)) | StrRule | +-+---------------------------+----------------------+----------------------+ | | | |Value4 = | | | | |EchoValue(['this'],C) | +-+---------------------------+----------------------+----------------------+ |3|( Value2 ( setq x Value4 ) |) |SetqRule | +-+---------------------------+----------------------+----------------------+ | | | |Value3 = DoSetq(['(', | | | | |'SETQ','x',Value4,')']| | | | |,C) | +-+---------------------------+----------------------+----------------------+ |4|( Value2 Value3 ) | |TailEmpty | +-+---------------------------+----------------------+----------------------+ | | | |ListTail3 = | | | | |NilTail([')'],C) | +-+---------------------------+----------------------+----------------------+ |5|( Value2 Value3 ListTail3 | |TailFull | +-+---------------------------+----------------------+----------------------+ | | | |ListTail2 = | | | | |AddToList([Value3, | | | | |ListTail3],C) | +-+---------------------------+----------------------+----------------------+ |6|( Value2 ListTail2 | |TailFull | +-+---------------------------+----------------------+----------------------+ | | | |ListTail3 = | | | | |AddToList([Value2, | | | | |ListTail2],C) | +-+---------------------------+----------------------+----------------------+ |7|( ListTail3 | |ListRule | +-+---------------------------+----------------------+----------------------+ | | | |Value1 = | | | | |MakeList(['(',Value1],| | | | |C) | +-+---------------------------+----------------------+----------------------+ |8|Value1 | | | +-+---------------------------+----------------------+----------------------+ Technically, each entry of S is tagged with the kind of token it represents (keyword, nonterminal, or terminal) and the name of the token it represents (e.g., Value, str) as well as the values shown. The table above illustrates the sequence of reduction actions performed by LispG when parsing the input string (123 (setq x "this")). 
We can think of this parse as "reversing" the derivation process shown in Figure Derive using the rule reduction functions to obtain semantic interpretations for the nonterminals. At the lowest level of parsing a lexical analyser examines the unread portion of the input string tries to match a prefix of the input string with a keyword or a regular expression for a terminal (ignoring comments and whitespace, except as separators). The analyser "passes" the recognized token to the higher level parser together with its interpreted value. The interpreted value of a terminal is determined by using the appropriate interpretation function and the interpreted value of a keyword is simply its name (in upper case, if the grammer is not case sensitive). For example the LispG lexical analyser recognizes '(' as a keyword with the value '(' and "this" as an instance of the nonterminal str with the value 'this'. The higher level parser accepts tokens T from the lexical analyser and does one of two things with them If the most recent token values V the parser has saved on its "tokens seen" stack S "looks like" the body B of a rule R and the current token is a token that could follow the nonterminal N at the head of R, then the parser evaluates the reduction function F associated with R, using the values V from the stack S that match the body B together with the computational context C. The resulting value F(V,C) replaces the values V Otherwise the current token is shifted onto the "tokens seen" stack S and the parser moves on to the next token. The above is a lie. Actually, the parsing process is much smarter than this, but from a users perspective this simplification may be helpful. Figure Parse shows "reduction" steps and not the "shifts", and glosses over the lexical analysis and other nuances, but it illustrates the idea of the parsing process nonetheless. For example at step 2 the parse recognizes the last token on the stack S (an instance of the "str" terminal with value "this") as matching the body of StrRule, and replaces it with the an instance of the nonterminal Value with value determined by the reduction of StrRule. In this case StrRule is associated with the reduction function EchoValue, so the result of the reduction is given by EchoValue( 'this', C ) where C is the context structure for the Parse. At Step 3 the most recent entries of S:: V = ['(', 'SETQ', 'x', Value4, ')'] match the body of the rule SetqRule, so they are replaced on S by an instance of the Value nonterminal with value determined by:: Value3 = DoSet( V, C ) Finally, at step 8, the interpretation associated with Value1 (an instance of the root nonterminal for LispG) is considered the result of the computation. Parsing with a Grammar ~~~~~~~~~~~~~~~~~~~~~~ Before you can perform a parse you probably must create a computational context for the parse. In the case of LispG the context is simply a dictionary so we may initialize:: Context = {} To create a context for Parsing. There are two methods which provide the primary interfaces for the parsing process for a grammar:: RESULT = GRAMMAROBJECT.Parse1(STRING, CONTEXT) (RESULT, CONTEXT) = GRAMMAROBJECT.Parse(STRING, CONTEXT) The second allows you to make explicit in code that uses parsing the possibility that a parse may alter the context of the parse -- aside from that the two functions are identical. Example usage for Parse1 using LispG were given earlier. 
Storing and Reloading a Grammar Object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The process of compiling a grammar may take significant time and consume significant quantities of memory. To free up memory from structures in a compilable grammar object that aren't needed after compilation use GRAMMAR.CleanUp(). Once you have debugged the syntax and semantics of your grammar you may store syntactic information for the grammar using the Reconstruct method already mentioned. The declarations created by Reconstruct only defines the syntax for the grammar. The semantics must be rebound separately. But it is much better to use UnMarshalGram as shown below, which stores the grammar in a binary format. For example, line 13 of `building a simple grammar`_ creates a file testlisp_mar.py containing a function GRAMMAR() which will reconstruct the syntax for the LispG grammar:: # from DLispShort.py def unMarshalLispG(): import kjParser LispG = kjParser.UnMarshalGram('testlisp_mar') DeclareTerminals(LispG) BindRules(LispG) return LispG This function can then be used in another file, provided GrammarBuild() given in `building a simple grammar`_ has been executed at some point in the past, thusly:: import DLispShort LGrammar = DLispShort.unMarshalLispG() Errors raised ~~~~~~~~~~~~~ You may see the following errors: LexTokenError This usually means the lowest level of the parser ran into a string it couldn't recognize. BadPunctError You tried to make a whitespace character a punctuation. This is not currently allowed. EOFError, SyntaxError You tried to parse a string that is not valid for the grammar. TokenError During parser generation you used a string in the rule definitions that wasn't previously registered as a terminal, nonterminal, or punctuation. NotSLRError You attempted to build a grammar that is not "SLR" according to the definition of Aho and Ullman. Either the grammar is ambiguous, or it doesn't have a derivation for the root nonterminal, or it is too tricky for the generator. Furthermore NondetError, ReductError, FlowError, ParseInitError, UnkTermError or errors raised by other modules shouldn't happen. If an error that shouldn't happen happens there are two possibilities (1) you have fiddled with the code or data structures and you broke something, or (2) there is a serious bug in the module. Possible Gotchas ~~~~~~~~~~~~~~~~ This package has a number of known deficiencies, and there are probably many that are yet to be discovered. Syntax errors are not reported nicely. Sorry. Currently, there is no way to to resolve grammar ambiguities. For example a C construct:: if (x) if (y) x = 0; else y = 1; could have the else associated with either the first or second if; the grammar doesn't indicate which. This is normally resolved by informing the parser generator to prefer one binding or the other. No method for providing a preference is implemented here, yet. Let me know if you need such a method or if you have any suggestions. Keywords of the meta-grammar cannot name tokens of the object grammar (see footnote above). If you want keywords to be recognized without case sensitivity you must declare G.SetCaseSensitivity(0) before any keyword declarations. Name and regular expression collisions are not always checked and reported. If you name two rules the same, for example, you may get undefined behavior. The lexical analysis implementation is not as fast as it could be (of course). 
It also sees all white space as a "single space" so, for example, if indentation is significant in your grammar (as in Python) you'll need a different lexical analyzer. Also if x=+y means something different from x = + y (as it did in the original C, I believe) you may have trouble. Happily the lexical component can be easily "plug replaced" by another implementation if needed. Also, the system currently only handles SLR grammars (as defined by Aho and Ullman), as mentioned above. If you get a NonSLRError during grammar compilation you need a better parser generator. I may provide one, if I have motivation and time. I know of no outright bugs. Trust me, they're there. Please find them for me and tell me about them. I'm not a big expert on parsing so I'm sure I've made some errors, particularly at the lexical level. Further Reading ~~~~~~~~~~~~~~~ A standard reference for parsing and compiler, interpreter, and translator implementation is Principles of Compiler Design, by Aho and Ullman (Addison Wesley). gadfly-1.0.0/doc/network.txt0100644000157700012320000002443307467601133015002 0ustar rjonestechGadfly Server Operations ======================== :Version: $Revision: 1.2 $ To permit multiple processes to access and modify a single database instance, and to reduce the overhead per process of connecting to a Gadfly database a Gadfly database may be run in server mode. A Gadfly server can use a DBA (data base administrator) configured start-up script to set up optimized query accesses and certain forms of security. For example to startup a server for the test database "test" in directory "_test_dir" (created by ./run_tests) use:: gfserve 2222 test _test_dir admin or to start up the same server with some non-priviledged policies and some named prepared queries (as initialized in test/gfstest.startup(...)) use:: gfserve 2222 test _test_dir admin gfstest In both cases the admin password for the server is "admin" and the server runs on port 2222. See the doc string for gadfly.server for more information on the command line arguments. Only one process should directly access a gadfly database at once (not mediated by a server), so if a server is running, no other server for that database should be started and no other process should connect in "non-server" mode to that database. Motivation ---------- There are several reasons to run a server: to allow multiple processes to access the same database; to allow password protected restricted access to the database by non-priviledged agents; and to permit faster access to the database by providing globally shared prepared statements. Using a server also eliminates the need to start up and load the database many times -- and startup time could be considerable if the database is large. For example I imagine that simple Gadfly servers may be of use to implement database enabled CGI scripts, whereas the "non-server" Gadfly will only run with CGI scripts that do not modify the database, and the startup time for Gadfly might make those scripts unacceptibly slow if the database is large. Furthermore, by using the security features a Gadfly server could be configured to allow restricted data distribution across a network without compromising the integrity of the database. Security -------- The primary goal of Gadfly server security is to prevent accidental or malicious destruction of a database. Security is arbitrated by policies. Policies have passwords that are never transmitted in clear text. 
However, a "captured" command could potentially be repeated by a hostile program even without knowing the password. It is not a good idea to run admin or other unrestricted commands on a network that may have hostile parties sniffing the network. As with the rest of the system I provide no guarantees, but for many purposes the level of security provided may be acceptible. To be specific passwords are used to generate md5 certificates for all server accesses (please see gadfly.gfsocket for implementation details). A server always has an "admin" policy that is permitted to shutdown, restart, or force a checkpoint on the server. By default the admin policy also has the ability to run arbitrary SQL statements such as "drop table x". This ability can be disabled in a startup function if needed:: admin_policy.general_queries=0 Other policies can be created that have very restricted access. For example the following startup function initializes two policies beyond the admin policy that can only access certain tables in specific ways (from test/gfstest.py):: def startup(admin_policy, connection, Server_instance): """example startup script. add a policies test and test1 passwords same test1 is allowed to query the likess table by name test is allowed to update likes where drinker='nan' also add prepared query dumpwork to admin_policy. """ from gadfly.server import Policy admin_policy["dumpwork"] = "select * from work" test1 = Policy("test1", "test1", connection, queries=0) test = Policy("test", "test", connection, queries=0) test1["qlike"] = "select * from likes where drinker=?" test["updatenan"] = """ update likes set beer=?, perday=? where drinker='nan' """ test["getnan"] = """ select * from likes where drinker='nan' """ return {"test": test, "test1": test1} Please see the doc string for gadfly.server for more information on creating startup functions. A policy with queries disabled (queries=0) can only execute named queries. By using such policies a DBA can configure a server such that client programs can only read certain tables, can only update certain rows of certain tables in certain ways, and so forth. Even policies with "unrestricted access" (queries=1) can provide performance benefits if they have associated named, prepared queries (like "dumpwork" above). At the moment the SQL parser slows down gadfly a bit, and prepared queries will only be parsed once for all clients. After the first access subsequent accesses may be noticably faster (10x faster in some cases), especially if the server has the kjbuckets builtin C module. However, with queries=1 the policy can execute any SQL statement. NOTE: The server runs all accesses (once the complete message has been read from the network) serially -- there is no concurrent access permitted to a Gadfly instance at this time. For this reason a "large query" may cause the server to "freeze" and delay other accesses. Incomplete requests due to network delays or other problems will not freeze the server, however (sockets are polled using select.select). NOTE: All server accesses run in "autocommit mode" at this time. A successful access automatically triggers a database commit (but an unsuccessful access will rollback). As an optimization, however, checkpoints only occur occasionally, once per a given number of accesses, configurable by setting:: Server_instance.check_loop = 100 Start up -------- Servers can be started from the command line using the gadfly.server script interpretation (as shown above) or using gadfly.server.Server(...) 
from another program. See the doc strings and source for gadfly.server and test/gfstest.py for more information. Shut down --------- NOTE: this is current inaccurate - gfserver needs to be extended to provide this functionality Servers can be shut down from the command line interpretation of gfclient or from another program using the gadfly.client.gfclient(...) class shutdown() method, but only using the admin policy with the admin password. For example to shut down the server started above:: gfclient shutdown 2222 admin See the doc strings and source for gadfly.server and test/gfstest.py for more information. Client Access ------------- Client access to a gadfly server is similar to the normal Python DB-SIG DBAPI access to gadfly, except that it is sometimes faster and can potentially be run from any machine reachable on the network (if the client program knows the password). To access a gadfly server from a remote machine the only python modules required (in addition to the standard libraries) are gadfly.client and gadfly.gfsocket. Initialize a connection with a given "POLICY" with "PASSWORD" to a running server on "machine.domain.com" using port number 2222 with: from gadfly.client import gfclient:: conn = gfclient("POLICY", 2222, "PASSWORD", "machine.domain.com") Note that policy names and passwords are case sensitive. Queries and other statements are normally executed via cursors. Obtain a cursor from a connection using:: cursor = connection.cursor() Execute a statement in a cursor using:: cursor.execute(statement) or to provide dynamic parameters:: cursor.execute(statement, dynamic_parameters) For example:: cursor.execute("select * from work") ... cursor.execute("select * from work where name=?", ("carla",)) The dynamic parameters work the same as described in the the main gadfly documentation page. In particular INSERT VALUES can insert several rows at once by using a list of tuples for the rows. If there is any problem (bad policy name, bad password, server not running, queries not allowed for this policy) the execute will generate an exception. To run a named/prepared query (initialized at startup) use execute_prepared, which takes a prepared statement name rather than a query string:: cursor.execute_prepared("updatenan", ("rollingrock", 1)) ... cursor.execute_prepared("getnan") The execute_prepared method works just like the execute method except that the "name" must be the name of a query initialized by the startup(...) function at server startup. NOTE: by default any execution that sends or recieves "too much data" will be aborted. Edit gadfly.gfsocket (both on the client end and on the server end if different) if you wish to disable this sanity check feature:: LEN_LIMIT=10e8 As with other dbapi cursors the results of a query can be extracted as a list of tuples using (after execute):: result_list = cursor.fetchall() The other fetches (fetchone and fetchmany) have not been implemented yet (partially since they don't make much sense in this context). Both named and unnamed statements may be semicolon separated sequences of several SQL statements, but if they are they will return no results. Interactive Client Access ------------------------- The gfplus command provides interactive access to a Gadfly server. Implementation Comments ----------------------- For your information the server/client interaction is much like "finger" or "http" -- each client access is a separate TCP/Stream connection where the client sends a request and the server sends a response. 
After each access the connection is closed and the next access generates a new connection. I did it that way, because it was a simple and robust strategy (witness the success of HTTP). Please note: Although I have attempted to provide a robust implementation for this software I do not guarantee its correctness. I hope it will work well for you but I do not assume any legal responsibility for problems anyone may have during use of these programs. gadfly-1.0.0/doc/recover.txt0100644000157700012320000001370307465430476014764 0ustar rjonestechGadfly Recovery =============== :Version: $Revision: 1.1.1.1 $ In the event of a software glitch or crash Gadfly may terminate without having stored committed updates. A recovery strategy attempts to make sure that the unapplied commited updates are applied when the database restarts. It is always assumed that there is only one primary (server) process controlling the database (possibly with multiple clients). Gadfly uses a simple LOG with deferred updates recovery mechanism. Recovery should be possible in the presence of non-disk failures (server crash, system crash). Recovery after a disk crash is not available for Gadfly as yet, sorry. Due to portability problems Gadfly does not prevent multiple processes from "controlling" the database at once. For read only access multiple instances are not a problem, but for access with modification, the processes may collide and corrupt the database. For a read-write database, make sure only one (server) process controls the database at any given time. The only concurrency control mechanism that provides serializability for Gadfly as yet is the trivial one -- the server serves all clients serially. This will likely change for some variant of the system at some point. This section explains the basic recovery mechanism. Normal operation ---------------- Precommit ~~~~~~~~~ During normal operations any active tables are in memory in the process. Uncommitted updates for a transaction are kept in "shadow tables" until the transaction commits using:: connection.commit() The shadow tables remember the mutations that have been applied to them. The permanent table copies are only modified after commit time. A commit commits all updates for all cursors for the connection. Unless the autocommit feature is disabled (see below) a commit normally always triggers a checkpoint too. A rollback:: connection.rollback() explicitly discards all uncommitted updates and restores the connection to the previously committed state. There is a 3rd level of shadowing for statement sequences executed by a cursor. In particular the design attempts to make sure that if:: cursor.execute(statement) fails with an error, then the shadow database will contain no updates from the partially executed statement (which may be a sequence of statements) but will reflect other completed updates that may have not been committed. Commit ~~~~~~ At commit, operations applied to shadow tables are written out in order of application to a log file before being permanently applied to the active database. Finally a commit record is written to the log and the log is flushed. At this point the transaction is considered committed and recoverable, and a new transaction begins. Finally the values of the shadow tables replace the values of the permanent tables in the active database, (but not in the database disk files until checkpoint, if autocheckpoint is disabled). 
Checkpoint ~~~~~~~~~~ A checkpoint operation brings the persistent copies of the tables on disk in sync with the in-memory copies in the active database. Checkpoints occur at server shut down or periodically during server operation. The checkpoint operation runs in isolation (with no database access allowed during checkpoint). Note: database connections normally run a checkpoint after every commit, unless you set:: connection.autocheckpoint = 0 which asks that checkpoints be done explicitly by the program using:: connection.commit() # if appropriate connection.checkpoint() Explicit checkpoints should make the database perform better, since the disk files are written less frequently, but in order to prevent unneeded (possibly time consuming) recovery operations after a database is shutdown and restarted it is important to always execute an explicit checkpoint at server shutdown, and periodically during long server runs. Note that if any outstanding operations are uncommitted at the time of a checkpoint (when autocheckpoint is disabled) the updates will be lost (ie, it is equivalent to a rollback). At checkpoint the old persistent value of each table that has been updated since the last checkpoint is copied to a back up file, and the currently active value is written to the permanent table file. Finally if the data definitions have changed the old definitions are stored to a backup file and the new definitions are written to the permanent data definition file. To signal successful checkpoint the log file is then deleted. At this point (after log deletion) the database is considered quiescent (no recovery required). Finally all back up table files are deleted. [Note, it might be good to keep old logs around... Comments?] Each table file representation is annotated with a checksum, so the recovery system can check that the file was stored correctly. Recovery -------- When a database restarts it automatically determines whether the last active instance shut down normally and whether recovery is required. Gadfly discovers the need for recovery by detecting a non-empty current log file. To recover the system Gadfly first scans the log file to determine committed transactions. Then Gadfly rescans the log file applying the operations of committed transactions to the in memory table values in the order recorded. When reading in table values for the purpose of recovery Gadfly looks for a backup file for the table first. If the backup is not corrupt, its value is used, otherwise the permanent table file is used. After recovery Gadfly runs a normal checkpoint before resuming normal operation. Please note: Although I have attempted to provide a robust implementation for this software I do not guarantee its correctness. I hope it will work well for you but I do not assume any legal responsibility for problems anyone may have during use of these programs. gadfly-1.0.0/doc/sql.txt0100644000157700012320000002162107473327706014114 0ustar rjonestechGadfly SQL constructs ===================== :Version: $Revision: 1.3 $ This document describes SQL constructs supported by Gadfly. The presentation does not define the complete syntax -- see sqlgram.py for the precise syntax as BNF -- nor the complete semantics -- see a good book on SQL for more detailed coverage of semantic (or use the source, Luke ;c) ). Also, please have a look at my evolving database course notes for more coverage of SQL. Examples of all supported constructs are also shown in the test suite source file gftest.py. 
This document is only a very brief guide, primarily of use to those who already understand something about SQL -- it is neither a tutorial nor a detailed discussion of syntax and semantics. .. contents:: The Standard, with omissions ---------------------------- Gadfly supports a large subset of ODBC 2.0 SQL. The reason ODBC 2.0 is chosen is because it provides a fairly strong set of constructs, but does not include some of the more obscure features of other SQL standards which would be extremely difficult and complex to implement correctly (and perhaps, not used very frequently (?)). Supported features include views, groupings, aggregates, subquery expressions, quantified subquery comparisons, EXISTS, IN, UNION, EXCEPT, INTERSECT, searched mutations and Indices, among others (see below). Some important omissions from ODBC 2.0 at this point are - Nulls. - Outer joins. - CHECK conditions. - Enforced data type constraints. - Alter table (can't implement until NULLs arrive). - Date, Time, and Interval data types It is hoped these will be implemented at some future time. Less important omissions include Cursor based updates and deletes (justification: if you really need them the db design is flawed, and it's possible to use python instead). LIKE string predicate (justification: use Python regexes in python code). Users and permissions (justification: Gadfly is not intended for full multiuser use at this time). These may or may not be implemented at some future time. Statements ---------- All interaction with SQL databases is mediated by SQL statements, or statement sequences. Statement sequences are statements separated by semicolons. SQL keywords and user defined names are not case sensitive (but string values are, of course). SQL statements include the following. Select Statement ~~~~~~~~~~~~~~~~ The select statement derives a table from tables in the database. It's general form is:: sub_query optorder_by Where sub_query is given by:: SELECT alldistinct select_list FROM table_reference_list optwhere optgroup opthaving optunion Read the statement:: SELECT [DISTINCT|ALL] expressions or * FROM tables [WHERE condition] [GROUP BY group-expressions] [HAVING aggregate-condition] [union-clause] [ORDER BY columns] as follows: 1) Make all combinations of rows from the tables (FROM line) 2) Eliminate those combinations not satisfying condition (WHERE line) 3) (if GROUP present) form aggregate groups that match on group-expressions 4) (if HAVING present) eliminate aggregate groups that don't satisfy the aggregate-condition. 5) compute the columns to keep (SELECT line). 6) (if union-clause present) combine (union, except, intersect) the result with the result of another select statement. 7) if DISTINCT, throw out redundant entries. 8) (if ORDER present) order the result by the columns (ascending or descending as specified, with precedence as listed). This reading has little to do with the actual implementation, but the answer produced should match this intuitive reading. Create and drop table ~~~~~~~~~~~~~~~~~~~~~ The create and drop table constructs initialize and destroy a table structure, respectively:: CREATE TABLE user_defined_name ( colelts ) DROP TABLE user_defined_name The colelts declare the names of the columns for the table and their data types. The data types are not checked or enforced in any way at this time. 
Table mutations (INSERT, UPDATE, DELETE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Insert, Update, and Delete statements insert rows into tables, modify rows in tables in place, or remove rows from tables respectively:: INSERT INTO table_name optcolids insert_spec DELETE FROM user_defined_name optwhere UPDATE user_defined_name SET assns optwhere The insert statement has two variants (in this implementation) INSERT sub-select and INSERT VALUES:: insert into r (a,b,c) select a,b,c from s insert into r (a,b,c) values (1,2,3) The first inserts the result of a SELECT statement into the target table and the other inserts explicit values (which may be dynamic parameters, see below). Cursor based updates are not supported at the SQL level, eg:: update r set a=1 where current of curs is not supported. Indices ~~~~~~~ The create and drop index statements initialize and destroy index structures respectively:: CREATE INDEX user_defined_name ON user_defined_name ( namelist ) DROP INDEX user_defined_name Indices allow fast access to a table, based on values for the indexed columns in the namelist. Indices can be UNIQUE, meaning that the attributes of the index cannot take on the same values in the table twice:: CREATE UNIQUE INDEX user_defined_name ON user_defined_name ( namelist ) Unique indices can be used to enforce primary and secondary key constraints. After a UNIQUE index on a table is created inserts that attempt to insert repeat values for the indexed columns will be rejected. Views ~~~~~ Create view and drop view statements initialize and drop views, respectively:: CREATE VIEW user_defined_name optnamelist AS select_statement DROP VIEW user_defined_name Views are "derived tables" which are defined as stored SELECT statements. They can be used as tables, except that they cannot be directly mutated. It is possible to "implement your own views in Python". Please see remotetest.py, gfintrospect and the FAQ for discussion. Conditions ---------- Conditions are truth valued boolean expressions formed from basic conditions possibly combined using NOT, AND, OR (where NOT has highest precedence and OR has lowest precedence) and parentheses. Basic conditions include simple comparisons:: expression = expression expression < expression expression > expression expression <= expression expression >= expression expression <> expression Variants of the simple comparisons are the quantified subquery comparisons:: expression = ANY ( subquery ) expression = ALL ( subquery ) (and similarly for the other comparison operators). The IN predicate tests membership (like =ANY):: expression IN ( subquery ) expression NOT IN ( subquery ) For all the quantified comparisons and IN the subquery must generate a single column table. Also included are the the BETWEEN and NOT BETWEEN predicates:: expression BETWEEN expression AND expression expression NOT BETWEEN expression AND expression The most general subquery predicate is EXISTS and NOT EXISTS which places no restriction on the subquery:: EXISTS (subquery) NOT EXISTS (subquery) Expressions ----------- Expressions occur in conditions (WHERE, HAVING, etc.), in UPDATE searched assignments, and in the select list of select statements. Expressions are formed from primary expressions, possibly combined using the standard arithmetic operators and parentheses with the normal precedence. Primary expressions include numeric and string literals. Numeric literals supported are the Python numeric literals. 
String constants are set off by apostrophies, where two apostrophe's in sequence represent an apostrophy in the string: 'SQL string literals ain''t pretty' Column name expressions may be unqualified if they are unambiguous, or may be qualified with a table name or table alias:: bar frequents.bar f.bar The rules for scoping of column names are not covered here. Column names in subqueries may refer to bindings in the query (or queries) that contain the sub-query. Subquery expressions of form:: ( select_statement ) must produce a single column and single row table. Aggregate operations are only permitted in the select list or in the HAVING condition of SELECT statements (including subselects):: COUNT(*) COUNT(expression) AVG(expression) MAX(expression) SUM(expression) MIN(expression) and also including the non-standard extension MEDIAN:: MEDIAN(expression) Aggregate operations can be applied to distinct values as in:: COUNT(DISTINCT expression) The Dynamic expression "?" is a placeholder for a value bound at evaluation time (from Python values). See the gadfly overview_ doc "Dynamic Values" section for more details on the use of dynamic parameters. .. _overview: overview.html gadfly-1.0.0/doc/structure.txt0100644000157700012320000000353107465430476015355 0ustar rjonestechkj stuff ======== kjbuckets0.py kjbuckets in pure python kjParseBuild.py python code for building a parser from a grammar kjParser.py python for parser interpretation kjSet.py sets implemented using mappings Gadfly ====== database.py Main entry point for gadfly sql. store.py storage objects introspection.py View based introspection and extension views bindings.py rule bindings for sql grammar. sql.py grammar generation stuff for sql. This module does not bind any rule semantics, it just generates the parser data structures. grammar.py sql grammar, partial, based on ODBC 2.0 programmer's ref operations.py Database DDL/DML statement semantics semantics.py sql semantics - provides kjbuckets Installation ------------ setup.py Gadfly installation script. Build the sql grammar. sqlwhere.py this module indicates where the sql datastructures are marshalled. Auto generated on install: better not touch! Scripts ------- scripts/gfclient.py client access to gadfly server uses gfsocket scripts/gfserve.py gadfly server uses gadfly, gfsocket Tests ----- test/gfstest.py Test script for gadfly client and server test/gftest.py test script for gadfly test/remotetest.py Demonstration of the Remote View protocol for adding specially implemented Views in an application. test/SQLTESTG.py this was used for debugging null productions test/sqlgtest.py sql grammar tests Demos ===== demo/idl.py IDL parser demo/relalg.py Simple relational algebra interpreter. demo/pygram.py Rules for python based on grammar given in Programming Python by Mark Lutz demo/pylint.py python lint using kwParsing demo/DLispShort.py Grammar generation for lisp lists with strings, ints, vars, print, and setq demo/DumbLispGen.py Test for kjParseBuild module automatic parser generation. Lisp lists with strings, ints, vars, and setq. 
gadfly-1.0.0/gadfly/0040755000157700012320000000000007512763043013246 5ustar rjonestechgadfly-1.0.0/gadfly/scripts/0040755000157700012320000000000007512763043014735 5ustar rjonestechgadfly-1.0.0/gadfly/scripts/__init__.py0100644000157700012320000000000007467204760017036 0ustar rjonestechgadfly-1.0.0/gadfly/scripts/gfplus.py0100644000157700012320000002207507512763003016606 0ustar rjonestech#!/usr/local/bin/python ''' An Interactive Shell for the Gadfly RDBMS (http://gadfly.sf.net/) Jeff Berliner (jeff@popmail.med.nyu.edu) -- 11/24/1998 (old URL http://shamrock.med.nyu.edu/~jeff/gfplus/) gfplus is a simple interactive shell for Gadfly, based losely on Oracle's SQL*Plus tool. gfplus allows you to type SQL directly, and handles allocating resources, and dealing with output. Thanks to: Aaron Watters! Richard Jones Lars M. Garshol Marc Risney ''' __version__ = '$Revision: 1.6 $' # $Id: gfplus.py,v 1.6 2002/07/10 07:45:07 richard Exp $ import os, sys, string, traceback, time, operator, cmd, re try: import gadfly except ImportError: print 'Unable to load Gadfly. Check your PYTHONPATH and try again.' sys.exit() try: # if the readline module exists, it will provide command line recall import readline # and other editing features to raw_input(). rl = '[readline]' except ImportError: rl = '' ## pp() function by Aaron Watters, posted to gadfly-rdbms@egroups.com 1/18/99 # Thanks Aaron!! def pp(cursor): try: rows = cursor.fetchall() except: return "No description" desc = cursor.description names = [] maxen = [] for d in desc: n = d[0] names.append(n) maxen.append(len(n)) rcols = range(len(desc)) rrows = range(len(rows)) for i in rrows: rows[i] = rowsi = map(str, rows[i]) for j in rcols: maxen[j] = max(maxen[j], len(rowsi[j])) for i in rcols: maxcol = maxen[i] name = names[i] names[i] = name + (" " * (maxcol-len(name))) for j in rrows: val = rows[j][i] rows[j][i] = val + (" " * (maxcol-len(val))) for j in rrows: rows[j] = ' | '.join(rows[j]) names = ' | '.join(names) width = reduce(operator.add, maxen) + 3*len(desc) rows.insert(0, "=" * width) rows.insert(0, names) return '\n'.join(rows, "\n") class GadflyShell(cmd.Cmd): prompt = 'GF> ' prompt2 = '... ' def __init__(self): print '\ngfplus %s -- Interactive gadfly shell %s\n' %(__version__, rl) t = time.localtime(time.time()) print time.strftime("%A %B %d, %Y %I:%M %p", t) print 'Using: ' # flag added for client/server usage. Will be set to 1 if the # arguments passed to gfplus appear to request a server connection self.SERVER = 0 # parse command line if os.environ.has_key('GADFLY_HOME'): # If environment variable exists, use the data stored in it. # Don't prompt. loc, dbase = os.path.split(os.environ.get('GADFLY_HOME')) print 'DB:',dbase print 'Loc:',loc,'\n' elif len(sys.argv) < 2: # no arguments passed dbase = raw_input('DB Name: ') loc = raw_input('DB Location: ') elif len(sys.argv) < 3: # assume db name was passed, ask for location dbase = sys.argv[1] print 'DB Name: %s' % dbase loc = raw_input('DB Location: ') elif len(sys.argv) < 4: # assume all arguments were passed. dbase = sys.argv[1] loc = sys.argv[2] print 'DB:',dbase print 'Loc:',loc,'\n' elif len(sys.argv) == 5: # assume caller is requesting a client/server conn. 
dbase = sys.argv[1] port = sys.argv[2] passwd = sys.argv[3] machine = sys.argv[4] self.SERVER = 1 print 'Policy:', dbase print 'Loc: %s:%s\n' %(machine, port) else: # none of the above print 'usage: %s [dbname] [loc]' %sys.argv[0] sys.exit() if not self.SERVER: try: self.db = gadfly.gadfly(dbase,loc) except IOError, msg: print 'Unable to locate database "%s" at location "%s".'%( dbase, loc) foo = raw_input('Create? (Yy/Nn) ') # if 'y', create a new DB. if string.lower(string.strip(foo)) == 'y': self.db = gadfly.gadfly() self.db.startup(dbase,loc) else: # otherwise, non-'y', exit. sys.exit(-1) else: from gadfly.client import gfclient # gfclient lives in gadfly/client.py in this release self.db = gfclient(dbase, passwd, machine, int(port)) # create a DB cursor to execute our SQL in. self.cur = self.db.cursor() # for "do something to the last command" commands self.last_command = '' def do_exit(self, arg): ''' exit gfplus, committing changes ''' print 'Commit...', sys.stdout.flush() self.db.commit() self.db.close() print 'exit' return 1 def do_EOF(self, arg): print return self.do_exit(arg) def do_commit(self, arg): ''' commit database ''' self.db.commit() def do_rollback(self, arg): ''' rollback to last commit ''' self.db.rollback() def precmd(self, line): if line.strip() in ('/', '!!'): line = self.lastcmd if line.startswith('s/') or line.startswith('c/'): line = 'change '+line[2:] return line def emptyline(self): pass def postcmd(self, stop, line): '''I need to have the last command altered _after_ the current command is run ''' self.last_command = self.lastcmd return stop def do_desc(self, table): ''' List columns for the table named by <table> ''' sql = 'select column_name from __columns__ where table_name = ?' self.cur.execute(sql, (string.upper(table),)) if self.SERVER: print '\n'+pp(self.cur)+'\n' else: print '\n'+self.cur.pp()+'\n' def do_change(self, arg): '''Repeat the last command, but change it according to the re arg: s/pattern/replace c/pattern/replace ''' if not self.last_command: print 'No last command to change' return # TODO: allow \-escaped / to appear in the arg pattern, repl = arg.split('/') line = re.sub(pattern, repl, self.last_command) print line return self.onecmd(line) def do_use(self, dbase): ''' Switch active databases. <dbase> becomes the new name; the user is prompted for the location. Commits changes to the original DB. ''' self.db.commit() self.db.close() loc = raw_input('Loc: ') self.db = gadfly.gadfly(dbase, loc) self.cur = self.db.cursor() self.SERVER = 0 print '\nNow using %s\n'%dbase def do_help(self, arg): ''' display help screen ''' print ''' gfplus -- Interactive Gadfly Shell Commands: <sql statement>; Execute SQL commands on gadfly database help This screen commit Commit changes to database rollback Rollback changes to last committed state desc <table> Display all columns in a table or view use <dbase> Switch active database to <dbase> exit Exit gfplus, committing changes ''' def default(self, arg): ''' Pass the command to Gadfly ''' # make sure we have a whole query query = arg.strip() while not query.endswith(';'): query = query + ' ' + raw_input(self.prompt2).strip() try: self.cur.execute(query[:-1]) except: # Gadfly returned an error, use traceback to # print it. traceback.print_exc() return # display results if query.startswith('select'): f = self.cur.fetchall() if len(f) > 0: if self.SERVER: print pp(self.cur) else: print self.cur.pp() else: print 'No rows returned.'
else: print 'OK' def main(): shell = GadflyShell() shell.cmdloop() if __name__ == '__main__': main() # # $Log: gfplus.py,v $ # Revision 1.6 2002/07/10 07:45:07 richard # Final commits before 1.0.0 release # # Revision 1.5 2002/05/24 01:56:05 richard # we need a main() # # Revision 1.4 2002/05/14 23:52:55 richard # - fixed commit-after-open bug (no working_db) # - added more functionality to gfplus: # / or !! repeat last command (blank line does the same thing) # (s|c)/pat/repl repeat last but RE sub pat for repl # - corrected gfplus exit code # # Revision 1.3 2002/05/12 23:56:55 richard # Cleaned up gfplus (uses cmd.Cmd, buncha other cleanups) # Removed the redundant gfclient # # Revision 1.2 2002/05/12 09:53:13 richard # Corrected MANIFEST # Fixed setup to create scripts that call main() # Fixed gfplus, a lot ;) # # Revision 1.1 2002/05/11 23:32:06 richard # added gfplus, docco # # gadfly-1.0.0/gadfly/scripts/gfserver.py0100644000157700012320000000653107467216203017134 0ustar rjonestech"""gadfly server mode script usage python gfserve.py port database directory password [startup] test example python gfserve.py 2222 test dbtest admin gfstest port is the port to listen to database is the database to start up. (must exist!) directory is the directory the database is in. password is the administrative access password. startup if present should be the name of a module to use for startup. The Startup module must contain a function Dict = startup(admin_policy, connection, Server_instance) which performs any startup actions needed on the database and returns either None or a Dictionary of name -> policy objects where the policy objects describe policies beyond the admin policy. The startup function may also modify the admin_policy (disabling queries for example). The arguments passed to startup are: admin_policy: the administrative policy eg you could turn queries off for admin, using admin only for server maintenance, or you could add prepared queries to the admin_policy. connection: the database connection eg you could perform some inserts before server start also needed to make policies. Server_instance Included for additional customization. Create policies using P = gfserve.Policy(name, password, connection, queries=0) -- for a "secure" policy with only prepared queries allowed, or P = gfserve.Policy(name, password, connection, queries=1) -- for a policy with full access and arbitrary statement execution. add a "named prepared statement" to a policy using P[name] = statement for example P["updatenorm"] = ''' update frequents set bar=?, perweek=? where drinker='norm' ''' in this case 'updatenorm' requires 2 dynamic parameters when invoked from a client. Script stdout lists server logging information. Some server administration services (eg shutdown) are implemented by the script interpretation of gfclient.py.
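As an illustration only (the policy name, password and query text below are
invented, and the import follows the gfserve naming used in this docstring;
adjust it to wherever Policy lives in your installation), a startup module
matching the protocol above might read:

    import gfserve

    def startup(admin_policy, connection, Server_instance):
        # one restricted policy permitting a single prepared query
        guest = gfserve.Policy('guest', 'guestpw', connection, queries=0)
        guest['allbars'] = 'select * from frequents'
        return {'guest': guest}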
""" # $Id: gfserver.py,v 1.1 2002/05/11 13:28:35 richard Exp $ import sys def main(): """start up the server.""" try: done = 0 argv = sys.argv nargs = len(argv) #print nargs, argv if nargs<5: sys.stderr.write("gfserve: not enough arguments: %s\n\n" % argv) sys.stderr.write(__doc__) return [port, db, dr, pw] = argv[1:5] print "gfserve startup port=%s db=%s, dr=%s password omitted" % ( port, db, dr) port = int(port) startup = None if nargs>5: startup = argv[5] print "gfserve: load startup module %s" % startup S = Server(int(port), db, dr, pw, startup) S.init() print "gfserve: server initialized, setting stderr=stdout" sys.stderr = sys.stdout print "gfserve: starting the server" S.start() done = 1 finally: if not done: print __doc__ if __name__=="__main__": main() # # $Log: gfserver.py,v $ # Revision 1.1 2002/05/11 13:28:35 richard # Checked over the server code. Split out functionality into modules and # scripts. Renamed documentation to "network". Made sure the gftest suite # worked (will need to formalise it though). # # gadfly-1.0.0/gadfly/__init__.py0100644000157700012320000000232607467104370015360 0ustar rjonestech''' Package marker for the gadfly package, also provides backaward compatibility. :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: __init__.py,v 1.5 2002/05/11 02:59:04 richard Exp $: ''' # $Id: __init__.py,v 1.5 2002/05/11 02:59:04 richard Exp $ # make old code still work from database import gadfly # # $Log: __init__.py,v $ # Revision 1.5 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.4 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.3 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. # # Revision 1.2 2002/05/07 04:38:39 anthonybaxter # import * considered stoopid. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/bindings.py0100644000157700012320000003241207467104370015415 0ustar rjonestech""" Rule bindings for sql grammar. 
:Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: bindings.py,v 1.4 2002/05/11 02:59:04 richard Exp $: """ import semantics def elt0(list, context): """return first member of reduction""" return list[0] def elt1(list, context): """return second member""" return list[1] def elt2(list, context): return list[2] def returnNone(list, context): return None def stat1(list, context): """return list of len 1 of statements""" return list #def statn(list, context): # """return a list of statement reductions""" # [stat, semi, statlist] = list # statlist.insert(0, stat) # return statlist def thingcommalist(l, c): [thing, comma, list] = l list.insert(0, thing) return list def listcommathing(l, c): [list, comma, thing] = l list.append(thing) return list statn = thingcommalist selstat = elt0 insstat = elt0 createtablestat = elt0 droptablestat = elt0 delstat = elt0 updatestat = elt0 createindexstat = elt0 dropindexstat = elt0 createviewstat = elt0 dropviewstat = elt0 # drop view statement stuff def dropview(l, c): [drop, view, name] = l return semantics.DropView(name) # create view statement stuff def createview(l, c): [create, view, name, namelist, as, selection] = l return semantics.CreateView(name, namelist, selection) optnamelist0 = returnNone optnamelistn = elt1 # drop index statement stuff def dropindex(l, c): [drop, index, name] = l return semantics.DropIndex(name) # create index statement stuff def createindex(l, c): [create, index, name, on, table, op, namelist, cp] = l return semantics.CreateIndex(name, table, namelist) def createuniqueindex(l, c): [create, unique, index, name, on, table, op, namelist, cp] = l return semantics.CreateIndex(name, table, namelist, unique=1) names1 = stat1 namesn = listcommathing # update statement stuff def update(l, c): [upd, name, set, assns, condition] = l return semantics.UpdateOp(name, assns, condition) def assn(l, c): [col, eq, exp] = l return (col, exp) def assn1(l, c): [ (col, exp) ] = l result = semantics.TupleCollector() result.addbinding(col, exp) return result def assnn(l, c): [ result, comma, (col, exp) ] = l result.addbinding(col, exp) return result # delete statement stuff def deletefrom(l, c): [delete, fromkw, name, where] = l return semantics.DeleteOp(name, where) # drop table stuff def droptable(l, c): [drop, table, name] = l return semantics.DropTable(name) # create table statement stuff def createtable(list, context): [create, table, name, p1, colelts, p2] = list return semantics.CreateTable(name, colelts) colelts1 = stat1 coleltsn = listcommathing #def coleltsn(list, c): # [c1, cc, ce] = list # c1.append(ce) # return c1 coleltid = elt0 coleltconstraint = elt0 def coldef(l, c): [colid, datatype, default, constraints] = l return semantics.ColumnDef(colid, datatype, default, constraints) optdef0 = returnNone optcolconstr0 = returnNone stringtype = exnumtype = appnumtype = integer = float = varchar = elt0 varcharn = elt0 # insert statement stuff def insert1(l, c): [insert, into, name, optcolids, insert_spec] = l return semantics.InsertOp(name, optcolids, insert_spec) optcolids0 = returnNone optcolids1 = elt1 colids1 = stat1 colidsn = listcommathing def insert_values(l, c): return semantics.InsertValues(l[2]) def insert_query(l, c): return semantics.InsertSubSelect(l[0]) litlist1 = stat1 litlistn = listcommathing sliteral0 = elt0 def sliteralp(l, c): [p, v] = l return +v def sliterald(l, c): [l1, m, l2] = l return l1 - l2 def sliterals(l, c): [l1, p, l2] = l return l1 + l2 def sliteralm(l, c): 
[m, v] = l return -v # select statement stuff def selectx(list, context): [sub, optorder_by] = list #sub.union_select = optunion sub.order_by = optorder_by # number of dynamic parameters in this parse. sub.ndynamic = context.ndynamic() return sub psubselect = elt1 def subselect(list, context): [select, alldistinct, selectlist, fromkw, trlist, optwhere, optgroup, opthaving, optunion] = list sel = semantics.Selector(alldistinct, selectlist, trlist, optwhere, optgroup, opthaving, # store # of dynamic parameters seen in this parse. ndynamic = context.ndynamic() ) sel.union_select = optunion return sel def ad0(list, context): return "ALL" adall = ad0 def addistinct(list, context): return "DISTINCT" def where0(list, context): return semantics.BTPredicate() # true where1 = elt1 group0 = returnNone group1 = elt2 colnames1 = stat1 colnamesn = listcommathing having0 = returnNone having1 = elt1 union0 = returnNone def union1(l, c): [union, alldistinct, selection] = l return semantics.Union(alldistinct, selection) def except1(l, c): [union, selection] = l alldistinct = "DISTINCT" return semantics.Except(alldistinct, selection) def intersect1(l, c): [union, selection] = l alldistinct = "DISTINCT" return semantics.Intersect(alldistinct, selection) order0 = returnNone order1 = elt2 #orderby = elt2 sortspec1 = stat1 sortspecn = listcommathing def sortint(l, c): [num, ord] = l from types import IntType if type(num)!=IntType or num<=0: raise ValueError, `num`+': col position not positive int' return semantics.PositionedSort(num, ord) def sortcol(l, c): [name, ord] = l return semantics.NamedSort(name, ord) def optord0(l, c): return "ASC" optordasc = optord0 def optorddesc(l, c): return "DESC" ## table reference list returns list of (name, name) or (name, alias) def trl1(l, c): [name] = l return [(name, name)] def trln(l,c): [name, comma, others] = l others.insert(0, (name, name)) return others def trl1a(l,c): [name, alias] = l return [(name, alias)] def trlna(l,c): [name, alias, comma, others] = l others.insert(0, (name, alias)) return others def trl1as(l,c): [name, as, alias] = l return [(name, alias)] def trlnas(l,c): [name, as, alias, comma, others] = l others.insert(0, (name, alias)) return others tablename1 = elt0 columnid1 = elt0 def columnname1(list, context): [ci] = list return columnname2([None, None, ci], context) def columnname2(list, context): [table, ignore, col] = list return semantics.BoundAttribute(table, col) def dynamic(list, context): # return a new dynamic parameter int = context.param() return semantics.BoundAttribute(0, int) # expression stuff def literal(list, context): [lit] = list return semantics.Constant(lit) def stringstring(l, c): """two strings in sequence = apostrophe""" [l1, l2] = l value = "%s'%s" % (l1.value0, l2) return semantics.Constant(value) numlit = literal stringlit = literal primarylit = elt0 primary1 = elt0 factor1 = elt0 term1 = elt0 exp1 = elt0 def expplus(list, context): [exp, plus, term] = list return exp + term def expminus(list, context): [exp, minus, term] = list return exp - term def termtimes(list, context): [exp, times, term] = list return exp * term def termdiv(list, context): [exp, div, term] = list return exp / term plusfactor = elt1 def minusfactor(list, context): [minus, factor] = list return -factor primaryexp = elt1 primaryset = elt0 def countstar(l, c): return semantics.Count("*") def distinctset(l, c): [agg, p1, distinct, exp, p2] = l return set(agg, exp, 1) distinctcount = distinctset def allset(l, c): [agg, p1, exp, p2] = l return set(agg, exp, 
0) allcount = allset def set(agg, exp, distinct): import semantics if agg=="AVG": return semantics.Average(exp, distinct) if agg=="COUNT": return semantics.Count(exp, distinct) if agg=="MAX": return semantics.Maximum(exp, distinct) if agg=="MIN": return semantics.Minimum(exp, distinct) if agg=="SUM": return semantics.Sum(exp, distinct) if agg=="MEDIAN": return semantics.Median(exp, distinct) raise NameError, `agg`+": unknown aggregate" average = count = maximum = minimum = summation = median = elt0 def predicateeq(list, context): [e1, eq, e2] = list return e1.equate(e2) def predicatene(list, context): [e1, lt, gt, e2] = list return ~(e1.equate(e2)) def predicatelt(list, context): [e1, lt, e2] = list return e1.lt(e2) def predicategt(list, context): [e1, lt, e2] = list return e2.lt(e1) def predicatele(list, context): [e1, lt, eq, e2] = list return e1.le(e2) def predicatege(list, context): [e1, lt, eq, e2] = list return e2.le(e1) def predbetween(list, context): [e1, between, e2, andkw, e3] = list return semantics.BetweenPredicate(e1, e2, e3) def prednotbetween(list, context): [e1, notkw, between, e2, andkw, e3] = list return ~semantics.BetweenPredicate(e1, e2, e3) predicate1 = elt0 bps = elt1 bp1 = elt0 # exists predicate stuff predexists = elt0 def exists(l, c): [ex, paren1, subquery, paren2] = l return semantics.ExistsPred(subquery) def notbf(list, context): [ notst, thing ] = list return ~thing # quantified predicates nnall = elt0 nnany = elt0 def predqeq(list, context): [exp, eq, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantEQ(exp, subq) else: return ~semantics.QuantNE(exp, subq) def predqne(list, context): [exp, lt, gt, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantNE(exp, subq) else: return ~semantics.QuantEQ(exp, subq) def predqlt(list, context): [exp, lt, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantLT(exp, subq) else: return ~semantics.QuantGE(exp, subq) def predqgt(list, context): [exp, gt, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantGT(exp, subq) else: return ~semantics.QuantLE(exp, subq) def predqle(list, context): [exp, less, eq, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantLE(exp, subq) else: return ~semantics.QuantGT(exp, subq) def predqge(list, context): [exp, gt, eq, allany, p1, subq, p2] = list if allany=="ANY": return semantics.QuantGE(exp, subq) else: return ~semantics.QuantLT(exp, subq) # subquery expression def subqexpr(list, context): [p1, subq, p2] = list return semantics.SubQueryExpression(subq) def predin(list, context): [exp, inkw, p1, subq, p2] = list return semantics.InPredicate(exp, subq) def prednotin(list, context): [exp, notkw, inkw, p1, subq, p2] = list return ~semantics.InPredicate(exp, subq) def predinlits(list, context): [exp, inkw, p1, lits, p2] = list return semantics.InLits(exp, lits) def prednotinlits(list, context): [exp, notkw, inkw, p1, lits, p2] = list return ~semantics.InLits(exp, lits) bf1 = elt0 def booln(list, context): [ e1, andst, e2 ] = list return e1&e2 bool1 = elt0 def searchn(list, context): [ e1, orst, e2 ] = list return e1 | e2 search1 = elt0 colalias = elt0 # select list stuff def selectstar(l,c): return "*" selectsome = elt0 select1 = elt0 # selectsub returns (expression, asname) def select1(list, context): [ (exp, name) ] = list result = semantics.TupleCollector() result.addbinding(name, exp) return result def selectn(list, context): [ selectsubs, comma, select_sublist ] = list (exp, name) = select_sublist 
selectsubs.addbinding(name, exp) return selectsubs def selectit(list, context): [exp] = list return (exp, None) # no binding! def selectname(list, context): [exp, as, alias] = list return (exp, alias) colalias = elt0 #### do the bindings. # note: all reduction function defs must precede this assign VARS = vars() class punter: def __init__(self, name): self.name = name def __call__(self, list, context): print "punt:", self.name, list return list class tracer: def __init__(self, name, fn): self.name = name self.fn = fn def __call__(self, list, context): print self.name, list return self.fn(list, context) def BindRules(sqlg): for name in sqlg.RuleNameToIndex.keys(): if VARS.has_key(name): #print "binding", name sqlg.Bind(name, VARS[name]) # nondebug #sqlg.Bind(name, tracer(name, VARS[name]) ) # debug else: print "unbound", name sqlg.Bind(name, punter(name)) return sqlg # # $Log: bindings.py,v $ # Revision 1.4 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.3 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.2 2002/05/08 00:31:52 richard # More cleanup. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/client.py0100644000157700012320000000624707467205746015116 0ustar rjonestechimport socket from gadfly import gfsocket # copied from gfserve # shut down the server (admin policy only) # arguments = () # shutdown the server with no checkpoint SHUTDOWN = "SHUTDOWN" # restart the server (admin only) # arguments = () # restart the server (recover) # no checkpoint RESTART = "RESTART" # checkpoint the server (admin only) # arguments = () # checkpoint the server CHECKPOINT = "CHECKPOINT" # exec prepared statement # arguments = (prepared_name_string, dyn=None) # execute the prepared statement with dynamic args. # autocommit. EXECUTE_PREPARED = "EXECUTE_PREPARED" # exec any statement (only if not disabled) # arguments = (statement_string, dyn=None) # execute the statement with dynamic args. # autocommit. 
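# For illustration (a sketch only -- the host, port, and credentials below
# are made up), a client drives these actions through the gfclient wrapper
# defined in this module:
#
#     con = gfclient("admin", "admin", "localhost", 2222)
#     cursor = con.cursor()
#     cursor.execute("select * from frequents")   # sends EXECUTE_STATEMENT
#     rows = cursor.fetchall()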
EXECUTE_STATEMENT = "EXECUTE_STATEMENT" class gfclient: closed = 0 def __init__(self, policy, password, machine, port): self.policy = policy self.port = int(port) self.password = password self.machine = machine def open_connection(self): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((self.machine, self.port)) return sock def send_action(self, action, arguments, socket): gfsocket.send_certified_action( self.policy, action, arguments, self.password, socket) def checkpoint(self): return self.simple_action(CHECKPOINT) def simple_action(self, action, args=()): """only valid for admin policy: force a server checkpoint""" sock = self.open_connection() self.send_action(action, args, sock) data = gfsocket.recv_data(sock) data = gfsocket.interpret_response(data) return data def restart(self): """only valid for admin policy: force a server restart""" return self.simple_action(RESTART) def shutdown(self): """only valid for admin policy: shut down the server""" return self.simple_action(SHUTDOWN) def close(self): self.closed = 1 def commit(self): # right now all actions autocommit pass # cannot rollback, autocommit on success rollback = commit def cursor(self): """return a cursor to this policy""" if self.closed: raise ValueError, "connection is closed" return gfClientCursor(self) class gfClientCursor: statement = None results = None description = None def __init__(self, connection): self.connection = connection # should add fetchone fetchmany def fetchall(self): return self.results def execute(self, statement=None, params=None): con = self.connection data = con.simple_action(EXECUTE_STATEMENT, (statement, params)) (self.description, self.results) = data def execute_prepared(self, name, params=None): con = self.connection data = con.simple_action(EXECUTE_PREPARED, (name, params)) if data is None: self.description = self.results = None else: (self.description, self.results) = data def setoutputsizes(self, *args): pass # not implemented def setinputsizes(self): pass # not implemented gadfly-1.0.0/gadfly/database.py0100644000157700012320000003565607470321526015377 0ustar rjonestech""" Main entry point for gadfly sql. :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: database.py,v 1.7 2002/05/14 23:52:54 richard Exp $: """ error = "gadfly_error" verbosity = 0 from store import Database0, File_Storage0, Transaction_Logger class gadfly: """as per the DBAPI spec "gadfly" is the connection object.""" closed = 0 verbose = verbosity # debug! 
# this will be loaded on first instance creation sql = None def __init__(self, databasename=None, directory=None, forscratch=0, autocheckpoint=1, verbose=0): # on first gadfly usage, load up the SQL grammar if self.sql is None: from gadfly import sql, bindings sql = sql.getSQL() self.sql = bindings.BindRules(sql) verbose = self.verbose = self.verbose or verbose # checkpoint on each commit if set self.autocheckpoint = autocheckpoint if verbose: print "initializing gadfly instance", databasename, directory, \ forscratch self.is_scratch = forscratch self.databasename = databasename self.directory = directory self.fs = None self.database = None # db global transaction id self.transid = 0 if databasename is not None: self.open() def transaction_log(self): if self.verbose: print "new transaction log for", `self.database.log`, self.transid return Transaction_Logger(self.database.log, self.transid, self.is_scratch) def checkpoint(self): """permanently record committed updates""" # note: No transactions should be active at checkpoint for this implementation! # implicit abort of active transactions! verbose = self.verbose if verbose: print "checkpointing gadfly instance", self.databasename db = self.database log = db.log # dump committed db to fs fs = self.fs if fs and db and not db.is_scratch: # flush the log if log: if verbose: print "gadfly: committing log" log.commit() elif verbose: print "gadfly: no log to commit" if verbose: print "gadfly: dumping mutated db structures" fs.dump(db) elif verbose: print "gadfly: no checkpoint required" if verbose: print "gadfly: new transid, reshadowing" self.transid = self.transid+1 self.working_db.reshadow(db, self.transaction_log()) def startup(self, databasename, directory, scratch=0, verbose=0): verbose = self.verbose if verbose: print "gadfly: starting up new ", databasename, directory, scratch if self.database: raise error, "cannot startup, database bound" self.databasename = databasename self.directory = directory db = self.database = Database0() db.is_scratch = scratch or self.is_scratch if verbose: print 'gadfly.startup: new working_db' self.fs = File_Storage0(databasename, directory) self.working_db = Database0(db, self.transaction_log()) # commit initializes database files and log structure if verbose: print 'gadfly.startup: commit' self.commit() # for now: all transactions serialized # working db shared among all transactions/cursors self.transid = self.transid+1 if verbose: print 'gadfly.startup: new working_db' self.working_db = Database0(db, self.transaction_log()) def restart(self): """Reload and rerun committed updates from log, discard uncommitted """ # mainly for testing recovery. if self.verbose: print "gadfly: restarting database", self.databasename self.database.clear() if self.working_db is not None: self.working_db.clear() self.working_db = None self.database = None self.open() def open(self): """ (re)load existing database """ if self.verbose: print "gadfly: loading database", self.databasename if self.directory: directory = self.directory else: directory = "." fs = self.fs = File_Storage0(self.databasename, directory) db = self.database = fs.load() #self.sql) self.transid = self.transid+1 self.working_db = Database0(db, self.transaction_log()) self.commit() def add_remote_view(self, name, definition): """add a remote view to self. Must be redone on each reinitialization! Must not recursively reenter the query evaluation process for this database! "Tables" added in this manner cannot be updated via SQL.
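For example (a sketch: DualView is the trivial one-row extension view
defined in introspection.py, and "connection" is an open gadfly instance):

    from gadfly.introspection import DualView
    connection.add_remote_view("DUAL", DualView())

See test/remotetest.py for a fuller demonstration of the remote view
protocol.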
""" self.database[name] = definition self.working_db[name] = definition def close(self): """checkpoint and clear the database""" if self.closed: return if self.verbose: print "gadfly: closing database", self.databasename db = self.database if not db.is_scratch: self.checkpoint() if db: db.clear() wdb = self.working_db if wdb: wdb.clear() self.working_db = None self.closed = 1 def commit(self): """Commit the working database+transaction, flush log, new transid """ verbose = self.verbose autocheckpoint = self.autocheckpoint if self.verbose: print "gadfly: committing", self.transid, self.databasename self.transid = self.transid+1 fs = self.fs db = self.database wdb = self.working_db if not wdb: # no working database return wdblog = wdb.log if wdblog: if self.verbose: print "gadfly: committing working_db", wdblog wdblog.commit(verbose=self.verbose) wdb.commit() if fs and db and not db.is_scratch: if autocheckpoint: if verbose: print "gadfly: autocheckpoint" # skips a transid? self.checkpoint() else: if verbose: print "gadfly: no autocheckpoint" wdb.reshadow(db, self.transaction_log()) else: if verbose: print "gadfly: scratch db, no logging, just reshadow" wdb.reshadow(db, self.transaction_log()) def rollback(self): """discard the working db, new transid, recreate working db""" verbose = self.verbose if verbose: print "gadfly: rolling back", self.transid, self.databasename if not (self.fs or self.database): raise error, "unbound, cannot rollback" # discard updates in working database self.working_db.clear() self.transid = self.transid+1 self.working_db.reshadow(self.database, self.transaction_log()) #self.open() def cursor(self): if self.verbose: print "gadfly: new cursor", self.databasename db = self.database if db is None: raise error, "not bound to database" return GF_Cursor(self) def dumplog(self): log = self.database.log if log: log.dump() else: print "no log to dump" def table_names(self): return self.working_db.relations() def DUMP_ALL(self): print "DUMPING ALL CONNECTION DATA", self.databasename, self.directory print print "***** BASE DATA" print print self.database print print "***** WORKING DATA" print print self.working_db class GF_Cursor: verbose = verbosity arraysize = None description = None EVAL_DUMP = 0 # only for extreme debugging! def __init__(self, gadfly_instance): verbose = self.verbose = self.verbose or gadfly_instance.verbose if verbose: print "GF_Cursor.__init__", id(self) self.connection = gadfly_instance self.results = None self.resultlist = None self.statement = None # make a shadow of the shadow db! 
(in case of errors) self.shadow_db = Database0() self.reshadow() self.connection = gadfly_instance def reshadow(self): if self.verbose: print "GF_Cursor.reshadow", id(self) db = self.connection.working_db shadow = self.shadow_db shadow.reshadow(db, db.log) if self.verbose: print "rels", shadow.rels.keys() def close(self): if self.verbose: print "GF_Cursor.close", id(self) self.connection = None def reset_results(self): if self.verbose: print "GF_Cursor.reset_results", id(self) rs = self.results if rs is None: raise error, "must execute first" if len(rs)!=1: raise error, "cannot retrieve multiple results" rel = rs[0] rows = rel.rows() atts = rel.attributes() tupatts = tuple(atts) resultlist = list(rows) if len(tupatts)==1: att = tupatts[0] for i in xrange(len(resultlist)): resultlist[i] = (resultlist[i][att],) else: for i in xrange(len(resultlist)): resultlist[i] = resultlist[i].dump(tupatts) self.resultlist = resultlist def fetchone(self): if self.verbose: print "GF_Cursor.fetchone", id(self) r = self.resultlist if r is None: self.reset_results() r = self.resultlist if len(r)<1: raise error, "no more results" result = r[0] del r[0] return result def fetchmany(self, size=None): if self.verbose: print "GF_Cursor.fetchmany", id(self) r = self.resultlist if r is None: self.reset_results() r = self.resultlist if size is None: size = len(r) result = r[:size] del r[:size] return result def fetchall(self): if self.verbose: print "GF_Cursor.fetchall", id(self) return self.fetchmany() def execute(self, statement=None, params=None): """execute operations, commit results if no error""" success = 0 verbose = self.verbose if verbose: print "GF_Cursor.execute", id(self) if statement is None and self.statement is None: raise error, "cannot execute, statement not bound" if statement!=self.statement: if verbose: print "GF_cursor: new statement: parsing" # only reparse on new statement. self.statement=statement from semantics import Parse_Context context = Parse_Context() cs = self.commands = self.connection.sql.DoParse1(statement, context) else: if verbose: print "GF_cursor: old statment, not parsing" cs = self.commands # always rebind! (db may have changed) if verbose: print "GF_Cursor: binding to temp db" # make a new shadow of working db # (should optimize?) self.reshadow() # get shadow of working database database = self.shadow_db if self.EVAL_DUMP: print "***" print "*** dumping connection parameters before eval" print "***" print "*** eval scratch db..." print print database print print "*** connection data" print self.connection.DUMP_ALL() print "********** end of eval dump" for i in xrange(len(cs)): if verbose: print "GFCursor binding\n", cs[i] print database.rels.keys() cs[i] = cs[i].relbind(database) cs = self.commands self.results = results = list(cs) # only unshadow results on no error try: for i in xrange(len(cs)): results[i] = cs[i].eval(params) success = 1 finally: #print "in finally", success # only on no error... if success: # commit updates in shadow of working db (not in real db) if verbose: print "GFCursor: successful eval, storing results in wdb" database.log.flush() # database commit does not imply transaction commit. 
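# (this commit folds the cursor's shadow db into the connection's working
# db; updates reach the permanent database only on connection.commit(),
# and reach disk only via checkpoint/autocheckpoint)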
database.commit() else: if verbose: print \ "GFCursor: UNSUCCESSFUL EVAL, discarding results and log entries" self.statement = None self.results = None self.resultlist = None database.log.reset() # handle curs.description self.description = None if len(results)==1: result0 = results[0] try: atts = result0.attributes() except: pass else: descriptions = list(atts) fluff = (None,) * 6 for i in xrange(len(atts)): descriptions[i] = (atts[i],) + fluff self.description = tuple(descriptions) self.resultlist = None def setoutputsize(self, *args): # not implemented pass def setinputsizes(self, *args): # not implemented pass def pp(self): """return pretty-print string rep of current results""" from string import join stuff = map(repr, self.results) return join(stuff, "\n\n") # # $Log: database.py,v $ # Revision 1.7 2002/05/14 23:52:54 richard # - fixed commit-after-open bug (no working_db) # - added more functionality to gfplus: # / or !! repeat last command (blank line does the same thing) # (s|c)/pat/repl repeat last but RE sub pat for repl # - corrected gfplus exit code # # Revision 1.6 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.5 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.4 2002/05/08 00:31:52 richard # More cleanup. # # Revision 1.3 2002/05/07 23:19:02 richard # Removed circular import (at import time at least) # # Revision 1.2 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. 
# # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/gfsocket.py0100644000157700012320000001720607467216203015430 0ustar rjonestech""" Socket interactions for gadfly client and server :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: gfsocket.py,v 1.4 2002/05/11 13:28:35 richard Exp $: """ import sys, select, time, marshal, md5 # responses SUCCESS = "SUCCESS" EXCEPTION = "EXCEPTION" def reply_exception(exception, info, socket): """send an exception back to the client""" # any error is invisible to client #from gfserve import ServerError try: reply( (EXCEPTION, (exception, info)), socket) except: #info = "%s %s" % (sys.exc_type, sys.exc_value) socket.close() #raise ServerError, "reply_exception failed: "+`info` def reply_success(data, socket): """report success with data back to client""" reply( (SUCCESS, data), socket) def reply(data, socket): marshaldata = marshal.dumps(data) send_packet(socket, marshaldata) socket.close() def send_packet(socket, data): """blast out a length marked packet""" send_len(data, socket) socket.send(data) def send_len(data, socket): """send length of data as cr terminated int rep""" info = `len(data)`+"\n" socket.send(info) def send_certified_action(actor_name, action, arguments, password, socket): marshaldata = marshal.dumps( (action, arguments) ) cert = certificate(marshaldata, password) #print actor_name, cert, marshaldata marshaldata = marshal.dumps( (actor_name, cert, marshaldata) ) send_packet(socket, marshaldata) def unpack_certified_data(data): # sanity check unpack = (actor_name, certificate, marshaldata) = marshal.loads(data) return unpack def recv_data(socket, timeout=10): """receive data or time out""" endtime = time.time() + timeout reader = Packet_Reader(socket) done = 0 while not done: timeout = endtime - time.time() if timeout<0: raise IOError, "socket time out (1)" (readable, dummy, error) = select.select([socket], [], [socket], timeout) if error: raise IOError, "socket in error state" if not readable: raise IOError, "socket time out (2)" reader.poll() done = (reader.mode==READY) return reader.data def interpret_response(data): """interpret response data, raise exception if needed""" (indicator, data) = marshal.loads(data) if indicator==SUCCESS: return data elif indicator==EXCEPTION: # ??? raise EXCEPTION, data else: raise ValueError, "unknown indicator: "+`indicator` # packet reader modes LEN = "LEN" DATA = "DATA" READY = "READY" ERROR = "ERROR" BLOCK_SIZE = 4028 LEN_LIMIT = BLOCK_SIZE * 10 class Packet_Reader: """nonblocking pseudo-packet reader.""" # packets come in as decimal_len\ndata # (note: cr! 
not crlf) # kick too large requests if set limit_len = LEN_LIMIT def __init__(self, socket): self.socket = socket self.length = None self.length_remaining = None self.len_list = [] self.data_list = [] self.received = "" self.data = None self.mode = LEN def __len__(self): if self.mode is LEN: raise ValueError, "still reading length" return self.length def get_data(self): if self.mode is not READY: raise ValueError, "still reading" return self.data def poll(self): mode = self.mode if mode is READY: raise ValueError, "data is ready" if mode is ERROR: raise ValueError, "socket error previously detected" socket = self.socket (readable, dummy, error) = select.select([socket], [], [socket], 0) if error: self.socket.close() self.mode = ERROR raise ValueError, "socket is in error state" if readable: if mode is LEN: self.read_len() # note: do not fall thru automatically elif mode is DATA: self.read_data() def read_len(self): """assume socket is readable now, read length""" socket = self.socket received = self.received len_list = self.len_list if not received: # 10 bytes at a time until len is read. received = socket.recv(10) while received: # consume, test one char input = received[0] received = received[1:] if input == "\n": # done reading length try: length = self.length = int(''.join(len_list)) except: self.mode = ERROR socket.close() raise ValueError, "bad len string? "+`len_list` self.received = received self.length_remaining = length self.mode = DATA limit_len = self.limit_len if limit_len and length>limit_len: raise ValueError, "Length too big: "+`(length, limit_len)` return if len(len_list)>10: self.mode = ERROR socket.close() raise ValueError, "len_list too long: "+`len_list` len_list.append(input) if not received: (readable, dummy, error) = select.select(\ [socket], [], [socket], 0) if error: self.mode = ERROR socket.close() raise ValueError, "socket in error state" if readable: received = socket.recv(10) # remember extra data received. self.received = received def read_data(self): # assume socket is readable socket = self.socket received = self.received length_remaining = self.length_remaining data_list = self.data_list if received: data_list.append(received) self.received = "" length_remaining = length_remaining - len(received) recv_len = max(length_remaining, BLOCK_SIZE) received = socket.recv(recv_len) if received: data_list.append(received) length_remaining = length_remaining - len(received) if length_remaining<1: self.mode = READY self.data = ''.join(data_list) self.length_remaining = length_remaining def certificate(String, password): """generate a certificate for a string, using a password""" if not String: raise ValueError, "cannot generate certificate for empty string" taggedstring = password + String return md5.new(taggedstring).digest() def certify(String, cert, password): """check a certificate for a string""" return certificate(String, password) == cert # # $Log: gfsocket.py,v $ # Revision 1.4 2002/05/11 13:28:35 richard # Checked over the server code. Split out functionality into modules and # scripts. Renamed documentation to "network". Made sure the gftest suite # worked (will need to formalise it though). # # Revision 1.3 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". 
# Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.2 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/grammar.py0100644000157700012320000003070507467104370015251 0ustar rjonestech""" SQL grammar, partial, based on ODBC 2.0 programmer's ref Note: if you change the grammar, you must rebuild the compiled grammar file by executing this script: python grammar.py :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: grammar.py,v 1.4 2002/05/11 02:59:04 richard Exp $: """ # TODO: someday add subquery precedence to allow more general selects. def DeclareTerminals(Grammar): import string # user_defined_name alphanum = string.letters+string.digits + "_" userdefre = "[%s][%s]*"%(string.letters +"_", alphanum) def userdeffn(str): from string import upper return upper(str) Grammar.Addterm("user_defined_name", userdefre, userdeffn) # character_string_literal charstre = "'[^']*'" def charstfn(str): return str[1:-1] Grammar.Addterm("character_string_literal", charstre, charstfn) # numeric_literal digits = string.digits intre = "[%s][%s.jJ]*"%(digits,digits) numlitre = "%s([Ee][+-]?%s)?"%(intre, intre) def numlitfn(str): """Note: this is "safe" because regex filters out dangerous things. """ return eval(str) Grammar.Addterm("numeric_literal", numlitre, numlitfn) sqlrules = """ statement_list :: @R stat1 :: statement_list >> statement @R statn :: statement_list >> statement ; statement_list @R dropindexstat :: statement >> drop_index_statement @R createindexstat :: statement >> create_index_statement @R selstat :: statement >> select_statement @R insstat :: statement >> insert_statement @R createtablestat :: statement >> create_table_statement @R droptablestat :: statement >> drop_table_statement @R delstat :: statement >> delete_statement_searched @R updatestat :: statement >> update_statement_searched @R createviewstat :: statement >> create_view_statement @R dropviewstat :: statement >> drop_view_statement ## drop view statement @R dropview :: drop_view_statement >> DROP VIEW user_defined_name ## create view statement @R createview :: create_view_statement >> CREATE VIEW user_defined_name optnamelist AS select_statement @R optnamelist0 :: optnamelist >> @R optnamelistn :: optnamelist >> ( namelist ) ## drop index statement @R dropindex :: drop_index_statement >> DROP INDEX user_defined_name ## create index statement @R createindex :: create_index_statement >> CREATE INDEX user_defined_name ON user_defined_name ( namelist ) @R createuniqueindex :: create_index_statement >> CREATE UNIQUE INDEX user_defined_name ON user_defined_name ( namelist ) @R names1 :: namelist >> user_defined_name @R namesn :: namelist >> namelist , user_defined_name ## update statement @R update :: update_statement_searched >> UPDATE user_defined_name SET assns optwhere @R assn1 :: assns >> assn @R assnn :: assns >> assns , assn @R assn :: assn >> column_identifier = expression ##### ## delete statement @R deletefrom :: delete_statement_searched >> DELETE FROM user_defined_name optwhere ## drop table @R droptable :: drop_table_statement >> DROP TABLE user_defined_name ## create table statement ( 
restricted ) @R createtable :: create_table_statement >> CREATE TABLE user_defined_name ( colelts ) @R colelts1 :: colelts >> colelt @R coleltsn :: colelts >> colelts , colelt @R coleltid :: colelt >> column_definition @R coleltconstraint :: colelt >> column_constraint_definition ## column constraints deferred @R coldef :: column_definition >> column_identifier data_type optdefault optcolconstraints ## optdefault deferred @R optdef0 :: optdefault >> ## optcolconstraint deferred @R optcolconstr0 :: optcolconstraints >> @R stringtype :: data_type >> character_string_type @R exnumtype :: data_type >> exact_numeric_type @R appnumtype :: data_type >> approximate_numeric_type @R integer :: exact_numeric_type >> INTEGER @R float :: approximate_numeric_type >> FLOAT @R varchar :: character_string_type >> VARCHAR @R varcharn :: character_string_type >> VARCHAR ( numeric_literal ) ## insert statement @R insert1 :: insert_statement >> INSERT INTO table_name optcolids insert_spec @R optcolids0 :: optcolids >> @R optcolids1 :: optcolids >> ( colids ) @R colids1 :: colids >> column_identifier @R colidsn :: colids >> colids , column_identifier @R insert_values :: insert_spec >> VALUES ( litlist ) @R insert_query :: insert_spec >> sub_query @R litlist1 :: litlist >> sliteral @R litlistn :: litlist >> litlist , sliteral @R sliteral0 :: sliteral >> literal @R sliteralp :: sliteral >> + literal ## hack to permit complexes @R sliterals :: sliteral >> sliteral + literal @R sliterald :: sliteral >> sliteral - literal @R sliteralm :: sliteral >> - literal ## select statement @R subselect :: sub_query >> SELECT alldistinct select_list FROM table_reference_list optwhere optgroup opthaving optunion ## @R psubselect :: sub_query >> ( sub_query ) @R selectx :: select_statement >> sub_query optorder_by @R ad0 :: alldistinct >> @R adall :: alldistinct >> ALL @R addistinct :: alldistinct >> DISTINCT @R where0 :: optwhere >> @R where1 :: optwhere >> WHERE search_condition @R group0 :: optgroup >> @R group1 :: optgroup >> GROUP BY colnamelist @R colnames1 :: colnamelist >> column_name @R colnamesn :: colnamelist >> colnamelist , column_name @R having0 :: opthaving >> @R having1 :: opthaving >> HAVING search_condition @R union0 :: optunion >> @R union1 :: optunion >> UNION alldistinct sub_query @R except1 :: optunion >> EXCEPT sub_query @R intersect1 :: optunion >> INTERSECT sub_query @R order0 :: optorder_by >> @R order1 :: optorder_by >> ORDER BY sortspeclist ##@R orderby :: order_by_clause >> ORDER BY sortspeclist @R sortspec1 :: sortspeclist >> sort_specification @R sortspecn :: sortspeclist >> sortspeclist , sort_specification ## really, should be unsigned int @R sortint :: sort_specification >> numeric_literal opt_ord @R sortcol :: sort_specification >> column_name opt_ord @R optord0 :: opt_ord >> @R optordasc :: opt_ord >> ASC @R optorddesc :: opt_ord >> DESC ## table reference list (nasty hack alert) @R trl1 :: table_reference_list >> user_defined_name @R trln :: table_reference_list >> user_defined_name , table_reference_list @R trl1a :: table_reference_list >> user_defined_name user_defined_name @R trlna :: table_reference_list >> user_defined_name user_defined_name , table_reference_list @R trl1as :: table_reference_list >> user_defined_name AS user_defined_name @R trlnas :: table_reference_list >> user_defined_name AS user_defined_name , table_reference_list ## select list @R selectstar :: select_list >> * @R selectsome :: select_list >> selectsubs @R select1 :: selectsubs >> select_sublist @R selectn :: 
selectsubs >> selectsubs , select_sublist @R selectit :: select_sublist >> expression @R selectname :: select_sublist >> expression AS column_alias @R colalias :: column_alias >> user_defined_name ## search condition @R search1 :: search_condition >> boolean_term @R searchn :: search_condition >> boolean_term OR search_condition @R bool1 :: boolean_term >> boolean_factor @R booln :: boolean_term >> boolean_factor AND boolean_term @R bf1 :: boolean_factor >> boolean_primary @R notbf :: boolean_factor >> NOT boolean_primary @R bp1 :: boolean_primary >> predicate @R bps :: boolean_primary >> ( search_condition ) ## predicate (simple for now!!!) @R predicate1 :: predicate >> comparison_predicate ## comparison predicate (simple for now!!!) @R predicateeq :: comparison_predicate >> expression = expression @R predicatelt :: comparison_predicate >> expression < expression @R predicategt :: comparison_predicate >> expression > expression @R predicatele :: comparison_predicate >> expression < = expression @R predicatege :: comparison_predicate >> expression > = expression @R predicatene :: comparison_predicate >> expression < > expression @R predbetween :: comparison_predicate >> expression BETWEEN expression AND expression @R prednotbetween :: comparison_predicate >> expression NOT BETWEEN expression AND expression ## exists predicate @R predexists :: predicate >> exists_predicate @R exists :: exists_predicate >> EXISTS ( sub_query ) ## quantified predicate @R predqeq :: predicate >> expression = allany ( sub_query ) @R predqne :: predicate >> expression < > allany ( sub_query ) @R predqlt :: predicate >> expression < allany ( sub_query ) @R predqgt :: predicate >> expression > allany ( sub_query ) @R predqle :: predicate >> expression < = allany ( sub_query ) @R predqge :: predicate >> expression > = allany ( sub_query ) @R nnall :: allany >> ALL @R nnany :: allany >> ANY ## in predicate @R predin :: predicate >> expression IN ( sub_query ) @R prednotin :: predicate >> expression NOT IN ( sub_query ) @R predinlits :: predicate >> expression IN ( litlist ) @R prednotinlits :: predicate >> expression NOT IN ( litlist ) ## subquery expression @R subqexpr :: expression >> ( sub_query ) ## expression (simple for now!!!) @R exp1 :: expression >> term @R expplus :: expression >> expression + term @R expminus :: expression >> expression - term @R term1 :: term >> factor @R termtimes :: term >> term * factor @R termdiv :: term >> term / factor @R factor1 :: factor >> primary @R plusfactor :: factor >> + factor @R minusfactor :: factor >> - factor @R primary1 :: primary >> column_name @R primarylit :: primary >> literal @R primaryexp :: primary >> ( expression ) @R primaryset :: primary >> set_function_reference @R stringlit :: literal >> character_string_literal @R stringstring :: literal >> literal character_string_literal @R numlit :: literal >> numeric_literal ## set functions (nasty hack!) @R countstar :: set_function_reference >> COUNT ( * ) @R distinctcount :: set_function_reference >> COUNT ( DISTINCT expression ) @R allcount :: set_function_reference >> COUNT ( expression ) @R distinctset :: set_function_reference >> aggregate ( DISTINCT expression ) @R allset :: set_function_reference >> aggregate ( expression ) @R average :: aggregate >> AVG ##@R count :: aggregate >> COUNT @R maximum :: aggregate >> MAX @R minimum :: aggregate >> MIN @R summation :: aggregate >> SUM @R median :: aggregate >> MEDIAN ## dynamic parameter (varies quite a bit from ODBC spec) @R dynamic :: literal >> ? 
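## note: a "?" is bound at evaluation time from the Python values passed
## as the params argument of cursor.execute (the "Dynamic Values"
## mechanism described in the documentation)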
## column name @R columnname1 :: column_name >> column_identifier @R columnname2 :: column_name >> table_name . column_identifier @R tablename1 :: table_name >> user_defined_name @R columnid1 :: column_identifier >> user_defined_name """ nonterms = """ sliteral exists_predicate set_function_reference aggregate sortspeclist sort_specification opt_ord drop_table_statement delete_statement_searched update_statement_searched assns assn insert_statement litlist colelt optcolconstraints optdefault optcolids insert_spec create_table_statement colids colelts column_constraint_definition column_definition data_type character_string_type exact_numeric_type approximate_numeric_type expression term factor primary literal comparison_predicate column_alias column_identifier table_name boolean_term boolean_factor boolean_primary predicate selectsubs expression alias sub_query statement_list statement select_statement alldistinct subselect select_list table_reference_list optwhere optgroup opthaving order_by_clause select_sublist optunion optorder_by search_condition colnamelist column_name table_reference table_name create_index_statement namelist drop_index_statement allany create_view_statement drop_view_statement optnamelist """ keywords = """ INDEX ON ANY IN VIEW AS EXCEPT INTERSECT EXISTS AVG COUNT MAX MIN SUM MEDIAN UPDATE DROP DELETE FROM SET INSERT INTO VALUES CREATE TABLE INTEGER FLOAT VARCHAR AND OR NOT SELECT FROM WHERE HAVING GROUP BY UNION ALL DISTINCT AS ORDER ASC DESC BETWEEN UNIQUE """ puncts = """.,*;=<>{}()?+-/""" # terminals user_defined_name, character_string_literal, numeric_literal # # $Log: grammar.py,v $ # Revision 1.4 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.3 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.2 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/introspection.py0100644000157700012320000001304607467104370016522 0ustar rjonestech""" View based introspection and extension views :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: introspection.py,v 1.3 2002/05/11 02:59:04 richard Exp $: """ # $Id: introspection.py,v 1.3 2002/05/11 02:59:04 richard Exp $ import store class RemoteView(store.View): """Virtual superclass. See text for methods and members to define.""" # Usually redefine __init__ def __init__(self): pass # set static (static=1) or dynamic (static=0) # for static tuple seq is generated once per load # for non-static tuple seq is regenerated once per query # which uses the view. static = 0 # define the column_names column_names = ['Column1'] # define the row generator def listing(self): """return list of values (1 column) or list of tuples of values (>1 column). 
size of elts should match number of columns.""" return [0] # possibly redefine __repr__ and irepr def __repr__(self): return "<%s instance at %s>" % (self.__class__, id(self)) irepr = __repr__ # for introspective methods possibly redefine relbind def relbind(self, db, atts): return self ### don't touch the following unless you are a guru! cached_rows = None def uncache(self): if self.static: return self.cached_rows = None def attributes(self): from string import upper return map(upper, self.column_names) def rows(self, andseqs=0): cached_rows = self.cached_rows if cached_rows is None: tups = list(self.listing()) from semantics import kjbuckets undump = kjbuckets.kjUndump attributes = tuple(self.attributes()) for i in xrange(len(tups)): tups[i] = undump(attributes, tups[i]) cached_rows = self.cached_rows = tups tups = cached_rows[:] if andseqs: return (tups, range(len(tups))) else: return tups class DualView(RemoteView): """static one row one column view for testing. (Inspired by Oracle DUAL relation).""" # trivial example extension view static = 1 column_names = ['Column1'] def listing(self): return [0] class DictKeyValueView(RemoteView): """Less trivial example. Dict keys/values converted to strings""" static = 0 # regenerate in case dict changes column_names = ["key", "value"] mapstring = 1 def __init__(self, dict=None): if dict is None: dict = {} self.dict = dict def listing(self): items = self.dict.items() if self.mapstring: def mapper(item): return tuple(map(str, item)) return map(mapper, items) else: return items class RelationsView(DictKeyValueView): """list of relations and whether they are views.""" column_names = ["table_name", "is_view"] mapstring = 0 def relbind(self, db, atts): rels = db.rels dict = {} for relname in rels.keys(): dict[relname] = rels[relname].is_view self.dict = dict return self class IndicesView(DictKeyValueView): """list of indices and relations they index""" column_names = ["index_name", "table_name", "is_unique"] mapstring = 0 def relbind(self, db, atts): rels = db.rels dict = {} for relname in rels.keys(): rel = rels[relname] if not rel.is_view: index_list = rels[relname].index_list for index in index_list: dict[index.name] = (relname, index.unique) self.dict = dict return self def listing(self): L = [] dict = self.dict keys = dict.keys() for k in keys: L.append( (k,) + dict[k] ) return L class DataDefsView(DictKeyValueView): """Data defs (of non-special views) and definition dumps.""" column_names = ["name", "defn"] mapstring = 1 def relbind(self, db, atts): self.dict = db.datadefs return self class ColumnsView(RemoteView): """table_names and columns therein.""" column_names = ["table_name", "column_name"] def relbind(self, db, atts): rels = db.rels pairs = [] for relname in rels.keys(): for att in rels[relname].attributes(): pairs.append( (relname, att) ) self.pairs = pairs return self def listing(self): return self.pairs class IndexAttsView(ColumnsView): """indices and attributes.""" column_names = ["index_name", "column_name"] def relbind(self, db, atts): indices = db.indices pairs = [] for iname in indices.keys(): for att in indices[iname].attributes(): pairs.append( (iname, att) ) self.pairs = pairs return self # # $Log: introspection.py,v $ # Revision 1.3 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling".
# Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.2 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/kjParseBuild.py0100644000157700012320000013344107467104370016203 0ustar rjonestech"""Python code for building a parser from a grammar :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: kjParseBuild.py,v 1.6 2002/05/11 02:59:04 richard Exp $: """ # BUGS: # A bad grammar that has no derivations for # the root nonterminal may cause a name error # on the variable "GoodStartingPlace" # this needs to be modified so the RULEGRAM is loaded from a # compiled representation if available. import string import kjSet import kjParser import re # import some constants from kjParser import TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, \ TRANSFLAG, KEYFLAG, NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN PMODULE = kjParser.THISMODULE # errors raised here TokenError = "TokenError" # may happen on autogen with bad grammar NotSLRError = "NotSLRError" # may happen for nonSLR grammar # set this flag to abort automatic generation on Errors ABORTONERROR = 0 # token used to mark null productions NULLTOKEN = (None,None) class CFSMachine(kjParser.FSMachine): ''' a derived FSM class, with closure computation methods defined (compilable FSMachine) ''' def __init__(self, nonterm): kjParser.FSMachine.__init__(self, nonterm) def Eclosure(self, Epsilon, DoNullMaps=0): ''' return the epsilon closure of the FSM as a new FSM DoNullMap, if set, will map unexpected tokens to the "empty" state (usually creating a really big fsm) ''' Closure = CFSMachine( self.root_nonTerminal ) # compute the Epsilon Graph between states EGraph = kjSet.NewDG([]) for State in range(0,self.maxState+1): # every state is E-connected to self kjSet.AddArc( EGraph, State, State ) # add possible transition on epsilon (ONLY ONE SUPPORTED!) key = (State, Epsilon) if self.StateTokenMap.has_key(key): keymap = self.StateTokenMap[key] if keymap[0][0] != MOVETOFLAG: raise TypeError, "unexpected map type in StateTokenMap" for (Flag,ToState) in keymap: kjSet.AddArc( EGraph, State, ToState ) #endfor # transitively close EGraph kjSet.TransClose( EGraph ) # Translate EGraph into a dictionary of lists EMap = {} for State in range(0,self.maxState+1): EMap[State] = kjSet.Neighbors( EGraph, State ) # make each e-closure of each self.state a state of the closure FSM. # here closure states assumed transient -- reset elsewhere. 
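        # Illustration: with epsilon arcs 1->2 and 2->4, transitive closure
        # gives EMap[1] == [1, 2, 4]; that whole set of NFA states becomes a
        # single state of the closure FSM constructed below.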
# first do the initial state Closure.States[ Closure.initial_state ] = \ [TRANSFLAG, kjSet.NewSet(EMap[self.initial_state]) ] # do all other states (save initial and successful final states) #for State in range(0,self.maxState+1): # if State != self.initial_state \ # and State != self.successful_final_state: # Closure.NewSetState(TRANSFLAG, kjSet.NewSet(EMap[State]) ) ##endfor # compute set of all known tokens EXCEPT EPSILON Tokens = kjSet.NewSet( [] ) for (State, Token) in self.StateTokenMap.keys(): if Token != Epsilon: kjSet.addMember(Token, Tokens) # tranform it into a list Tokens = kjSet.get_elts(Tokens) # for each state of the the closure FSM (past final) add transitions # and add new states as needed until all states are processed # (uses convention that states are allocated sequentially) ThisClosureState = 1 while ThisClosureState <= Closure.maxState: MemberStates = kjSet.get_elts(Closure.States[ThisClosureState][1]) # for each possible Token, compute the union UTrans of all # e-closures for all transitions for all member states, # on the Token, make UTrans a new state (if needed), # and transition ThisClosureState to UTrans on Token for Token in Tokens: UTrans = kjSet.NewSet( [] ) for MState in MemberStates: # if MState has a transition on Token, include # EMap for the destination state key = (MState, Token) if self.StateTokenMap.has_key(key): DStateTup = self.StateTokenMap[key] if DStateTup[0][0] != MOVETOFLAG: raise TypeError, "unknown map type" for (DFlag, DState) in DStateTup: for EDState in EMap[DState]: kjSet.addMember(EDState, UTrans) #endif #endfor MState # register UTrans as a new state if needed UTState = Closure.NewSetState(TRANSFLAG, UTrans) # record transition from # ThisClosureState to UTState on Token if DoNullMaps: Closure.SetMap( ThisClosureState, Token, UTState) else: if not kjSet.Empty(UTrans): Closure.SetMap( ThisClosureState, Token, UTState) #endfor Token ThisClosureState = ThisClosureState +1 #endwhile return Closure def NewSetState(self, kind, InSet): ''' add an set-marked state to self if not present uses self.States[s][1] as the set marking the state s only used by Eclosure above ''' # return existing state if one is present that matches the set LastState= self.maxState # skip state 0 (successful final state)??? for State in range(1,LastState+1): MarkSet = self.States[State][1] if kjSet.Same(InSet,MarkSet): return State # nonlocal #endfor # if not exited then allocate a new state LastState = LastState + 1 self.States[LastState] = [ kind , InSet ] self.maxState = LastState return LastState class Ruleset: ''' Ruleset class, used to compute NFA and then DFA for parsing based on a list of rules. 
''' def __init__(self, StartNonterm, Rulelist): self.StartNonterm = StartNonterm self.Rules = Rulelist def compFirst(self): ''' method to compute prefixes and First sets for nonterminals ''' # uses the special null production token NULLTOKEN # snarfed directly from Aho+Ullman (terminals glossed) First = kjSet.NewDG([]) # repeat the while loop until no change is made to First done = 0 while not done: # assume we're done until a change is made to First done = 1 # iterate through all rules looking for a new arc to add # indicating Terminal > possible first token derivation # for R in self.Rules: GoalNonterm = R.Nonterm Bodylength = len(R.Body) # look through the body of the rule up to the token with # no epsilon production (yet seen) Bodyindex = 0 Processindex = 1 while Processindex: # unless otherwise indicated below, don't go to next token Processindex = 0 # if index is past end of body then record # an epsilon production for this nonterminal if Bodyindex >= Bodylength: if not kjSet.HasArc(First, GoalNonterm, NULLTOKEN ): kjSet.AddArc( First, GoalNonterm, NULLTOKEN ) done = 0 # change made to First else: # otherwise try to add firsts of this token # to firsts of the Head of the rule. Token = R.Body[Bodyindex] (type, name) = Token if type in (KEYFLAG,TERMFLAG): # try to add this terminal to First for GoalNonterm if not kjSet.HasArc(First, GoalNonterm, Token): kjSet.AddArc( First, GoalNonterm, Token) done = 0 elif type == NONTERMFLAG: # try to add each First entry for nonterminal # to First entry for GoalNonterm for FToken in kjSet.Neighbors( First, Token ): if not kjSet.HasArc(First, GoalNonterm, FToken): kjSet.AddArc( First, GoalNonterm, FToken) done = 0 # does this nonterminal have a known e production? if kjSet.HasArc( First, Token, NULLTOKEN ): # if so, process next token in rule Processindex = 1 else: raise TokenError, "unknown token type in rule body" #endif Bodyindex = Bodyindex + 1 #endwhile Processindex #endfor R in self.Rules #endwhile not done self.First = First def compFollow(self): ''' computing the Follow set for the ruleset the good news: I think it's correct. the bad news: It's slower than it needs to be for epsilon cases. ''' Follow = kjSet.NewDG([]) # put end marker on follow of start nonterminal kjSet.AddArc(Follow, self.StartNonterm, kjParser.ENDOFFILETOKEN) # now compute other follows using the rules; # repeat the loop until no change to Follow. 
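        # Worked micro-example on a tiny hypothetical grammar with rules
        #   S >> A b   and   A >> c :
        # the first pass adds the terminal b to Follow(A), since A is
        # followed by b in the body of S; the next pass adds nothing new,
        # so the iteration below terminates.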
while not self.compFollowRules(Follow): pass self.Follow = Follow def compFollowRules(self, Follow): done = 1 # assume done unless Follow changes for R in self.Rules: newdone = self.compFollowRule(Follow, R) if not newdone: done = 0 return done def compFollowRule(self, Follow, R): done = 1 # work backwards in the rule body to # avoid retesting for epsilon nonterminals Bodylength = len(R.Body) # the tail of rule may expand to null EpsilonTail = 1 # loop starts at the last for BodyIndex in range(Bodylength-1, -1, -1): Token = R.Body[BodyIndex] (Ttype,Tname) = Token if Ttype not in (KEYFLAG, TERMFLAG, NONTERMFLAG): raise TokenError, "unknown token type in rule body" if Ttype in (KEYFLAG,TERMFLAG): # keywords etc cancel epsilon tail, otherwise ignore EpsilonTail = 0 continue # if the tail expands to epsilon, map # follow for the goal nonterminal to this token # and also follow for the tail nonterms if EpsilonTail: # add follow for goal for FToken in kjSet.Neighbors(Follow,R.Nonterm): if not kjSet.HasArc(Follow, Token, FToken): kjSet.AddArc(Follow, Token, FToken) # follow changed, loop again done = 0 # add follow for tail members #for Index2 in range(BodyIndex+1, Bodylength): # TailToken = R.Body[Index2] # for FToken in kjSet.Neighbors(Follow,TailToken): # if not kjSet.HasArc(Follow,Token,FToken): # kjSet.AddArc(Follow,Token,FToken) # done = 0 #endif EpsilonTail # if we are not at the end use First set for next token if BodyIndex != Bodylength-1: NextToken = R.Body[BodyIndex+1] (NTtype, NTname) = NextToken if NTtype in (KEYFLAG,TERMFLAG): if not kjSet.HasArc(Follow, Token, NextToken): kjSet.AddArc(Follow, Token, NextToken) done = 0 elif NTtype == NONTERMFLAG: for FToken in kjSet.Neighbors(self.First, NextToken): if FToken != NULLTOKEN: if not kjSet.HasArc(Follow, Token, FToken): kjSet.AddArc(Follow, Token, FToken) done = 0 continue # next token expands to epsilon: # add its follow, unless already done above for FToken in kjSet.Neighbors(Follow, NextToken): if not kjSet.HasArc(Follow, Token, FToken): kjSet.AddArc(Follow, Token, FToken) done = 0 else: raise TokenError, "unknown token type in rule body" # finally, check whether next iteration has epsilon tail if not kjSet.HasArc(self.First, Token, NULLTOKEN): EpsilonTail = 0 return done def DumpFirstFollow(self): First = self.First Follow = self.Follow print "First:" for key in First.keys(): name = key[1] print name," :: ", for (flag2,name2) in First[key].keys(): print name2,", ", print print "Follow:" for key in Follow.keys(): name = key[1] print name," :: ", for (flag2,name2) in Follow[key].keys(): print name2,", ", print def FirstOfTail(self, Rule, TailIndex, Token=None): ''' computing the "first" of the tail of a rule followed by an optional terminal. 
doesn't include NULLTOKEN requires self.First to be computed ''' Result = kjSet.NewSet( [] ) # go through all tokens in rule tail so long as there is a # null derivation for the remainder Nullprefix = 1 BodyLength = len(Rule.Body) ThisIndex = TailIndex while Nullprefix and ThisIndex < BodyLength: RToken = Rule.Body[ThisIndex] (RTtype, RTname) = RToken if RTtype == NONTERMFLAG: for FToken in kjSet.Neighbors(self.First, RToken): if FToken != NULLTOKEN: kjSet.addMember(FToken, Result) #endfor # check whether this symbol might have a null production if not kjSet.HasArc(self.First, RToken, NULLTOKEN): Nullprefix = 0 elif RTtype in [KEYFLAG, TERMFLAG]: kjSet.addMember(RToken, Result) Nullprefix = 0 else: raise TokenError, "unknown token type in rule body" ThisIndex = ThisIndex + 1 #endwhile # add the optional token if given and Nullprefix still set if Nullprefix and Token != None: kjSet.addMember(Token, Result) return Result def compSLRNFA(self): '''compute an SLR NFA for the ruleset with states for each SLR "item" and transitions, eg: X > .AB on A maps to X > A.B on epsilon maps to A > .ZC and A > .WK an item is a pair (rulenumber, bodyposition) where body position 0 is interpreted to point before the beginning of the body. SLR = "simple LR" in Aho+Ullman terminology ''' NFA = CFSMachine(self.StartNonterm) Nrules = len(self.Rules) itemStateMap = {} for Ruleindex in range(0,Nrules): Rule = self.Rules[Ruleindex] # make an item for each "dot" position in the body for DotPos in range(0, len(Rule.Body) + 1): item = (Ruleindex, DotPos) itemState = NFA.NewState(TRANSFLAG, [item]) itemStateMap[item] = itemState #endfor DotPos #endfor Ruleindex # now that the states are initialized # compute transitions except for the last item of a rule # (which has none) for Ruleindex in range(0,Nrules): Rule = self.Rules[Ruleindex] for DotPos in range(0, len(Rule.Body)): item = (Ruleindex, DotPos) CurrentToken = Rule.Body[DotPos] ThisState = itemStateMap[item] NextState = itemStateMap[ (Ruleindex, DotPos + 1) ] NFA.SetMap( ThisState, CurrentToken, NextState ) # if the current token is a nonterminal # ad epsilon transitions to first item for any # rule that derives this nonterminal (CTtype, CTname) = CurrentToken if CTtype == NONTERMFLAG: for Rule2index in range(0,Nrules): Rule2 = self.Rules[Rule2index] Head = Rule2.Nonterm if Head == CurrentToken: NextState = itemStateMap[( Rule2index, 0 )] NFA.SetMap( ThisState, NULLTOKEN, NextState ) #endfor Rule2index #endif CTtype == NONTERMFLAG #endfor DotPos #endfor Ruleindex # must handle the initial state properly here! # Make a dummy state with e-transitions to all first items # for rules that derive the initial nonterminal ThisState = NFA.initial_state GoodStartingPlace = None for Ruleindex in range(0,Nrules): Rule = self.Rules[Ruleindex] Head = Rule.Nonterm if Head == self.StartNonterm: GoodStartingPlace= (Ruleindex, 0) NextState = itemStateMap[ GoodStartingPlace ] NFA.SetMap( ThisState, NULLTOKEN, NextState ) # fix the NFA.States entry if GoodStartingPlace == None: raise NotSLRError, "No derivation for root nonterminal." 
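        # Reminder of the item convention used throughout: an item is a pair
        # (ruleindex, dotposition), so (0, 0) denotes rule 0 with the parse
        # dot before the entire body -- the usual LR(0) item picture.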
NFA.States[ NFA.initial_state ] = \ [ 'transient', GoodStartingPlace ] self.SLRNFA = NFA #enddef compSLRNFA def ItemDump(self, item): ''' dump an item ''' (ruleindex, position) = item Rule = self.Rules[ruleindex] print Rule.Nonterm[1],' >> ', for bindex in range(0, len(Rule.Body)): if position == bindex: print " (*) ", print Rule.Body[bindex][1], if position == len(Rule.Body): print " (*) " else: print def SLRItemIsFinal(self, item): ''' utility function -- returns true if an item is a final item ''' (ruleindex, position) = item Rule = self.Rules[ruleindex] if position == len(Rule.Body): return 1 else: return 0 def DumpSLRNFA(self): ''' dump the NFA ''' NFA = self.SLRNFA print "root: ", NFA.root_nonTerminal for key in NFA.StateTokenMap.keys(): map = NFA.StateTokenMap[key] (fromstate, token) = key fromitem = NFA.States[ fromstate ][1] self.ItemDump(fromitem) print " on ", token[1], " maps " for Tostate in map: Toitem = NFA.States[Tostate][1] print " ", self.ItemDump(Toitem) def compDFA(self): ''' compute DFA for ruleset by computing the E-closure of the NFA ''' self.DFA = self.SLRNFA.Eclosure(NULLTOKEN) def DumpDFAsets(self): DFA = self.DFA print "root: ", DFA.root_nonTerminal for State in range(1, len(DFA.States) ): self.DumpItemSet(State) def DumpItemSet(self,State): DFA = self.DFA NFA = self.SLRNFA print print "STATE ", State, " *******" fromNFAindices = kjSet.get_elts(DFA.States[State][1]) for NFAindex in fromNFAindices: item = NFA.States[NFAindex][1] print " ", NFAindex, ": ", self.ItemDump(item) def SLRFixDFA(self): '''this function completes the computation of an SLR DFA by adding reduction states for each DFA state S containing item H > B. which reduces rule H > B for each token T in Follow of H. if S already has a transition for T then there is a conflict! assumes DFA and SLRNFA and Follow have been computed. ''' DFA = self.DFA NFA = self.SLRNFA # look through the states (except 0=success) of the DFA # initially don't add any new states, just record # actions to be done # uses convention that 0 is successful final state # ToDo is a dictionary which maps # (State, Token) to a item to reduce ToDo = {} Error = None for State in range(1, len(DFA.States) ): # look for a final item for a rule in this state fromNFAindices = kjSet.get_elts(DFA.States[State][1]) for NFAindex in fromNFAindices: item = NFA.States[NFAindex][1] # if the item is final remember to do the reductions... if self.SLRItemIsFinal(item): (ruleindex, position) = item Rule = self.Rules[ruleindex] Head = Rule.Nonterm Following = kjSet.Neighbors( self.Follow, Head ) for Token in Following: key = (State, Token) if not ToDo.has_key(key): ToDo[ key ] = item else: # it might be okay if the items are identical? 
item2 = ToDo[key] if item != item2: print "reduce/reduce conflict on ",key self.ItemDump(item) self.ItemDump(item2) Error = " apparent reduce/reduce conflict" #endif #endfor #endif #endfor NFAindex #endfor State # for each (State,Token) pair which indicates a reduction # record the reduction UNLESS the map is already set for the pair for key in ToDo.keys(): (State,Token) = key item = ToDo[key] (rulenum, dotpos) = item ExistingMap = DFA.map( State, Token ) if ExistingMap[0] == NOMATCHFLAG: DFA.SetReduction( State, Token, rulenum ) else: print "apparent shift/reduce conflict" print "reduction: ", key, ": " self.ItemDump(item) print "existing map ", ExistingMap Error = " apparent shift/reduce conflict" #endfor if Error and ABORTONERROR: raise NotSLRError, Error #enddef SLRfixDFA() def DoSLRGeneration(self): ''' do complete SLR DFA creation starting after initialization ''' self.compFirst() self.compFollow() self.compSLRNFA() self.compDFA() self.SLRFixDFA() ################ the following are interpretation functions ################ used by RULEGRAM meta grammar # some constants used here COMMENTFORM = "##.*\n" RSKEY = "@R" COLKEY = "::" LTKEY = ">>" IDNAME = "ident" # an identifier in the meta grammar is any nonwhite string # except the keywords @R :: >> or comment flag ## IDFORM = "[^" + string.whitespace + "]+" def IdentFun(string): ''' for identifiers simply return the string ''' return string def RootReduction(list, ObjectGram): ''' RootReduction should receive list of form [ nontermtoken, keyword COLKEY, RuleList ] ''' if len(list) != 3 or list[1] != COLKEY: raise FlowError, "unexpected metagrammar root reduction" return (list[0], list[2]) def NullRuleList(list, ObjectGram): ''' NullRuleList should receive list of form [] ''' if list != []: raise FlowError, "unexpected null RuleList form" return [] def FullRuleList(list, ObjectGram): ''' FullRuleList should receive list of form [ Rule, RuleList ] ''' if type(list) != type([]) or len(list)!=2: raise FlowError, "unexpected full RuleList form" NewRule = list[0] OldRules = list[1] return [NewRule] + OldRules def InterpRule(list, ObjectGram): ''' InterpRule should receive list of form [keyword RSKEY, RuleNameStr, keyword COLKEY, Nontermtoken, keyword LTKEY, Bodylist] ''' # check keywords: if len(list)!=6 or list[0]!=RSKEY or list[2]!=COLKEY or list[4]!=LTKEY: raise FlowError, "unexpected meta rule reduction form" ruleName = list[1] ruleNonterm = list[3] ruleBody = list[5] # upcase the the representation of keywords if needed if not ObjectGram.LexD.isCaseSensitive(): for i in range(0,len(ruleBody)): (flag, name) = ruleBody[i] if flag == KEYFLAG: ruleBody[i] = (KEYFLAG, string.upper(name)) elif not flag in (TERMFLAG, NONTERMFLAG): raise FlowError, "unexpected rule body member" rule = kjParser.ParseRule( ruleNonterm, ruleBody ) rule.Name = ruleName return rule def InterpRuleName(list, ObjectGram): ''' InterpRuleName should receive [ string ] ''' # add error checking? 
return list[0] def InterpNonTerm(list, ObjectGram): ''' InterpNonTerm should receive [ string ] ''' if type(list)!=type([]) or len(list)!=1: raise FlowError, "unexpected rulename form" Name = list[0] # determine whether this is a valid nonterminal if not ObjectGram.NonTermDict.has_key(Name): raise TokenError, "LHS of Rule must be nonterminal: "+Name return ObjectGram.NonTermDict[Name] def NullBody(list, ObjectGram): ''' NullBody should receive [] ''' if list != []: raise FlowError, "unexpected null Body form" return [] def FullBody(list,ObjectGram): ''' FullBody should receive [ string, Bodylist] must determine whether the string represents a keyword, a nonterminal, or a terminal of the object grammar. returns (KEYFLAG, string) (TERMFLAG, string) or (NONTERMFLAG, string) respectively ''' if type(list)!=type([]) or len(list)!=2: raise FlowError, "unexpected body form" Name = list[0] # Does the Name rep a nonterm, keyword or term # of the object grammar (in that order). if ObjectGram.NonTermDict.has_key(Name): kind = NONTERMFLAG elif ObjectGram.LexD.keywordmap.has_key(Name): kind = KEYFLAG elif ObjectGram.TermDict.has_key(Name): kind = TERMFLAG else: raise TokenError, "Rule body contains unregistered string: "+Name restOfBody = list[1] return [(kind, Name)] + restOfBody def ruleGrammar(): ''' function to generate a grammar for parsing grammar rules ''' LexD = kjParser.LexDictionary() # use SQL/Ansi style comments LexD.comment( COMMENTFORM ) # declare keywords RStart = LexD.keyword( RSKEY ) TwoColons = LexD.keyword( COLKEY ) LeadsTo = LexD.keyword( LTKEY ) # declare terminals ident = LexD.terminal(IDNAME, IDFORM, IdentFun ) # declare nonterminals Root = kjParser.nonterminal("Root") Rulelist = kjParser.nonterminal("RuleList") Rule = kjParser.nonterminal("Rule") RuleName = kjParser.nonterminal("RuleName") NonTerm = kjParser.nonterminal("NonTerm") Body = kjParser.nonterminal("Body") # declare rules # Root >> NonTerm :: Rulelist InitRule = kjParser.ParseRule( Root, \ [NonTerm, TwoColons, Rulelist], RootReduction ) # Rulelist >> RLNull = kjParser.ParseRule( Rulelist, [], NullRuleList) # Rulelist >> Rule Rulelist RLFull = kjParser.ParseRule( Rulelist, [Rule,Rulelist], FullRuleList) # Rule >> "@R :: NonTerm >> Body RuleR = kjParser.ParseRule( Rule, \ [RStart, RuleName, TwoColons, NonTerm, LeadsTo, Body],\ InterpRule) # Rulename >> ident RuleNameR = kjParser.ParseRule( RuleName, [ident], InterpRuleName) # NonTerm >> ident NonTermR = kjParser.ParseRule( NonTerm, [ident], InterpNonTerm) # Body >> BodyNull = kjParser.ParseRule( Body, [], NullBody) # Body >> ident Body BodyFull = kjParser.ParseRule( Body, [ident,Body], FullBody) # declare Rules list and Associated Name dictionary Rules = [RLNull, RLFull, RuleR, RuleNameR, NonTermR,\ BodyNull, BodyFull, InitRule] RuleDict = \ { "RLNull":0, "RLFull":1, "RuleR":2, "RuleNameR":3, \ "NonTermR":4, "BodyNull":5, "BodyFull":6 , "InitRule":7 } # make the RuleSet and compute the associate DFA RuleSet = Ruleset( Root, Rules ) RuleSet.DoSLRGeneration() # construct the Grammar object Result = kjParser.Grammar( LexD, RuleSet.DFA, Rules, RuleDict ) return Result #enddef RuleGrammar() # this is the rule grammar object for parsing RULEGRAM = ruleGrammar() class CGrammar(kjParser.Grammar): ''' a derived grammar class this is a compilable grammar for automatic parser generation. ''' def Keywords(self, Stringofkeys): ''' insert a white separated list of keywords into the LexD TODO: THIS SHOULD CHECK FOR KEYWORD/NONTERMINAL/PUNCT NAME COLLISIONS (BUT DOESN'T YET). 
''' keywordlist = string.split(Stringofkeys) for keyword in keywordlist: self.LexD.keyword( keyword ) def punct(self, Stringofpuncts): ''' insert a string of punctuations into the LexD ''' for p in Stringofpuncts: self.LexD.punctuation(p) def comments(self, listOfCommentStrings): ''' register a list of regular expression strings to represent comments in LexD ''' for str in listOfCommentStrings: self.LexD.comment(str) def Nonterms(self, StringofNonterms): ''' register a white separated list of nonterminal strings ''' nonTermlist = string.split(StringofNonterms) for NonTerm in nonTermlist: self.NonTermDict[NonTerm] = kjParser.nonterminal(NonTerm) def Declarerules(self, StringWithRules): ''' initialize or add more rules to the RuleString ''' self.RuleString = self.RuleString + "\n" + StringWithRules def Compile(self, MetaGrammar=RULEGRAM): ''' The compilation function assumes NonTermDict RuleString LexD TermDict have all been set up properly (at least if the default MetaGrammar is used). On successful completion it will set up DFA RuleL RuleNameToIndex the following should return a list of rules with punctuations of self.LexD interpreted as trivial keywords keywords of seld.LexD interpreted as keywords and nonterminals registered in NonTermDict interpreted as nonterms. ParseResult should be of form ( (rootNT, RuleL), self ) ''' ParseResult = MetaGrammar.DoParse1( self.RuleString, self ) (RootNonterm, Rulelist) = ParseResult # make a ruleset and compute its DFA RuleS = Ruleset( RootNonterm, Rulelist ) RuleS.DoSLRGeneration() # make the rulename to index map to allow future bindings for i in range(0,len(Rulelist)): Rule = Rulelist[i] self.RuleNameToIndex[ Rule.Name ] = i # fill in the blanks self.DFA = RuleS.DFA self.RuleL = Rulelist # FOR DEBUG AND TESTING self.Ruleset = RuleS # DON'T clean up the grammar (misc structures are used) # in future bindings #enddef Compile def Reconstruct(self, VarName, Tofile, FName=None, indent=""): ''' Write a reconstructable representation for this grammar to a file EXCEPT: - rule associations to reduction functions will be lost (must be reset elsewhere) - terminals in the lexical dictionary will not be initialized IND is used for indentation, should be whitespace (add check!) FName if given will cause the reconstructed to be placed inside a function `FName`+"()" returning the grammar object NOTE: this function violates information hiding principles; in particular it "knows" the guts of the FSM and LexD classes ''' Reconstruction = codeReconstruct(VarName, Tofile, self, FName, indent) GrammarDumpSequence(Reconstruction) def MarshalDump(self, Tofile): ''' marshalling of a grammar to a file ''' Reconstruction = marshalReconstruct(self, Tofile) GrammarDumpSequence(Reconstruction) #endclass CGrammar def GrammarDumpSequence(ReconstructObj): ''' general procedure for different types of archiving for grammars ''' # assume an initialized Reconstruct Object with appropriate grammar etc. # put the lexical part ReconstructObj.PutLex() # put the rules ReconstructObj.PutRules() # put transitions ReconstructObj.PutTransitions() # finish up ReconstructObj.Cleanup() def NullCGrammar(): ''' function to create a "null CGrammar" ''' return CGrammar(None,None,None,{}) # utility classes class Reconstruct: ''' Grammar reconstruction objects encapsulate the process of grammar archiving. This "virtual class" is only for common behaviors of subclasses. 
''' def MakeTokenArchives(self): # make a list of all tokens and # initialize token > int dictionary keys = self.Gram.DFA.StateTokenMap.keys() tokenToInt = {} tokenSet = kjSet.NewSet([]) for k in keys: kjSet.addMember(k[1], tokenSet) tokens = kjSet.get_elts(tokenSet) for i in range(0,len(tokens)): tokenToInt[ tokens[i] ] = i self.keys = keys self.tokens = tokens # global sub self.tokInt = tokenToInt # global sub class codeReconstruct(Reconstruct): ''' grammar reconstruction to a file ''' def __init__(self, VarName, Tofile, Grammar, FName=None, indent =""): # do global subs for each of these self.Var = VarName self.File = Tofile self.FName = FName self.Gram = Grammar # put the reconstruction in a function if FName is given if FName != None: Tofile.write("\n\n") Tofile.write(indent+"def "+FName+"():\n") IND = indent+" " else: IND = indent self.I = IND # global sub! Tofile.write("\n\n") Tofile.write(IND+"# ***************************BEGIN RECONSTRUCTION\n") Tofile.write(IND+"# Python declaration of Grammar variable "+VarName+".\n") Tofile.write(IND+"# automatically generated by module "+PMODULE+".\n") Tofile.write(IND+"# Altering this sequence by hand will probably\n") Tofile.write(IND+"# leave it unusable.\n") Tofile.write(IND+"#\n") Tofile.write(IND+"import "+PMODULE+"\n\n") Tofile.write(IND+"# variable declaration:\n") Tofile.write(IND+VarName+"= "+PMODULE+".NullGrammar()\n\n") # make self.keys list of dfa keys, # self.tokens list of grammar tokens, # self.tokInt inverted dictionary for self.tokens self.MakeTokenArchives() Tofile.write("\n\n"+IND+"# case sensitivity behavior for keywords.\n") if self.Gram.LexD.isCaseSensitive(): Tofile.write(IND+VarName+".SetCaseSensitivity(1)\n") else: Tofile.write(IND+VarName+".SetCaseSensitivity(0)\n") #enddef __init__ def PutLex(self): IND = self.I Tofile = self.File VarName = self.Var LexD = self.Gram.LexD tokens = self.tokens Tofile.write("\n\n"+IND+"# declaration of lexical dictionary.\n") Tofile.write(IND+"# EXCEPT FOR TERMINALS\n") Tofile.write(IND+VarName+".LexD.punctuationlist = ") Tofile.write(`LexD.punctuationlist`+"\n") Tofile.write(IND+"# now comment patterns\n") for comment in LexD.commentstrings: Tofile.write(IND+VarName+".LexD.comment("+`comment`+")\n") Tofile.write(IND+"# now define tokens\n") for i in range(0,len(tokens)): tok = tokens[i] (kind, name) = tok if kind == TERMFLAG: # put warning at end! # nonterminal not installed in lexical dictionary here! Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ") Tofile.write(PMODULE+".termrep("+`name`+")\n") elif kind == KEYFLAG: Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ") Tofile.write(VarName+".LexD.keyword("+`name`+")\n") elif kind == NONTERMFLAG: Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ") Tofile.write(PMODULE+".nonterminal("+`name`+")\n") else: raise FlowError, "unknown token type" #enddef PutLex def PutRules(self): IND = self.I VarName = self.Var Rules = self.Gram.RuleL Tofile = self.File Root = self.Gram.DFA.root_nonTerminal Tofile.write("\n\n"+IND+"# declaration of rule list with names.\n") Tofile.write(IND+"# EXCEPT FOR INTERP FUNCTIONS\n") nrules = len(Rules) Tofile.write(IND+VarName+".RuleL = [None] * "+`nrules`+"\n") for i in range(0,nrules): # put warning at end: # rule reduction function not initialized here! 
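            # For orientation, each pass of this loop writes a block of the
            # following shape into the target file (the rule repr and names
            # here are placeholders, and GRAM stands for the actual VarName):
            #     rule = gadfly.kjParser.ParseRule(...)
            #     name = 'SomeRuleName'
            #     rule.Name = name
            #     GRAM.RuleL[i] = rule
            #     GRAM.RuleNameToIndex[name] = i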
rule = Rules[i] name = rule.Name Tofile.write(IND+"rule = "+`rule`+"\n") Tofile.write(IND+"name = "+`name`+"\n") Tofile.write(IND+"rule.Name = name\n") Tofile.write(IND+VarName+".RuleL["+`i`+"] = rule\n") Tofile.write(IND+VarName+".RuleNameToIndex[name] = "+`i`+"\n") Tofile.write("\n\n"+IND+"# DFA root nonterminal.\n") Tofile.write(IND+VarName+".DFA.root_nonTerminal =") Tofile.write(`Root`+"\n") #enddef PutRules def PutTransitions(self): IND = self.I Tofile = self.File VarName = self.Var maxState = self.Gram.DFA.maxState tokenToInt = self.tokInt StateTokenMap = self.Gram.DFA.StateTokenMap keys = self.keys Tofile.write("\n\n"+IND+"# DFA state declarations.\n") for state in range(1, maxState+1): Tofile.write(IND+VarName+".DFA.States["+`state`+"] = ") Tofile.write('['+`TRANSFLAG`+']\n') Tofile.write(IND+VarName+".DFA.maxState = "+`maxState`+"\n") Tofile.write("\n\n"+IND+"# DFA transition declarations.\n") for key in keys: (fromState, TokenRep) = key TokenIndex = tokenToInt[TokenRep] TokenArg = VarName+".IndexToToken["+`TokenIndex`+"]" TMap = StateTokenMap[key] TMaptype = TMap[0][0] if TMaptype == REDUCEFLAG: # reduction rulenum = TMap[0][1] Args = "("+`fromState`+","+TokenArg+","+`rulenum`+")" Tofile.write(IND+VarName+".DFA.SetReduction"+Args+"\n") elif TMaptype == MOVETOFLAG: # MoveTo Args = "("+`fromState`+","+TokenArg+","+`TMap[0][1]`+")" Tofile.write(IND+VarName+".DFA.SetMap"+Args+"\n") else: raise FlowError, "unexpected else (2)" #enddef def Cleanup(self): Tofile = self.File RuleL = self.Gram.RuleL tokens = self.tokens VarName = self.Var IND = self.I FName = self.FName Tofile.write("\n\n"+IND+"# Clean up the grammar.\n") Tofile.write(IND+VarName+".CleanUp()\n") # if the Fname was given return the grammar as function result if FName != None: Tofile.write("\n\n"+IND+"# return the grammar.\n") Tofile.write(IND+"return "+VarName+"\n") Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n") Tofile.write(IND+"# You must bind the following rule names \n") Tofile.write(IND+"# to reduction interpretation functions \n") for R in RuleL: Tofile.write(IND+"# "+VarName+".Bind("+`R.Name`+", ??function??)\n") Tofile.write(IND+"#(last rule)\n") Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n") Tofile.write(IND+"# You must bind the following terminals \n") Tofile.write(IND+"# to regular expressions and interpretation functions \n") warningPrinted = 0 for tok in tokens: (kind, name) = tok if kind == TERMFLAG and tok != ENDOFFILETOKEN: Tofile.write(IND+"# "+VarName+\ ".Addterm("+`name`+", ??regularExp??, ??function??)\n") warningPrinted = 1 if not warningPrinted: Tofile.write(IND+"# ***NONE** \n") Tofile.write(IND+"#(last terminal)\n") Tofile.write(IND+"# ******************************END RECONSTRUCTION\n") #enddef #endclass class marshalReconstruct(Reconstruct): ''' Reconstruction using marshalling to a file encodes internal structures for grammar using marshal-able objects. Final marshalling to the file is done at CleanUp() storing one big list. 
''' def __init__(self, Grammar, Tofile): self.Gram = Grammar self.File = Tofile # should archive self.tokens structure self.MakeTokenArchives() # archive this self.CaseSensitivity = Grammar.LexD.isCaseSensitive() def PutLex(self): LexD = self.Gram.LexD # archive these self.punct = LexD.punctuationlist self.comments = LexD.commentstrings def PutRules(self): # archive this self.Root = self.Gram.DFA.root_nonTerminal # make a list of tuples that can be used with # rule = apply(ParseRule, tuple[1]) # rule.Name = tuple[0] Rules = self.Gram.RuleL nrules = len(Rules) RuleTuples = [None] * nrules for i in range(nrules): rule = Rules[i] RuleTuples[i] = (rule.Name, rule.components()) #archive this self.RuleTups = RuleTuples def PutTransitions(self): keys = self.keys tokenToInt = self.tokInt StateTokenMap = self.Gram.DFA.StateTokenMap # archive this self.MaxStates = self.Gram.DFA.maxState # create two lists, # one for reductions with contents (fromState, tokennumber, rulenum) # one for movetos with contents (fromstate, tokennumber, tostate) # (note: token number not token itself to allow sharing) # to allow arbitrary growing, first use dicts: reductDict = {} nreducts = 0 moveToDict = {} nmoveTos = 0 for key in self.keys: (fromState, TokenRep) = key TokenIndex = tokenToInt[TokenRep] TMap = StateTokenMap[key] TMaptype = TMap[0][0] if TMaptype == REDUCEFLAG: rulenum = TMap[0][1] reductDict[nreducts] = (fromState, TokenIndex, rulenum) nreducts = nreducts + 1 elif TMaptype == MOVETOFLAG: ToState = TMap[0][1] moveToDict[nmoveTos] = (fromState, TokenIndex, ToState) nmoveTos = nmoveTos + 1 else: raise FlowError, "unexpected else" #endfor # translate dicts to lists reducts = [None] * nreducts for i in range(nreducts): reducts[i] = reductDict[i] moveTos = [None] * nmoveTos for i in range(nmoveTos): moveTos[i] = moveToDict[i] # archive these self.reducts = reducts self.moveTos = moveTos # TODO: document this new marshalling method in the docco! def Cleanup(self): ''' this is the function that does the marshalling ''' # dump the info self.File.write('tokens = %s\n'%`self.tokens`) self.File.write('punct = %s\n'%`self.punct`) self.File.write('comments = %s\n'%`self.comments`) self.File.write('RuleTups = %s\n'%`self.RuleTups`) self.File.write('MaxStates = %s\n'%`self.MaxStates`) self.File.write('reducts = %s\n'%`self.reducts`) self.File.write('moveTos = %s\n'%`self.moveTos`) self.File.write('Root = %s\n'%`self.Root`) self.File.write('CaseSensitivity = %s\n'%`self.CaseSensitivity`) # # $Log: kjParseBuild.py,v $ # Revision 1.6 2002/05/11 02:59:04 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.5 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.4 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. # # Revision 1.3 2002/05/07 04:03:14 richard # . major cleanup of test_gadfly # # Revision 1.2 2002/05/06 23:27:09 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . 
made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . fixed the module name in gadfly/kjParser.py # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/kjParser.py0100644000157700012320000014104507467104371015405 0ustar rjonestech"""Python for parser interpretation :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: kjParser.py,v 1.5 2002/05/11 02:59:05 richard Exp $: """ # BUGS: # Lexical error handling is not nice # Parse error handling is not nice # # Lex analysis may be slow for big grammars # Setting case sensitivity for keywords MUST happen BEFORE # declaration of keywords. import kjSet import string import re import string # set this flag for regression testing at each load RUNTESTS = 0 # set this flag to enable warning for default reductions WARNONDEFAULTS = 0 # some local constants TERMFLAG = -1 # FLAG FOR TERMINAL NOMATCHFLAG = -2 # FLAG FOR NO MATCH IN FSM MOVETOFLAG = -3 # FLAG FOR "SHIFT" IN SN FSM REDUCEFLAG = -4 # FLAG FOR REDUCTION IN FSM TRANSFLAG = -5 # FLAG FOR TRANSIENT STATE IN FSM KEYFLAG = -6 # FLAG FOR KEYWORD NONTERMFLAG = -7 # FLAG FOR NONTERMINAL TERMFLAG = -8 # FLAG FOR TERMINAL EOFFLAG = "*" # FLAG for End of file # set this string to the Module name (filename) # used for dumping reconstructable objects THISMODULE = "gadfly.kjParser" # regular expression for matching whitespace WHITERE = "["+string.whitespace+"]+" WHITEREGEX = re.compile(WHITERE) # local errors class LexTokenError(Exception): '''may happen on bad string''' class UnkTermError(Exception): ''' ditto ''' class BadPunctError(Exception): ''' if try to make whitespace a punct ''' class ParseInitError(Exception): ''' shouldn't happen? ''' class FlowError(Exception): ''' shouldn't happen!!! (bug) ''' class ReductError(Exception): ''' shouldn't happen? ''' class NondetError(Exception): ''' shouldn't happen? ''' # the end of file is interpreted in the lexical stream as # a terminal... # this should be appended to the lexical stream: ENDOFFILETOKEN = (TERMFLAG, EOFFLAG) # in FSM use the following terminal to indicate eof ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG) # Utility function for match conversion from regex to re def RMATCH(re, key, start=0): #print "RMATCH: %s -> %s <- start=%s" % (re.pattern, key, start) group = re.match(key, start) if group is None: #print "RMATCH: -1" return -1 len = group.end() - group.start() #print "RMATCH: %s (%s)" % (len, group.group()) return len # utility function for error diagnostics def DumpStringWindow(Str, Pos, Offset=15): L = [] L.append("near ::") start = Pos-Offset end = Pos+Offset if start<0: start = 0 if end>len(Str): end = len(Str) L.append(`Str[start:Pos]`+"*"+`Str[Pos:end]`) from string import join return join(L, "\n") class LexDictionary: '''Lexical dictionary class this data structure is used by lexical parser below. basic operations: LD.punctuation(string) registers a string as a punctuation EG: LD.punctuation(":") Punctuations are treated as a special kind of keyword that is recognized even when not surrounded by whitespace. IE, "xend" will not be recognized as "x end", but "x;" will be recognized as "x ;" if "end" is a regular keyword but ";" is a punctuation. Only single character punctuations are supported (now), ie, ":=" must be recognized as ":" "=" above the lexical level. 
LD.comment(compiled_reg_expression) registers a comment pattern EG LD.comment(regex.compile("--.*\n")) asks to recognize ansi/sql comments like "-- correct?\n" LD[compiled_reg_expression] = (TerminalFlag, Function) # assignment! specifies a regular expression that should be associated with the lexical terminal marker TerminalFlag EG: LD[regex.compile("[0-9]+")] = ("integer",string.atoi) the Function should be a function on one string argument that interprets the matching string as a value. if None is given, just the string itself will be used as the interpretation. (a better choice above would be a function which "tries" atoi first and uses atol on overflow). NOTE: ambiguity among regular expressions will be decided arbitrarily (fix?). LD[string] # retrieval! returns ((KEYFLAG, Keywordstring), Keywordstring) if the (entire) string matches a keyword or a punctuation Keywordstring. otherwise returns ((TERMFLAG, Terminalname), value) if the (entire) string matches the regular expression for a terminal flaged by Terminalname; value is the interpreted value. TerminalFlag better be something other than KEYFLAG! otherwise raises an error! comments not filtered here! the following additional functions are used for autodocumentation in declaring rules, etcetera. begin = LD.keyword("begin") sets variable "begin" to (KEYFLAG, "BEGIN") if "begin" maps to keyword "BEGIN" in LD integer = LD.terminal("integer") sets variable integer to ("integer", Function) if "integer" is a registered terminal Function is its associated interpretation function. ''' def __init__(self): # commentpatterns is simply a list of compiled regular expressions # that represent comments self.commentpatterns = [] # commentstrings is used for debugging/dumping/reconstruction etc. self.commentstrings = [] # punctuationlist is a string of punctuations self.punctuationlist = "" # keywordmap is a dictionary mapping recognized keyword strings # and punctuations to their constant representations. self.keywordmap = KeywordDict() # regexprlist is a list of triples (regex,Flag,function) mapping # regular expressions to their flag and interpreter function. 
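        # e.g. after LD[re.compile("[0-9]+")] = ("integer", string.atoi)
        # the list holds the triple (<pattern>, (TERMFLAG, "integer"),
        # string.atoi); lookups scan the triples in registration order.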
self.regexprlist = [] def Dump(self): print "comments = ", self.commentstrings print "punctuations = ", self.punctuationlist print "keywordmap =" self.keywordmap.Dump() print "regexprlist =", self.regexprlist def __getitem__(self, key): # try to match string to a keyword if self.keywordmap.has_key(key): return self.keywordmap[key] # try to match a regular expression length = len(key) for regexpr, flag, function in self.regexprlist: index = RMATCH(regexpr, key) if index == length: break else: raise LexTokenError, "no match for string: " + `key` # use the function to interpret the string, if given if function != None: value = function(key) else: value = key return (flag, value) def keyword(self,str): ''' LD.keyword("this") will make a new keyword "this" if not found ''' # upcase the string, if needed if self.keywordmap.caseInsensitive: str = string.upper(str) if not self.keywordmap.has_key(str): # redundancy for to avoid excess construction during parsing token = (KEYFLAG,str) self.keywordmap[str] = (token,str) else: (token, str2) = self.keywordmap[str] return token def terminal(self, string, RegExpr=None, Function=None): ''' LD.terminal("this") will just look for "this" LD.terminal("this", RE, F) will register a new terminal RE must be a compiled regular expression or string reg ex F must be an interpretation function ''' if RegExpr != None and Function != None: if type(RegExpr) == type(""): RegExpr = re.compile(RegExpr) self[ RegExpr ] = ( string, Function) for regexpr, token, function in self.regexprlist: if token[1] == string: break else: raise UnkTermError, "no such terminal" return token def __setitem__(self,key,value): if type(key) == type(''): # if it's a string it must be a keyword if self.keywordmap.caseInsensitive: value = string.upper(value) key = string.upper(key) self.keywordmap[key] = ( (KEYFLAG, value), value) else: # otherwise it better be a compiled regular expression (not #verified) (Name, Function) = value Flag = (TERMFLAG, Name) regexpr = key self.regexprlist.append((regexpr, Flag, Function)) def comment(self, string): ''' register a regular expression as a comment ''' # regexpr better be a uncompiled string regular expression! # (not verified) regexpr = re.compile(string) self.commentpatterns = self.commentpatterns + [ regexpr ] self.commentstrings = self.commentstrings + [ string ] def punctuation(self,Instring): ''' register a string as a punctuation ''' if type(Instring) != type("") or len(Instring)!=1: raise BadPunctError, "punctuation must be string of length 1" if Instring in string.whitespace: raise BadPunctError, "punctuation may not be whitespace" self.punctuationlist = self.punctuationlist + Instring return self.keyword(Instring) def isCaseSensitive(self): ''' testing and altering case sensitivity behavior ''' return not self.keywordmap.caseInsensitive def SetCaseSensitivity(self, Boolean): ''' setting case sensitivity MUST happen before keyword declarations! ''' self.keywordmap.caseInsensitive = not Boolean def Token(self, String, StartPosition): ''' function to do same as __getitem__ above but looking _inside_ a string instead of at the whole string returns (token,skip) where token is one of ((KEYFLAG,name),name) or ((TERMFLAG,termname),value) and skip is the length of substring of string that matches thetoken ''' finished = 0 # dummy, exit should be nonlocal totalOffset = 0 while not finished: # flag EOF if past end of string? 
if len(String) <= StartPosition: return (ENDOFFILETERM, 0) # skip whitespace whitespacefound = 0 skip = RMATCH(WHITEREGEX,String, StartPosition) if skip > 0: StartPosition = StartPosition + skip totalOffset = totalOffset + skip whitespacefound = 1 # try to find comment, keyword, term in that order: # looking for comment commentfound = 0 for commentexpr in self.commentpatterns: offset = RMATCH(commentexpr,String,StartPosition) if offset != -1: if offset<1: info = DumpStringWindow(String,StartPosition) raise LexTokenError, "zero length comment "+info commentfound = 1 StartPosition = StartPosition + offset totalOffset = totalOffset + offset # looking for a keyword keypair = self.keywordmap.hasPrefix(String,StartPosition, self.punctuationlist) if keypair != 0: return ( keypair[0], keypair[1] + totalOffset) # looking for terminal for (regexpr, Flag, Function) in self.regexprlist: offset = RMATCH(regexpr,String,StartPosition) if offset != -1: matchstring = String[StartPosition : offset+StartPosition] if Function != None: value = Function(matchstring) else: value = matchstring return ((Flag, value) , offset + totalOffset) if not (commentfound or whitespacefound): info = DumpStringWindow(String,StartPosition) raise LexTokenError, "Lexical parse failure "+info # alternate, experimental implementation class lexdictionary: def __init__(self): self.skip = "" self.commentstrings = [] self.punctuationlist = "" self.keywordmap = KeywordDict() self.termlist = [] # list of (term, regex, flag, interpret_fn) self.uncompiled = 1 # only compile after full initialization. self.laststring= self.lastindex= self.lastresult = None def Dump(self, *k): raise "sorry", "not implemented" __getitem__ = Dump def keyword(self, str): kwm = self.keywordmap if kwm.caseInsensitive: str = string.upper(str) try: (token, str2) = kwm[str] except: token = (KEYFLAG, str) self.keywordmap[str] = (token,str) return token def terminal(self, str, regexstr=None, Function=None): if regexstr is not None: flag = (TERMFLAG, str) self.termlist.append( (str, regexstr, flag, Function) ) return flag else: for (s,fl,fn) in self.termlist: if fl[1]==str: return fl else: raise UnkTermError, "no such terminal" __setitem__ = Dump def comment(self, str): self.commentstrings.append(str) def punctuation(self, Instring): if type(Instring) != type("") or len(Instring)!=1: raise BadPunctError, "punctuation must be string of length 1" if Instring in string.whitespace: raise BadPunctError, "punctuation may not be whitespace" self.punctuationlist = self.punctuationlist + Instring return self.keyword(Instring) def SetCaseSensitivity(self, Boolean): self.keywordmap.caseInsensitive = not Boolean def Token(self, String, StartPosition): # shortcut for reductions. 
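        # The parser can ask for the token at the same (string, index)
        # position more than once, so the previous result is memoized and
        # returned below without rescanning.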
        if self.laststring is String and self.lastindex == StartPosition:
            #print "lastresult", self.lastresult
            return self.lastresult
        self.lastindex = StartPosition
        self.laststring = String
        #print `String[StartPosition: StartPosition+60]`
        if self.uncompiled:
            self.compile()
            self.uncompiled = None
        finished = 0
        totalOffset = 0
        skipprog = self.skipprog
        keypairfn = self.keywordmap.hasPrefix
        punctlist = self.punctuationlist
        termregex = self.termregex
        while not finished:
            if len(String) <= StartPosition:
                result = self.lastresult = (ENDOFFILETERM, 0)
                return result
            # skip ws and comments
            #skip = skipprog.match(String, StartPosition)
            skip = RMATCH(skipprog, String, StartPosition)
            # a zero length whitespace/comment match can make no progress;
            # flag it as a lexical error rather than falling through
            if skip == 0:
                info = DumpStringWindow(String, StartPosition)
                raise LexTokenError, \
                      "zero length whitespace or comment "+info
            if skip > 0:
                StartPosition = StartPosition + skip
                totalOffset = totalOffset + skip
                continue
            # look for keyword
            keypair = keypairfn(String, StartPosition, punctlist)
            if keypair != 0:
                #print "keyword", keypair
                result = self.lastresult = (keypair[0], keypair[1]+totalOffset)
                return result
            # look for terminal
            #print "Termregex: %s --> %s <-- start=%s" % (termregex.pattern, String, StartPosition)
            offset = termregex.match(String, StartPosition)
            if offset is not None:
                g = offset.group
                for (term, regex, flag, fn) in self.termlist:
                    test = g(term)
                    if test:
                        #print "terminal", test
                        if fn is not None:
                            value = fn(test)
                        else:
                            value = test
                        result = self.lastresult = (
                            (flag, value),
                            offset.end() - offset.start() + totalOffset)
                        return result
            # error if we get here
            info = DumpStringWindow(String, StartPosition)
            raise LexTokenError, "Lexical token not found "+info

    def isCaseSensitive(self):
        return not self.keywordmap.caseInsensitive

    def compile(self):
        from string import joinfields, whitespace
        import re
        skipregexen = self.commentstrings + [WHITERE]
        skipregex = "(" + joinfields(skipregexen, ")|(") + ")"
        #print skipregex; import sys; sys.exit(1)
        self.skipprog = re.compile(skipregex)
        termregexen = []
        termnames = []
        for (term, rgex, flag, fn) in self.termlist:
            fragment = "(?P<%s>%s)" % (term, rgex)
            termregexen.append(fragment)
            termnames.append(term)
        termregex = joinfields(termregexen, "|")
        self.termregex = re.compile(termregex)
        self.termnames = termnames

LexDictionary = lexdictionary ##### test!
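# A minimal usage sketch for the lexical dictionary, kept in comments so the
# module is unchanged on import.  The results shown are worked out by hand
# from the methods above, not captured from a run:
#
#   LD = LexDictionary()
#   semi = LD.punctuation(";")          # token (KEYFLAG, ";")
#   begin = LD.keyword("begin")         # token (KEYFLAG, "begin")
#   integer = LD.terminal("int", "[0-9]+", string.atoi)
#   LD.Token("begin 42", 0)   # -> (((KEYFLAG, "begin"), "begin"), 5)
#   LD.Token("begin 42", 5)   # -> (((TERMFLAG, "int"), 42), 3)
#   LD.Token("begin 42", 8)   # -> (ENDOFFILETERM, 0)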
#XXX # a utility class: dictionary of prefixes # should be generalized to allow upcasing of keyword matches class KeywordDict: def __init__(self, caseInsensitive = 0): self.FirstcharDict = {} self.KeyDict = {} self.caseInsensitive = caseInsensitive def Dump(self): if self.caseInsensitive: print " case insensitive" else: print " case sensitive" keys = self.KeyDict.keys() print " keyDict has ", len(keys), " elts" for key in keys: print " ", key," maps to ",self.KeyDict[key] firstchars = self.FirstcharDict.keys() print " firstcharDict has ", len(firstchars), " elts" for char in firstchars: print " ", char," maps to ",self.FirstcharDict[char] # set item assumes value has correct case already, if case sensitive def __setitem__(self, key, value): if len(key)<1: raise LexTokenError, "Keyword of length 0" if self.caseInsensitive: KEY = string.upper(key) else: KEY = key firstchar = KEY[0:1] if self.FirstcharDict.has_key(firstchar): self.FirstcharDict[firstchar] = \ self.FirstcharDict[firstchar] + [(KEY, value)] else: self.FirstcharDict[firstchar] = [(KEY, value)] self.KeyDict[KEY] = value # if String has a registered keyword at start position # return its canonical representation and offset, else 0 # keywords that are not punctuations should be # recognized only if followed # by a punctuation or whitespace char # def hasPrefix(self,String,StartPosition,punctuationlist): First = String[StartPosition:StartPosition+1] fcd = self.FirstcharDict caseins = self.caseInsensitive if caseins: First = string.upper(First) if fcd.has_key(First): Keylist = fcd[First] else: return 0 for (key,value) in Keylist: offset = len(key) EndPosition = StartPosition+offset match = String[StartPosition : EndPosition] if caseins: match = string.upper(match) if key == match: if len(key)==1 and key in punctuationlist: # punctuations are recognized regardless of nextchar return (value,offset) else: # nonpuncts must have punct or whitespace following #(uses punct as single char convention) if EndPosition == len(String): return (value, offset) else: nextchar = String[EndPosition] if nextchar in string.whitespace\ or nextchar in punctuationlist: return (value, offset) return 0 # if no exit inside for loop, fail def __getitem__(self,key): if self.caseInsensitive: key = string.upper(key) return self.KeyDict[key] def has_key(self,key): if self.caseInsensitive: key = string.upper(key) return self.KeyDict.has_key(key) # LexStringWalker walks through a string looking for # substrings recognized by a lexical dictionary # # ERROR REPORTING NEEDS IMPROVEMENT class LexStringWalker: def __init__(self, String, LexDict): self.Position = 0 self.NextPosition = 0 self.String = String self.LexDict = LexDict self.PastEOF = 0 self.Done = 0 def DUMP(self): return DumpStringWindow(self.String,self.Position) #reset not defined def more(self): return not self.PastEOF def getmember(self): (Token,skip) = self.LexDict.Token(self.String, self.Position) self.NextPosition = self.Position + skip if Token == ENDOFFILETERM: self.PastEOF = 1 return Token def next(self): if self.Done: data = self.DUMP() raise LexTokenError, "no next past end of file "+data elif self.PastEOF: self.Done=1 elif self.NextPosition > self.Position: self.Position = self.NextPosition else: dummy = self.getmember() if self.NextPosition <= self.Position: data = self.DUMP() raise LexTokenError, "Lexical walker not advancing "+data self.Position = self.NextPosition class ParserObj: ''' the parse class: Based loosely on Aho+Ullman, Principles of Compiler Design, Ch.6. 
except that they don't describe how to handle boundary conditions, I made them up myself. Note: This could be implemented using just functions; it's implemented as a class to facilitate diagnostics and debugging in case of failures of various sorts. a parse accepts a rule list a lexically analysed stream with methods stream.getmember() returns the current token on the stream stream.next() moves on to next token stream.more() returns false if current token is the last token and a FSM (finite state machine) with methods FSM.root_nonTerminal the nonterminal at which to start parsing FSM.initial_state the initial state to start at FSM.successful_final_state the final state to go to upon successful parse FSM.map(Current_State,Current_Token) returns either (TERMFLAG, 0) if Current_State is terminal (final or reduction). (NOMATCHFLAG, 0) if Current_State is nonterminal, but the Current_Token and Next_Token do not lead to a valid state in the FSM (MOVETOFLAG, Next_State) if Current_State is nonterminal and Current_Token, Next_token map to Next_State from Current_State. (REDUCEFLAG, Rulenum) if Current_State indicates a reduction at Current_Token for rule Rule number Rule and a Stack with methods (replaced with dictionary) (init: {-1:0} ) Stack.Top() returns top of stack (no pop) ( Stack[Stack[-1]] ) Stack.Push(Object) ( Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=Object ) Stack.MakeEmpty() ( Stack[-1]=0 ) Stack.IsEmpty() ( Stack[-1] == 0 ) Stack.Pop() ( Stack[-1] = Stack[-1]-1 ) stack contents created by Parser will be of form (State,Value) where Value was inserted at FSM state State. Value of form either (KEYFLAG, Name) (NontermName, reductionvalue) or (TerminalName, value) and an optional parameter Evaluate which if 0 indicates that rules should be evaluated, otherwise indicates that rules should just be reduced and the reduction structure should be used as the result of the rule rule objects must support methods Rule.reduce(Stack) pops off the elements corresponding to the body of the Rule from the stack and returns (NewStack,Red) where NewStack is the stack minus the body and Red is the result of evaluating the reduction function on this instance of the rule. Rule.Nonterm the nonterminal at the head of the rule ''' # Evaluate determines whether rules should be evaluated # after reductions. Context is an argument passed to the # list reduction function # def __init__(self, Rulelist, Stream, FSM, Stack, Evaluate=1, Context=None): self.Rules = Rulelist self.LexStream = Stream self.FSM = FSM self.Stack = Stack self.Context = Context # start with empty stack, initial_state, no nonterminal #self.Stack[-1] = 0# self.Stack.MakeEmpty() self.Stack[:] = [] self.State = FSM.initial_state self.currentNonterm = None self.Evaluate = Evaluate def DoOneReduction(self): ''' DoOneReduction accepts tokens from the stream and pushes them onto the stack until a reduction state is reached. Resolve the reduction ''' current=self.State FSM=self.FSM Stack = self.Stack Context = self.Context Stream = self.LexStream # the internal FSM.StateTokenMap dictionary is used directly here. 
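        # Shape reminder: STMap[(state, token)][0] is (MOVETOFLAG, nextstate)
        # for a shift or (REDUCEFLAG, rulenumber) for a reduction; a missing
        # key is treated as NOMATCHFLAG (a parse error) below.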
STMap = FSM.StateTokenMap #if FSM.final_state(current): # raise ParseInitError, 'trying to reduce starting at final state' tokenVal = Stream.getmember() #print "tokenVal", tokenVal token = tokenVal[0] # push the token and traverse FSM until terminal state is reached #(flag, nextThing) = FSM.map(current, token) key = (current, token) try: (flag, nextThing) = STMap[key][0] except KeyError: flag = NOMATCHFLAG while flag == MOVETOFLAG: nextState = nextThing #print current, " shift ", token, # no sanity check, possible infinite loop # push current token and next state ThingToPush = (nextState, tokenVal) #print "pushing ", ThingToPush #Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=ThingToPush Stack.append(ThingToPush) #Stack.Push( ThingToPush ) # move to next token, next state Stream.next() # error if end of stream if not Stream.more(): # optimized Stream.PastEOF (?) data = Stream.DUMP() raise EOFError, 'end of stream during parse '+data current = nextState tokenVal = Stream.getmember() token = tokenVal[0] #MAP = FSM.map(current,token) key = (current, token) try: (flag, nextThing) = STMap[key][0] except KeyError: flag = NOMATCHFLAG # at end of while loop we should be at a reduction state if flag == REDUCEFLAG: rulenum = nextThing #print current, " reduce ", token, self.Rules[rulenum] # normal case # perform reduction rule = self.Rules[rulenum] Nonterm = rule.Nonterm self.currentNonterm = Nonterm (Stack, reduct) = rule.reduce( Stack , Context ) GotoState = self.GotoState(rule) # push the Gotostate and result of rule reduction on stack ThingToPush = (GotoState, (Nonterm, reduct) ) # push the result of the reduction and exit normally #print "pushing ", ThingToPush #Stack[-1]=Stack[-1]+1; Stack[Stack[-1]]=ThingToPush Stack.append(ThingToPush) #Stack.Push(ThingToPush) self.State=GotoState return 1 # normal successful completion # some error cases elif flag == NOMATCHFLAG: self.ParseError(current,tokenVal, "nomatch1") else: data = Stream.DUMP() s = """ flag = %s map = %s """ % (flag, FSM.map(current,token)) data = data + s raise FlowError, 'unexpected else '+data def GotoState(self, rule): ''' compute the state to goto after a reduction is performed on a rule. Algorithm: determine the state at beginning of reduction and the next state indicated by the head nonterminal of the rule. special case: empty stack and root nonterminal > success. ''' FSM = self.FSM Stack = self.Stack Head = rule.Nonterm if len(Stack)==0: #Stack[-1]==0: #Stack.IsEmpty(): BeforeState = FSM.initial_state else: BeforeState = Stack[-1][0] #Stack[Stack[-1]][0] #Stack.Top()[0] # is this right? if the stack is empty and the Head # is the root nonterm, then goto is final state if len(Stack)==0 and Head == FSM.root_nonTerminal:#Stack.isEmpty() Result = FSM.successful_final_state else: # consider eliminating the call to .map here? (efficiency) (flag, Result) = FSM.map(BeforeState, Head) if flag != MOVETOFLAG: #FSM.DUMP() self.ParseError(BeforeState, Head, "notmoveto") return Result def ParseError( self, State, Token, *rest): # make this parse error nicer (add diagnostic methods?) L = [""] L.append("*******************************") L.append("current state = "+`State`) L.append("expects: ") expects = "" for (flag,name) in self.FSM.Expects(State): if flag in (TERMFLAG, KEYFLAG): expects = expects + `name`+ ", " L.append(expects) L.append(`rest`) L.append("current token = " + `Token`) #print "Stack =", #self.StackDump(5) #print from string import join data = self.LexStream.DUMP() + join(L, "\n") raise SyntaxError, 'unexpected token sequence.' 
+ data def StackDump(self, N): Stack = self.Stack Topkey = len(Stack) if Topkey>N: Start = Topkey - N else: Start = 1 for i in range(Start,Topkey+1): print " :: ", Stack[i], def GO(self): '''execute parsing until done ''' while self.State != self.FSM.successful_final_state: self.DoOneReduction() # should I check that stack has only one elt here? # return result of last reduction return self.Stack[-1][1] #self.Stack.Top()[1] def nonterminal(string): ''' function for declaring a variable to represent a nonterminal: eg Program = nonterminal("program") included for convenient autodocumentation ''' return (NONTERMFLAG, string) def termrep(string): ''' declaring a terminal WITHOUT INSTALLING IT IN A LexDict ''' return (TERMFLAG, string) def DefaultReductFun( RuleResultsList, Context ): ''' used as a default reduction function for rules ''' if WARNONDEFAULTS: print "warn: default reduction." print " ", RuleResultsList return RuleResultsList class ParseRule: ''' the rule class a rule is defined by a goal nonterminal marker of form (NONTERMFLAG, Name) and a list defining the body which must contain elts of form (KEYFLAG, Name) or (NONTERMFLAG, Name) of (TERMFLAG, Name) and a reduction function which takes a list of the same size as the BodyList (consisting of the results of the evaluations of the previous reductions) and returns an interpretation for the body ''' def __init__(self, goalNonTerm, BodyList, \ ReductFunction = DefaultReductFun): #print BodyList # check some of the arguments (very limited!) if len(goalNonTerm) != 2 or goalNonTerm[0] != NONTERMFLAG: raise TypeError, "goal of rule must be nonterminal" for m in BodyList: #print m if len(m) != 2: raise TypeError, "invalid body form for rule" self.Nonterm = goalNonTerm self.Body = BodyList self.ReductFun = ReductFunction def __repr__(self): return THISMODULE + ".ParseRule" + `self.components()` def components(self): ''' marshal-able components of a rule ''' return (self.Nonterm, self.Body) def reduce(self, Stack, Context=None): ''' rule.reduce(Stack) pops of the stack elements corresponding to the body of the rule and prepares the appropriate reduction object for evaluation (or not) at higher levels ''' #print "reducing", Stack Blength = len(self.Body) #print Blength, len(self.Body) # pop off previous results from stack corresponding to body BodyResults = [None] * Blength #BodyNames = [None] * Blength # for debug #print "popping: " for i in range(1,Blength+1): Bindex = Blength - i # stack contents pop off in reverse order # get and destructure the rule body entry RuleEntry = self.Body[Bindex] ( REkind , REname ) = RuleEntry # get and destructure the stack entry PoppedValue = Stack[-i] #Stack.Top() #print PoppedValue, #del Stack[-1]# = Stack[-1]-1 #Stack.Pop() SETokVal = PoppedValue[1] SEvalue = SETokVal[1] SEname = SETokVal[0][1] # the names from rule and stack must match (?) if SEname != REname: print SEname, REname print self raise ReductError, " token names don't match" # store the values for the reduction BodyResults[Bindex] = SEvalue #BodyNames[Bindex] = SEname # debug del Stack[len(Stack)-Blength:] #print "reduced", Stack #print # evaluate the reduction, in context reduct = self.ReductFun(BodyResults, Context) if WARNONDEFAULTS and self.ReductFun is DefaultReductFun: # should check whether name is defined before this... 
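            # (hedged note: rules only acquire a .Name attribute once
            # Grammar.AddNameDict or UnMarshaller.MakeRules has run, so the
            # print below can raise AttributeError for anonymous rules)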
print " default used on ", self.Name #Reduction( self.ReductFun, BodyResults, BodyNames ) return (Stack, reduct) def PrintDefaultBindings(rulelist): ''' for debugging: look through a rule list and print names of rules that have default binding ''' for r in rulelist: if r.ReductFun is DefaultReductFun: print r.Name class FSMachine: def __init__(self, rootNonTerm): # start and success state conventions startState=1 successState=0 self.root_nonTerminal = rootNonTerm self.initial_state = startState self.successful_final_state = successState # the list of states of the FSM, implemented as a dictionary # entries are identified by their index # content is # a list whose first elt is either TRANSFLAG, or TERMFLAG # other list elts may be added by other layers (parse generator) # indicating the kind of the state. self.States = {} # allocate start and success states self.States[startState]=[TRANSFLAG] self.States[successState]=[TERMFLAG] # the most recently allocated state self.maxState= startState # the map of current token+state number to next state #with entries of form (tokenname,state):nextstate_sequence # self.StateTokenMap = {} def DUMP(self, DumpMapData=1, DumpStateData=1, ForbiddenMark={}): ''' ForbiddenMark is for filtering out maps to an error state ''' print "root nonterminal is ", self.root_nonTerminal print "start at ", self.initial_state print "end at ", self.successful_final_state print "number of states: ", self.maxState if DumpStateData: print for State in range(0,self.maxState+1): Data = self.States[State] print State, ": ", Data if DumpMapData: print for key in self.StateTokenMap.keys(): map = self.StateTokenMap[key] if map[0][0] == MOVETOFLAG: ToStateData = self.States[map[0][1]] if len(ToStateData) < 2: Mark = None else: Mark = ToStateData[1] if Mark != ForbiddenMark: print key, " > ", map, " = ", ToStateData else: print key, " > reduction to rule number ", map[0][1] def Expects(self, State): ''' what tokens does a state expect? ''' keys = self.StateTokenMap.keys() Tokens = kjSet.NewSet( [] ) for (state1,token) in keys: if State == state1: kjSet.addMember(token,Tokens) return kjSet.get_elts(Tokens) def NewState(self, kind, AdditionalInfo = []): ''' "allocate" a new state of specified kind kind must either be TRANSFLAG, TERMFLAG or a rule object returns the number of the new state ''' if not kind in (TRANSFLAG,TERMFLAG,REDUCEFLAG): raise TypeError, "unknown state kind" available = self.maxState+1 self.States[available] = [kind] + AdditionalInfo self.maxState = available return available def SetReduction(self, fromState, TokenRep, Rulenum): ''' Install a reduction transition in the FSM: a reduction is represented by mapping to a rule index no nondeterminism is allowed. 
''' key = (fromState, TokenRep) if not self.StateTokenMap.has_key(key): self.StateTokenMap[ key ] = ((REDUCEFLAG, Rulenum),) else: raise ReductError, "attempt to set ambiguous reduction" def SetMap(self, fromState, TokenRep, toState): ''' Install a "shift" or "goto transition in the FSM: supports nondeterminism by storing a sequence of possible transitions ''' key = (fromState, TokenRep) if self.StateTokenMap.has_key(key): Old = self.StateTokenMap[key] if Old[0][0] != MOVETOFLAG: # if the old value was not an integer, not a "normal state": # complain: raise NondetError, \ "attempt to make inappropriate transition ambiguous" self.StateTokenMap[key] = Old + ((MOVETOFLAG,toState),) else: self.StateTokenMap[key] = ((MOVETOFLAG,toState),) def map(self, current_state, current_token): ''' Find the action indicated by fsm on (current_state, current_token) input. note: in the event of nondeterministic choice this chooses the first possibility listed. ParseObj.DoOneReduction() currently uses the internal structure of StateTokenMap directly, rather than using this function. ''' StateEntry = self.States[current_state][0] if StateEntry == TERMFLAG: return (TERMFLAG, 0) elif StateEntry == TRANSFLAG: # try to find a transition for this token and state key = (current_state, current_token) try: TMap = self.StateTokenMap[key] return TMap[0] except KeyError: return (NOMATCHFLAG, 0) else: raise FlowError, "unexpected else (2)" class Grammar: ''' the grammar class: a grammar consists of - a LexDict lexical dictionary; - a deterministic FSMachine; - a Rulelist and optionally a dictionary that maps Rulenames to Rulelist indices (used for dumping and externally) ''' def __init__(self, LexD, DFA, RuleL, RuleNameDict = None): # for auto initialization set LexD,DFA,RuleL to None if LexD == None and DFA == None and RuleL == None: self.LexD = LexDictionary() # use a dummy root nonterminal -- must fix elsewhere! self.DFA = FSMachine("ERROR") self.RuleL = [] else: self.LexD = LexD self.DFA = DFA self.RuleL = RuleL if RuleNameDict != None: self.AddNameDict(RuleNameDict) self.CleanUp() def PrintDefaults(self): ''' look for default bindings ''' print "Default bindings on:" PrintDefaultBindings(self.RuleL) def SetCaseSensitivity( self, Boolean ): ''' setting case sensitivity: must happen before keyword installation in LexD. ''' self.LexD.SetCaseSensitivity( Boolean ) def CleanUp(self): ''' this may be silly, but to save some space in construction a token dictionary may be used that facilitates sharing of token representations. This method either initializes the dictionary or disposes of it if it exists ''' self.IndexToToken = {} # this dictionary is used by automatically # generated grammars to determine whether # a string represents a nonterminal self.NonTermDict = {} # similarly for terminals self.TermDict = {} # this string may be used to keep a printable # representation of the rules of the grammar # (usually in automatic grammar generation self.RuleString = "" # to associate a token to an integer use # self.IndexToToken[int] = tokenrep def AddNameDict(self, RuleNameDict): ''' this method associates rules to names using a RuleNameDict dictionary which maps names to rule indices. 
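        For example, given a (hypothetical) RuleNameDict of
        {"AssignRule": 0}, the rule self.RuleL[0] is named "AssignRule".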
after invocation self.RuleNameToIndex[ name ] gives the index in self.RuleL for the rule associated with name, and self.RuleL[index].Name gives the name associated with the rule self.RuleL[index] ''' self.RuleNameToIndex = RuleNameDict # add a Name attribute to the rules of the rule list for ruleName in RuleNameDict.keys(): index = RuleNameDict[ ruleName ] self.RuleL[ index ].Name = ruleName def DoParse( self, String, Context = None, DoReductions = 1 ): ''' parse a string using the grammar, return result and context ''' # construct the ParserObj Stream = LexStringWalker( String, self.LexD ) Stack = [] # {-1:0} #Walkers.SimpleStack() ParseOb = ParserObj( self.RuleL, Stream, self.DFA, Stack, \ DoReductions, Context ) # do the parse ParseResult = ParseOb.GO() # return final result of reduction and the context return (ParseResult[1], Context) def DoParse1( self, String, Context=None, DoReductions=1 ): ''' parse a string using the grammar, but only return the result of the last reduction, without the context ''' return self.DoParse(String, Context, DoReductions)[0] def Bind( self, Rulename, NewFunction ): ''' if the Name dictionary has been initialized this method will (re)bind a reduction function to a rule associated with Rulename ''' ruleindex = self.RuleNameToIndex[ Rulename ] rule = self.RuleL[ ruleindex ] rule.ReductFun = NewFunction def Addterm( self, termname, regexpstr, funct ): ''' bind a terminal to a regular expression and interp function in the lexical dictionary (convenience) ''' self.TermDict[termname] =self.LexD.terminal(termname, regexpstr, funct) def NullGrammar(): ''' function to create a "null grammar" ''' return Grammar(None,None,None,{}) def UnMarshalGram(file): ''' unmarshalling a marshalled grammar created by buildmodule.CGrammar.MarshalDump(Tofile) tightly coupled with buildmodule code... file should be open and "pointing to" the marshalled rep. warning: doesn't bind semantics! 
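    (Callers are expected to re-attach semantics afterwards, e.g. via
    Grammar.Bind for rule reduction functions and Grammar.Addterm for
    terminal interpreters.  Note also that, despite the wording above,
    the UnMarshaller class below actually __import__s a module named by
    its first argument rather than reading from an open file object.)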
''' Grammar = NullGrammar() UnMarshal = UnMarshaller(file, Grammar) UnMarshal.MakeLex() UnMarshal.MakeRules() UnMarshal.MakeTransitions() UnMarshal.Cleanup() return UnMarshal.Gram class UnMarshaller: ''' unmarshalling object for unmarshalling grammar from a python module ''' def __init__(self, modulename, Grammar): import marshal self.Gram = Grammar marfile = __import__(modulename) for entry in modulename.split('.')[1:]: marfile = getattr(marfile, entry) self.tokens = marfile.tokens self.punct = marfile.punct self.comments = marfile.comments self.RuleTups = marfile.RuleTups self.MaxStates = marfile.MaxStates self.reducts = marfile.reducts self.moveTos = marfile.moveTos self.Root = marfile.Root self.CaseSensitivity = marfile.CaseSensitivity Grammar.SetCaseSensitivity(self.CaseSensitivity) def MakeLex(self): Grammar=self.Gram LexD = Grammar.LexD # punctuations LexD.punctuationlist = self.punct # comments for commentregex in self.comments: LexD.comment(commentregex) #LexD.commentstring = self.comments # keywords, terminals, nonterms # rewrite the tokens list for sharing and extra safety LexTokens = {} tokens = self.tokens for tokenindex in range(len(tokens)): (kind,name) = tokens[tokenindex] if kind == KEYFLAG: tokens[tokenindex] = LexD.keyword(name) elif not kind in [TERMFLAG, NONTERMFLAG]: raise FlowError, "unknown token type" # not needed self.tokens = tokens def MakeRules(self): Grammar = self.Gram Grammar.DFA.root_nonTerminal = self.Root NameIndex = Grammar.RuleNameToIndex RuleTuples = self.RuleTups nRules = len(RuleTuples) RuleList = [None] * nRules for index in range(nRules): (Name, Components) = RuleTuples[index] rule = apply(ParseRule, Components) rule.Name = Name RuleList[index] = rule NameIndex[Name] = index Grammar.RuleL = RuleList def MakeTransitions(self): Grammar = self.Gram DFA = Grammar.DFA StateTokenMap = DFA.StateTokenMap tokens = self.tokens # record the state number DFA.maxState = self.MaxStates # this is historical, unfortunately... CLEAN IT UP SOMEDAY! # THE DFA.States DICT IS NOT NEEDED (?) (here) for state in range(1, self.MaxStates+1): DFA.States[state] = [TRANSFLAG] # record the reductions for (fromState, TokenIndex, rulenum) in self.reducts: DFA.SetReduction(fromState, tokens[TokenIndex], rulenum) # record the transitions for (fromState, TokenIndex, ToState) in self.moveTos: DFA.SetMap(fromState, tokens[TokenIndex], ToState) def Cleanup(self): Grammar = self.Gram Grammar.CleanUp() # # $Log: kjParser.py,v $ # Revision 1.5 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.4 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.3 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . 
fixed the module name in gadfly/kjParser.py # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/kjSet.py0100644000157700012320000001565107467104371014707 0ustar rjonestech""" Sets implemented using mappings. These only work for "immutable" elements. probably not terribly efficient, but easy to implement and not as slow as concievably possible. :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: kjSet.py,v 1.3 2002/05/11 02:59:05 richard Exp $: """ def NewSet(Sequence): Result = {} for Elt in Sequence: Result[Elt] = 1 return Result def Empty(Set): if Set == {}: return 1 else: return 0 def get_elts(Set): return Set.keys() def member(Elt,Set): return Set.has_key(Elt) # in place mutators: # returns if no change otherwise 1 def addMember(Elt,Set): change = 0 if not Set.has_key(Elt): Set[Elt] = 1 change = 1 return change def Augment(Set, OtherSet): change = 0 for Elt in OtherSet.keys(): if not Set.has_key(Elt): Set[Elt] = 1 change = 1 return change def Mask(Set, OtherSet): change = 0 for Elt in OtherSet.keys(): if Set.has_key(Elt): del Set[Elt] change = 1 return change # side effect free functions def Intersection(Set1, Set2): Result = {} for Elt in Set1.keys(): if Set2.has_key(Elt): Result[Elt] = 1 return Result def Difference(Set1, Set2): Result = {} for Elt in Set1.keys(): if not Set2.has_key(Elt): Result[Elt] = 1 return Result def Union(Set1,Set2): Result = {} Augment(Result,Set1) Augment(Result,Set2) return Result def Subset(Set1,Set2): Result = 1 for Elt in Set1.keys(): if not Set2.has_key(Elt): Result = 0 return Result # nonlocal return Result def Same(Set1,Set2): if Subset(Set1,Set2) and Subset(Set2,Set1): return 1 else: return 0 # directed graphs as Dictionaries of Sets # also only works for immutable nodes def NewDG(pairlist): Result = {} for (source,dest) in pairlist: AddArc(Result, source, dest) return Result def GetPairs(Graph): result = [] Sources = Graph.keys() for S in Sources: Dests = get_elts( Graph[S] ) ThesePairs = [None] * len(Dests) for i in range(0,len(Dests)): D = Dests[i] ThesePairs[i] = (S, D) result = result + ThesePairs return result def AddArc(Graph, Source, Dest): change = 0 if Graph.has_key(Source): Adjacent = Graph[Source] if not member(Dest,Adjacent): addMember(Dest,Adjacent) change = 1 else: Graph[Source] = NewSet( [ Dest ] ) change = 1 return change def Neighbors(Graph,Source): if Graph.has_key(Source): return get_elts(Graph[Source]) else: return [] def HasArc(Graph, Source, Dest): result = 0 if Graph.has_key(Source) and member(Dest, Graph[Source]): result = 1 return result def Sources(Graph): return Graph.keys() # when G1, G2 and G3 are different graphs this results in # G1 = G1 U ( G2 o G3 ) # If G1 is identical to one of G2,G3 the result is somewhat # nondeterministic (depends on dictionary implementation). 
# However, guaranteed that AddComposition(G,G,G) returns # G1 U (G1 o G1) <= G <= TC(G1) # where G1 is G's original value and TC(G1) is its transitive closure # hence this function can be used for brute force transitive closure # def AddComposition(G1, G2, G3): change = 0 for G2Source in Sources(G2): for Middle in Neighbors(G2,G2Source): for G3Dest in Neighbors(G3, Middle): if not HasArc(G1, G2Source, G3Dest): change = 1 AddArc(G1, G2Source, G3Dest) return change # in place transitive closure of a graph def TransClose(Graph): change = AddComposition(Graph, Graph, Graph) somechange = change while change: change = AddComposition(Graph, Graph, Graph) if not somechange: somechange = change return somechange ########### SQueue stuff # # A GrabBag should be used to hold objects temporarily for future # use. You can put things in and take them out, with autodelete # that's all! # make a new baggy with nothing in it # BG[0] is insert cursor BG[1] is delete cursor, others are elts # OLD = 1 NEW = 0 START = 2 def NewBG(): B = [None]*8 #default size B[OLD] = START B[NEW] = START return B def BGempty(B): # other ops must maintain this: old == new iff empty return B[OLD] == B[NEW] # may return new, larger structure # must be used with assignment... B = BGadd(e,B) def BGadd(elt, B): cursor = B[NEW] oldlen = len(B) # look for an available position while B[cursor] != None: cursor = cursor+1 if cursor >= oldlen: cursor = START if cursor == B[NEW]: #back to beginning break # resize if wrapped if B[cursor] != None: B = B + [None] * oldlen cursor = oldlen B[OLD] = START if B[cursor] != None: raise IndexError, "can't insert?" # add the elt B[cursor] = (elt,) B[NEW] = cursor # B nonempty so OLD and NEW should differ. if B[OLD] == cursor: B[NEW] = cursor + 1 if B[NEW]<=len(B): B[NEW] = START return B def BGgetdel(B): # find something to delete: cursor = B[OLD] blen = len(B) while B[cursor]==None: cursor = cursor+1 if cursor>=blen: cursor = START if cursor == B[OLD]: break # wrapped if B[cursor] == None: raise IndexError, "delete from empty grabbag(?)" # test to see if bag is empty (position cursor2 at nonempty slot) cursor2 = cursor+1 if cursor2>=blen: cursor2 = START while B[cursor2]==None: cursor2 = cursor2+1 if cursor2>=blen: cursor2 = START # since B[cursor] not yet deleted while will terminate # get and delete the elt (result,) = B[cursor] B[cursor] = None # cursor == cursor2 iff bag is empty B[OLD] = cursor2 if B[NEW] == cursor2: B[NEW] = cursor return result def BGtest(n): B = NewBG() rn = range(n) rn2 = range(n-2) for i in rn: for j in rn: B = BGadd( (i,j), B) B = BGadd( (j,i), B) x = BGgetdel(B) for j in rn2: y = BGgetdel(B) print (i, x, y) return B # # $Log: kjSet.py,v $ # Revision 1.3 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.2 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. 
# # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/kjbuckets0.py0100644000157700012320000006736407467104371015704 0ustar rjonestech""" kjbuckets in pure python :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: kjbuckets0.py,v 1.4 2002/05/11 02:59:05 richard Exp $: """ ### needs more thorough testing! def kjtabletest(x): #print "kjtabletest" try: return x.is_kjtable except: return 0 unhashable = "unhashable key error" class kjGraph: is_kjtable = 1 def __init__(self, *args): #print "kjGraph.__init__", args key_to_list = self.key_to_list = {} self.dirty = 0 self.hashed = None #print args if args: if len(args)>1: raise ValueError, "only 1 or 0 argument supported" from types import IntType, ListType, TupleType arg = args[0] targ = type(arg) test = key_to_list.has_key if type(arg) is IntType: return # ignore int initializer (presize not implemented) elif type(arg) is ListType or type(arg) is TupleType: for (x,y) in arg: if test(x): key_to_list[x].append(y) else: key_to_list[x] = [y] return aclass = arg.__class__ if aclass is kjGraph: aktl = arg.key_to_list for k in aktl.keys(): key_to_list[k] = aktl[k][:] return if aclass is kjDict or aclass is kjSet: adict = arg.dict for k in adict.keys(): key_to_list[k] = [ adict[k] ] return raise ValueError, "arg for kjGraph must be tuple, list, or kjTable" def __repr__(self): return "%s(%s)" % (self.__class__.__name__, self.items()) def _setitems(self, thing): #print "kjGraph._setitem", thing #print "setitems", thing if self.hashed is not None: raise ValueError, "table has been hashed, it is immutable" try: for (k,v) in thing: #print k,v, "going" #inlined __setitem__ try: klist = self.key_to_list[k] #print "klist gotten" except KeyError: try: klist = self.key_to_list[k] = [] except TypeError: raise unhashable if v not in klist: klist.append(v) except (TypeError, KeyError): #import sys #print sys.exc_type, sys.exc_value if kjtabletest(thing): self._setitems(thing._pairs()) self.dirty = thing.dirty else: raise ValueError, "cannot setitems with %s" % type(thing) except unhashable: raise TypeError, "unhashable type" def __setitem__(self, item, value): ktl = self.key_to_list if ktl.has_key(item): l = ktl[item] if value not in l: l.append(value) else: ktl[item] = [value] def __getitem__(self, item): return self.key_to_list[item][0] def __delitem__(self, item): self.dirty = 1 del self.key_to_list[item] def choose_key(self): return self.key_to_list.keys()[0] def _pairs(self, justtot=0): myitems = self.key_to_list.items() tot = 0 for (k, v) in myitems: tot = tot + len(v) if justtot: return tot else: result = [None]*tot i = 0 for (k,v) in myitems: for x in v: result[i] = (k,x) i = i+1 return result def __len__(self): v = self.key_to_list.values() lv = map(len, v) from operator import add return reduce(add, lv, 0) def items(self): return self._pairs() def values(self): v = self.key_to_list.values() from operator import add tot = reduce(add, map(len, v), 0) result = [None] * tot count = 0 for l in v: next = count + len(l) result[count:next] = l count = next return result def keys(self): return self.key_to_list.keys() def member(self, k, v): ktl = self.key_to_list if ktl.has_key(k): return v in ktl[k] return 0 _member = member # because member redefined for kjSet def add(self, k, v): ktl = self.key_to_list if ktl.has_key(k): l = ktl[k] if v not in l: l.append(v) else: ktl[k] = [v] def delete_arc(self, k, v): self.dirty = 1 if self.hashed is not None: raise ValueError, "table has been 
hashed, it is immutable" try: l = self.key_to_list[k] i = l.index(v) del l[i] if not l: del self.key_to_list[k] except: raise KeyError, "not in table"# % (k,v) def has_key(self, k): return self.key_to_list.has_key(k) def subset(self, other): oc = other.__class__ if oc is kjGraph: oktl = other.key_to_list sktl = self.key_to_list otest = oktl.has_key for k in sktl.keys(): if otest(k): l = sktl[k] ol = oktl[k] for x in l: if x not in ol: return 0 else: return 0 return 1 elif oc is kjSet or oc is kjDict: sktl = self.key_to_list odict = other.dict otest = odict.has_key for k in sktl.keys(): if otest(k): l = sktl[k] ov = odict[k] for x in l: if ov!=x: return 0 else: return 0 return 1 def neighbors(self, k): try: return self.key_to_list[k][:] except: return [] def reachable(self, k): try: horizon = self.key_to_list[k] except: return kjSet() else: if not horizon: return [] d = {} for x in horizon: d[x] = 1 done = 0 while horizon: newhorizon = [] for n in horizon: for n2 in self.neighbors(n): if not d.has_key(n2): newhorizon.append(n2) d[n2] = 1 horizon = newhorizon return kjSet(d.keys()) # ???? def ident(self): result = kjDict(self) result.dirty = self.dirty or result.dirty return result def tclosure(self): # quick and dirty try: raise self except (kjSet, kjDict): raise ValueError, "tclosure only defined on graphs" except kjGraph: pass except: raise ValueError, "tclosure only defined on graphs" result = kjGraph(self) result.dirty = self.dirty addit = result.add while 1: #print result more = result*result if more.subset(result): return result for (x,y) in more.items(): addit(x,y) def Clean(self): if self.dirty: return None return self def Wash(self): self.dirty = 0 def Soil(self): self.dirty = 1 def remap(self, X): # really only should be defined for kjdict, but whatever return kjDict(X*self).Clean() def dump(self, seq): result = map(None, seq) for i in range(len(result)): result[i] = self[result[i]] if len(seq) == 1: return result[0] return tuple(result) def __hash__(self): # should test better """in conformance with kjbuckets, permit unhashable keys""" if self.hashed is not None: return self.hashed items = self._pairs() for i in xrange(len(items)): (a,b) = items[i] try: b = hash(b) except: b = 1877777 items[i] = hash(a)^~b items.sort() result = self.hashed = hash(tuple(items)) return result def __cmp__(self, other): #print "kjGraph.__cmp__" ls = len(self) lo = len(other) test = cmp(ls, lo) if test: return test si = self._pairs() oi = other._pairs() si.sort() oi.sort() return cmp(si, oi) def __nonzero__(self): if self.key_to_list: return 1 return 0 def __add__(self, other): result = kjGraph(self) rktl = result.key_to_list rtest = rktl.has_key result.dirty = self.dirty or other.dirty oc = other.__class__ if oc is kjGraph: oktl = other.key_to_list for k in oktl.keys(): l = oktl[k] if rtest(k): rl = rktl[k] for x in l: if x not in rl: rl.append(x) else: rktl[k] = l[:] elif oc is kjSet or oc is kjDict: odict = other.dict for k in odict.keys(): ov = odict[k] if rtest(k): rl = rktl[k] if ov not in rl: rl.append(ov) else: rktl[k] = [ov] else: raise ValueError, "kjGraph adds only with kjTable" return result __or__ = __add__ def __sub__(self, other): result = kjGraph() rktl = result.key_to_list sktl = self.key_to_list oc = other.__class__ if oc is kjGraph: oktl = other.key_to_list otest = oktl.has_key for k in sktl.keys(): l = sktl[k][:] if otest(k): ol = oktl[k] for x in ol: if x in l: l.remove(x) if l: rktl[k] = l else: rktl[k] = l elif oc is kjSet or oc is kjDict: odict = other.dict otest = 
odict.has_key for k in sktl.keys(): l = sktl[k][:] if otest(k): ov = odict[k] if ov in l: l.remove(ov) if l: rktl[k] = l else: raise ValueError, "kjGraph diffs only with kjTable" return result def __mul__(self, other): result = kjGraph() rktl = result.key_to_list sktl = self.key_to_list oc = other.__class__ if oc is kjGraph: oktl = other.key_to_list otest = other.has_key for sk in sktl.keys(): sklist = [] for sv in sktl[sk]: if otest(sv): sklist[0:0] = oktl[sv] if sklist: rktl[sk] = sklist elif oc is kjSet or oc is kjDict: odict = other.dict otest = odict.has_key for sk in sktl.keys(): sklist=[] for sv in sktl[sk]: if otest(sv): sklist.append(odict[sv]) if sklist: rktl[sk] = sklist else: raise ValueError, "kjGraph composes only with kjTable" return result def __invert__(self): result = self.__class__() pairs = self._pairs() for i in xrange(len(pairs)): (k,v) = pairs[i] pairs[i] = (v,k) result._setitems(pairs) result.dirty = self.dirty or result.dirty return result def __and__(self, other): sktl = self.key_to_list oc = other.__class__ if oc is kjGraph: result = kjGraph() rktl = result.key_to_list oktl = other.key_to_list otest = oktl.has_key for k in self.keys(): if otest(k): l = sktl[k] ol = oktl[k] rl = [] for x in l: if x in ol: rl.append(x) if rl: rktl[k] = rl elif oc is kjSet or oc is kjDict: result = oc() # less general! rdict = result.dict odict = other.dict stest = sktl.has_key for k in odict.keys(): if stest(k): v = odict[k] l = sktl[k] if v in l: rdict[k] = v else: raise ValueError, "kjGraph intersects only with kjTable" result.dirty = self.dirty or other.dirty return result def __coerce__(self, other): return (self, other) # ?is this sufficient? class kjDict(kjGraph): def __init__(self, *args): #print "kjDict.__init__", args self.hashed = None dict = self.dict = {} self.dirty = 0 if not args: return if len(args)==1: from types import TupleType, ListType, IntType arg0 = args[0] targ0 = type(arg0) if targ0 is IntType: return if targ0 is ListType or targ0 is TupleType: otest = dict.has_key for (a,b) in arg0: if otest(a): if dict[a]!=b: self.dirty = 1 dict[a] = b return argc = arg0.__class__ if argc is kjGraph: ktl = arg0.key_to_list for k in ktl.keys(): l = ktl[k] if len(l)>1: self.dirty=1 for v in l: dict[k] = v return if argc is kjSet or argc is kjDict: adict = arg0.dict for (k,v) in adict.items(): dict[k]=v return raise ValueError, "kjDict initializes only from list, tuple, kjTable, or int" def _setitems(self, thing): #print "kjDict._setitem", thing if self.hashed is not None: raise KeyError, "table hashed, cannot modify" dict = self.dict try: for (k,v) in thing: if dict.has_key(k) and dict[k]!=v: self.dirty = 1 dict[k] = v except: self._setitems(thing._pairs()) # maybe too tricky! 
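    # A hedged illustration (comments only): rebinding a key to a different
    # value marks the table "dirty", and Clean() reports a dirty table as
    # inconsistent by returning None:
    #     d = kjDict([(1, "a")])
    #     d[1] = "a"      # same value: still clean
    #     d[1] = "b"      # conflicting rebind: d.dirty == 1
    #     d.Clean()       # ==> None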
def dump(self, dumper): ld = len(dumper) if ld==1: return self.dict[dumper[0]] else: sdict = self.dict result = [None] * ld for i in xrange(ld): result[i] = sdict[ dumper[i] ] return tuple(result) def __setitem__(self, item, value): if self.hashed is not None: raise ValueError, "table has been hashed, it is immutable" d = self.dict if d.has_key(item): if d[item]!=value: self.dirty = 1 self.dict[item]=value def __getitem__(self, item): return self.dict[item] def __delitem__(self, item): if self.hashed is not None: raise ValueError, "table has been hashed, it is immutable" self.dirty = 1 del self.dict[item] def choose_key(self): return self.dict.keys()[0] def __len__(self): return len(self.dict) def _pairs(self, justtot=0): if justtot: return len(self.dict) return self.dict.items() def values(self): return self.dict.values() def keys(self): return self.dict.keys() def items(self): return self.dict.items() def remap(self, X): if X.__class__ is kjGraph: if self.dirty or X.dirty: return None result = kjDict() resultd = result.dict selfd = self.dict inself = selfd.has_key inresult = resultd.has_key ktl = X.key_to_list for k in ktl.keys(): for v in ktl[k]: if inself(v): map = selfd[v] if inresult(k): if resultd[k]!=map: return None else: resultd[k]=map return result else: return (kjDict(X*self)).Clean() def __cmp__(s,o): from types import InstanceType if type(o) is not InstanceType: return -1 oc = o.__class__ if oc is kjDict or oc is kjSet: return cmp(s.dict, o.dict) return kjGraph.__cmp__(s, o) def __hash__(s): h = s.hashed if h is not None: return h return kjGraph.__hash__(s) def __add__(s,o): oc = o.__class__ if oc is kjDict or oc is kjSet: result = kjDict() result.dirty = s.dirty or o.dirty rdict = result.dict rtest = result.has_key sdict = s.dict for k in sdict.keys(): rdict[k] = sdict[k] odict = o.dict for k in odict.keys(): if rtest(k): if rdict[k]!=odict[k]: result.dirty=1 else: rdict[k] = odict[k] return result if oc is kjGraph: return kjGraph.__add__(o,s) else: raise ValueError, "kjDict unions only with kjTable" __or__ = __add__ def __and__(s,o): oc = o.__class__ if oc is kjDict or oc is kjSet: result = oc() result.dirty = s.dirty or o.dirty rdict = result.dict odict = o.dict sdict = s.dict stest = sdict.has_key for k in odict.keys(): v = odict[k] if stest(k) and sdict[k]==v: rdict[k] = v return result elif oc is kjGraph: return kjGraph.__and__(o,s) def __sub__(s,o): oc = o.__class__ result = kjDict() result.dirty = s.dirty or o.dirty sdict = s.dict rdict = result.dict if oc is kjDict: odict = o.dict otest = odict.has_key for k in sdict.keys(): v = sdict[k] if otest(k): if odict[k]!=v: rdict[k] = v else: rdict[k] = v return result if oc is kjGraph: oktl = o.key_to_list otest = oktl.has_key for k in sdict.keys(): v = sdict[k] if otest(k): if v not in oktl[k]: rdict[k] = v else: rdict[k] = v return result raise ValueError, "kjDict only diffs with kjGraph, kjDict" def __mul__(s,o): oc = o.__class__ sdict = s.dict if oc is kjDict or oc is kjSet: result = kjDict() result.dirty = s.dirty or o.dirty rdict = result.dict odict = o.dict otest = odict.has_key for k in sdict.keys(): kv = sdict[k] if otest(kv): rdict[k] = odict[kv] return result elif oc is kjGraph: return kjGraph(s) * o else: raise ValueError, "kjDict only composes with kjTable" def member(self, k, v): d = self.dict try: return d[k] == v except: return 0 _member = member def delete_arc(self, k, v): if self.dict[k] == v: del self.dict[k] else: raise KeyError, "pair not in table" def has_key(self, k): return self.dict.has_key(k) def 
neighbors(self, k): try: return [ self.dict[k] ] except: return [] def reachable(self, k): result = {} d = self.dict try: while 1: next = d[k] if result.has_key(next): break result[next] = 1 k = next except KeyError: pass return kjSet(result.keys()) def __invert__(self): result = kjDict() dr = result.dict drtest = dr.has_key ds = self.dict for (a,b) in ds.items(): if drtest(b): result.dirty=1 dr[b]=a result.dirty = self.dirty or result.dirty return result def __nonzero__(self): if self.dict: return 1 return 0 def subset(s, o): oc = o.__class__ sdict = s.dict if oc is kjDict or oc is kjSet: odict = o.dict otest = odict.has_key for k in sdict.keys(): v = sdict[k] if otest(k): if odict[k]!=v: return 0 else: return 0 elif oc is kjGraph: oktl = o.key_to_list otest = oktl.has_key for k in sdict.keys(): v = sdict[k] if otest(k): if v not in oktl[k]: return 0 else: return 0 else: raise ValueError, "kjDict subset test only for kjTable" return 1 def add(s, k, v): if s.hashed is not None: raise ValueError, "table has been hashed, immutable" sdict = s.dict if sdict.has_key(k): if sdict[k]!=v: s.dirty = 1 sdict[k] = v class kjSet(kjDict): def __init__(self, *args): #print "kjSet.__init__", args # usual cases first dict = self.dict = {} self.hashed = None self.dirty = 0 largs = len(args) if largs<1: return if largs>1: raise ValueError, "at most one argument supported" from types import IntType, TupleType, ListType arg0 = args[0] targ0 = type(arg0) if targ0 is IntType: return if targ0 is TupleType or targ0 is ListType: for x in arg0: dict[x] = x return argc = arg0.__class__ if argc is kjDict or argc is kjSet: stuff = arg0.dict.keys() elif argc is kjGraph: stuff = arg0.key_to_list.keys() else: raise ValueError, "kjSet from kjTable, int, list, tuple only" for x in stuff: dict[x] = x def __add__(s,o): oc = o.__class__ if oc is kjSet: result = kjSet() result.dirty = s.dirty or o.dirty rdict = result.dict for x in s.dict.keys(): rdict[x]=x for x in o.dict.keys(): rdict[x]=x return result elif oc is kjDict: return kjDict.__add__(o,s) elif oc is kjGraph: return kjGraph.__add__(o,s) __or__ = __add__ def __sub__(s,o): if o.__class__ is kjSet: result = kjSet() result.dirty = s.dirty or o.dirty rdict = result.dict otest = o.dict.has_key for x in s.dict.keys(): if not otest(x): rdict[x] = x return result else: return kjDict.__sub__(s,o) def __and__(s,o): oc = o.__class__ if oc is kjSet or oc is kjDict: result = kjSet() result.dirty = s.dirty or o.dirty rdict = result.dict odict = o.dict otest = odict.has_key for x in s.dict.keys(): if otest(x) and odict[x]==x: rdict[x] = x return result elif oc is kjGraph: return kjGraph.__and__(o,s) raise ValueError, "kjSet only intersects with kjTable" # illegal methods values = keys = remap = None def __repr__(self): return "kjSet(%s)" % self.items() def _setelts(self, items): #print "kjSet.setelts", items try: items = items._pairs() except: items = list(items) for i in xrange(len(items)): items[i] = (items[i], items[i]) self._setitems(items) else: items = list(items) for i in xrange(len(items)): items[i] = (items[i][0], items[i][0]) self._setitems(items) # hack! 
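        # Hedged sketch: a kjSet stores each element as a self-keyed entry,
        # so the underlying dict for kjSet([1, 2]) is {1: 1, 2: 2}; that is
        # why member/items below can simply alias the kjDict key methods.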
#D = self.dict #for x in D.keys(): # D[x] = x def _pairs(self, justtot=0): if justtot: return kjDict._pairs(self, justtot=1) pairs = kjDict.keys(self) for i in xrange(len(pairs)): pairs[i] = (pairs[i], pairs[i]) return pairs member = kjDict.has_key items = kjDict.keys #def neighbors(self, x): # raise ValueError, "operation on kjSet undefined" #reachable = neighbors def __getitem__(self, item): test = self.dict.has_key(item) if test: return 1 raise KeyError, "item not in set" def __setitem__(self, item, ignore): d = self.dict if self.hashed: raise ValueError, "table hashed, immutable" d[item] = item def add(self, elt): if self.hashed: raise ValueError, "table hashed, immutable" self.dict[elt] = elt def __mul__(s,o): oc = o.__class__ if oc is kjSet: return s.__and__(o) else: return kjDict.__mul__(s, o) def more_general(t1, t2): try: raise t1 except kjSet: try: raise t2 except (kjGraph, kjDict, kjSet): return t2.__class__ except kjDict: try: raise t2 except kjSet: return t1.__class__ except (kjDict, kjGraph): return t2.__class__ except kjGraph: return t1.__class__ except: raise ValueError, "cannot coerce, not kjtable" def less_general(t1,t2): try: raise t1 except kjSet: return t1.__class__ except kjDict: try: raise t2 except kjSet: return t2.__class__ except (kjDict, kjGraph): return t1.__class__ except kjGraph: return t2.__class__ except: raise ValueError, "cannot coerce, not kjtable" def kjUndump(t1, t2): result = kjDict() rdict = result.dict lt1 = len(t1) if lt1 == 1: rdict[t1[0]] = t2 else: # tightly bound to implementation for i in xrange(lt1): rdict[t1[i]] = t2[i] return result # # $Log: kjbuckets0.py,v $ # Revision 1.4 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.3 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.2 2002/05/07 04:04:02 anthonybaxter # removing duplicate 'items' call. # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/kjbuckets_select.py0100644000157700012320000000140107467104371017137 0ustar rjonestech'''Select the best kjbuckets module available. :Author: Richard Jones :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: kjbuckets_select.py,v 1.3 2002/05/11 02:59:05 richard Exp $: ''' # use kjbuckets builtin if available try: import kjbuckets except ImportError: import kjbuckets0 kjbuckets = kjbuckets0 # # $Log: kjbuckets_select.py,v $ # Revision 1.3 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. 
# # gadfly-1.0.0/gadfly/operations.py0100644000157700012320000005613007467104371016007 0ustar rjonestech""" Database modification statement semantics :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: operations.py,v 1.4 2002/05/11 02:59:05 richard Exp $: """ import semantics import serialize # ordering of ddef storage is important so, eg, index defs # follow table defs. class Ordered_DDF: """mixin for DDF statement sorting, subclass defines s.cmp(o)""" def __cmp__(self, other): try: #print "comparing", self.name, other.name try: sc = self.__class__ oc = other.__class__ #print sc, oc except: #print "punting 1", -1 return -1 if sc in ddf_order and oc in ddf_order: test = cmp(ddf_order.index(sc), ddf_order.index(oc)) #print "ddforder", test if test: return test return self.cmp(other) else: test = cmp(sc, oc) #print "punting 2", test return test except: #import sys #print "exception!" #print sys.exc_type, sys.exc_value return -1 def __coerce__(self, other): return (self, other) def cmp(self, other): """redefine if no name field""" return cmp(self.name, other.name) CTFMT = """\ CREATE TABLE %s ( %s )""" class CreateTable(Ordered_DDF): """create table operation""" def __init__(self, name, colelts): self.name = name self.colelts = colelts self.indb = None # db in which to create def initargs(self): return (self.name, []) def marshaldata(self): return map(serialize.serialize, self.colelts) def demarshal(self, args): self.colelts = map(serialize.deserialize, args) def __repr__(self): from string import join elts = list(self.colelts) elts = map(repr, elts) return CTFMT % (self.name, join(elts, ",\n ")) def relbind(self, db): """check that table doesn't already exist""" if db.has_relation(self.name): raise NameError, "cannot create %s, exists" % (self.name,) self.indb = db return self def eval(self, dyn=None): "create the relation now" # datatypes currently happily ignored :) db = self.indb if db is None: raise ValueError, "unbound or executed" self.indb = None name = self.name if db.has_relation(self.name): raise NameError, "relation %s exists, cannot create" % (self.name,) db.touched = 1 attnames = [] for x in self.colelts: attnames.append(x.colid) from store import Relation0 r = Relation0(attnames) # must store if new (unset for reloads) r.touched = 1 db[name] = r db.add_datadef(name, self) log = db.log if log is not None: log.log(self) viewfmt = """\ CREATE VIEW %s (%s) AS %s""" class CreateView(semantics.SimpleRecursive, Ordered_DDF): """CREATE VIEW name (namelist) AS selection""" # note: no check for cross-references on drops! 
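    # A hedged example of the statement this class represents (table and
    # column names are made up):
    #     CREATE VIEW recent (id, name) AS
    #     SELECT id, name FROM people
    # Here "recent" becomes self.name, the column list self.namelist, and
    # the parsed SELECT tree self.selection.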
def __init__(self, name, namelist, selection): self.name = name self.namelist = namelist self.selection = selection self.indb = None def __repr__(self): return viewfmt % (self.name, self.namelist, self.selection) def initargs(self): return (self.name, self.namelist, self.selection) def relbind(self, db): self.indb = db name = self.name if db.has_datadef(name): raise NameError, "(view) datadef %s exists" % name # don't bind the selection yet return self def eval(self, dyn=None): "create the view" db = self.indb name = self.name if db is None: raise ValueError, "create view %s unbound or executed" % name self.indb = None if db.has_relation(name): raise ValueError, "create view %s, name exists" % name db.touched = 1 from store import View v = View(self.name, self.namelist, self.selection, db) db[name] = v db.add_datadef(name, self) log = db.log if log is not None: log.log(self) CREATEINDEXFMT = """\ CREATE %sINDEX %s ON %s ( %s )""" class CreateIndex(semantics.SimpleRecursive, Ordered_DDF): """create index operation""" def __init__(self, name, tablename, atts, unique=0): self.name = name self.tablename=tablename self.atts = atts self.indb = None self.target = None self.unique = unique def initargs(self): return (self.name, self.tablename, self.atts, self.unique) def __cmp__(self, other): oc = other.__class__ if oc is CreateTable: return 1 # after all create tables sc = self.__class__ if oc is not sc: return cmp(sc, oc) else: return cmp(self.name, other.name) def __coerce__(self, other): return (self, other) def __repr__(self): from string import join un = "" if self.unique: un="UNIQUE " innards = join(self.atts, ",\n ") return CREATEINDEXFMT % (un, self.name, self.tablename, innards) def relbind(self, db): name = self.name self.indb = db if db.has_datadef(name): raise NameError, `name`+": data def exists" try: self.target = db.get_for_update(self.tablename) #db[self.tablename] except: raise NameError, `self.tablename`+": no such relation" return self def eval(self, dyn=None): from store import Index db = self.indb if db is None: raise ValueError, "create index unbound or executed" self.indb = None rel = self.target if rel is None: raise ValueError, "create index not bound to relation" db.touched = 1 self.the_index = the_index = Index(self.name, self.atts, unique=self.unique) rel.add_index(the_index) name = self.name db.add_datadef(name, self) db.add_index(name, the_index) log = db.log if log is not None: log.log(self) class DropIndex(semantics.SimpleRecursive): def __init__(self, name): self.name = name self.indb = None def initargs(self): return (self.name,) def __repr__(self): return "DROP INDEX %s" % (self.name,) def relbind(self, db): self.indb = db if not db.has_datadef(self.name): raise NameError, `self.name`+": no such index" return self def eval(self, dyn=None): db = self.indb self.indb=None if db is None: raise ValueError, "drop index executed or unbound" db.touched = 1 indexname = self.name createindex = db.datadefs[indexname] index = createindex.the_index relname = createindex.tablename rel = db[relname] rel.drop_index(index) db.drop_datadef(indexname) db.drop_index(indexname) log = db.log if log is not None: log.log(self) class DropTable(semantics.SimpleRecursive): def __init__(self, name): self.name = name self.indb = None def initargs(self): return (self.name,) def __repr__(self): return "DROP TABLE %s" % (self.name,) def relbind(self, db): self.indb = db name = self.name if not db.has_relation(name): raise NameError, `self.name` + ": cannot delete, no such table/view" 
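        # check_kind is the polymorphic hook: DropTable refuses to drop a
        # view, and the DropView subclass below inverts the test.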
self.check_kind(name, db) return self def check_kind(self, name, db): if db[name].is_view: raise ValueError, "%s is VIEW, can't DROP TABLE" % name def eval(self, dyn): db = self.indb if db is None: raise ValueError, "unbound or executed" db.touched = 1 self.indb = None self.relbind(db) name = self.name rel = db[name] rel.drop_indices(db) db.drop_datadef(name) del db[name] log = db.log if log is not None: log.log(self) class DropView(DropTable): """DROP VIEW name""" def __repr__(self): return "DROP VIEW %s" % self.name def check_kind(self, name, db): if not db[name].is_view: raise ValueError, "%s is TABLE, can't DROP VIEW" % name COLDEFFMT = "%s %s %s %s" class ColumnDef(semantics.SimpleRecursive): def __init__(self, colid, datatype, defaults, constraints): self.colid = colid self.datatype = datatype self.defaults = defaults self.constraints = constraints def initargs(self): return (self.colid, self.datatype, self.defaults, self.constraints) def __repr__(self): defaults = self.defaults if defaults is None: defaults="" constraints = self.constraints if constraints is None: constraints = "" return COLDEFFMT % (self.colid, self.datatype, defaults, constraints) def evalcond(cond, eqs, target, dyn, rassns, translate, invtrans): """factored out shared op between Update and Delete.""" if dyn: #print "dyn", dyn from semantics import dynamic_binding dynbind = dynamic_binding(len(dyn), dyn) if len(dynbind)>1: raise ValueError, "only one dynamic binding allowed for UPDATE" dynbind1 = dynbind = dynbind[0] if eqs is not None: dynbind1 = dynbind.remap(eqs) if dynbind1 is None: # inconsistent return dynbind = dynbind1 + dynbind if rassns is not None: rassns = rassns + invtrans * dynbind if rassns.Clean() is None: # inconsistent return else: rassns = invtrans * dynbind #print "dynbind", dynbind #print "rassn", rassns else: dynbind = None # get tuple set, try to use an index index = None if rassns is not None: known = rassns.keys() index = target.choose_index(known) if index is None: (tuples, seqnums) = target.rows(1) else: #print "using index", index.name (tuples, seqnums) = index.matches(rassns) ltuples = len(tuples) buffer = [0] * ltuples rtups = range(ltuples) for i in rtups: tup = tuples[i] #print tup ttup = translate * tup if dynbind: ttup = (ttup + dynbind).Clean() if ttup is not None: buffer[i] = ttup #print "buffer", buffer #print "cond", cond #for x in buffer: #print "before", x test = cond(buffer) #print "test", test return (test, rtups, seqnums, tuples) UPDFMT = """\ UPDATE %s SET %s WHERE %s""" # optimize to use indices and single call to "cond" class UpdateOp(semantics.SimpleRecursive): def __init__(self, name, assns, condition): self.name = name self.assns = assns self.condition = condition def initargs(self): return (self.name, self.assns, self.condition) def __repr__(self): return UPDFMT % (self.name, self.assns, self.condition) def relbind(self, db): self.indb = db name = self.name target = self.target = db.get_for_update(name) (attb, relb, amb, ambatts) = db.bindings( [ (name, name) ] ) assns = self.assns = self.assns.relbind(attb, db) cond = self.condition = self.condition.relbind(attb, db) constraints = cond.constraints if constraints is not None: eqs = self.eqs = constraints.eqs cassns = constraints.assns else: cassns = eqs = self.eqs = None #print constraints, eqs # check that atts of assns are atts of target #print dir(assns) resultatts = assns.attorder from semantics import kjbuckets kjSet = kjbuckets.kjSet kjGraph = kjbuckets.kjGraph resultatts = kjSet(resultatts) allatts = 
kjSet(target.attribute_names) self.preserved = allatts - resultatts huh = resultatts - allatts if huh: raise NameError, "%s lacks %s attributes" % (name, huh.items()) # compute projection assnsatts = kjGraph(assns.domain().items()).neighbors(name) condatts = kjGraph(cond.domain().items()).neighbors(name) condatts = condatts+assnsatts #print "condatts", condatts translate = kjbuckets.kjDict() for att in condatts: translate[ (name, att) ] = att self.translate = translate invtrans= self.invtrans = ~translate if cassns is not None: self.rassns = invtrans * cassns else: self.rassns = None #print "cassns,rassns", cassns, self.rassns #print translate # compute domain of self.assns # (do nothing with it, should add sanity check!) assns_domain = self.assns.domain() return self def eval(self, dyn=None): indb = self.indb name = self.name cond = self.condition cond.uncache() assns = self.assns assns.uncache() translate = self.translate preserved = self.preserved target = self.target rassns = self.rassns eqs = self.eqs invtrans = self.invtrans #print "assns", assns, assns.__class__ #print "cond", cond #print "eqs", eqs #print "target", target #print "dyn", dyn #print "rassns", rassns #print "translate", translate #print "invtrans", invtrans (test, rtups, seqnums, tuples) = evalcond( cond, eqs, target, dyn, rassns, translate, invtrans) # shortcut if not test: return self.indb.touched = 1 tt = type from types import IntType #print test (tps, attorder) = assns.map(test) count = 0 newseqs = list(rtups) newtups = list(rtups) for i in rtups: new = tps[i] if tt(new) is not IntType and new is not None: seqnum = seqnums[i] old = tuples[i] if preserved: new = new + preserved*old newtups[count] = new newseqs[count] = seqnum count = count + 1 if count: newseqs = newseqs[:count] newtups = newtups[:count] target.reset_tuples(newtups, newseqs) log = indb.log if log is not None and not log.is_scratch: from semantics import Reset_Tuples op = Reset_Tuples(self.name) op.set_data(newtups, newseqs, target) log.log(op) class DeleteOp(semantics.SimpleRecursive): def __init__(self, name, where): self.name = name self.condition = where def initargs(self): return (self.name, self.condition) def __repr__(self): return "DELETE FROM %s WHERE %s" % (self.name, self.condition) def relbind(self, db): self.indb = db name = self.name target = self.target = db.get_for_update(name) (attb, relb, amb, ambatts) = db.bindings( [ (name, name) ] ) cond = self.condition = self.condition.relbind(attb, db) # compute domain of cond # do nothing with it (should add sanity check) cond_domain = cond.domain() constraints = cond.constraints if constraints is not None: cassns = constraints.assns self.eqs = constraints.eqs else: self.eqs = cassns = None # compute projection/rename from semantics import kjbuckets condatts = kjbuckets.kjGraph(cond.domain().items()).neighbors(name) translate = kjbuckets.kjDict() for att in condatts: translate[(name, att)] = att self.translate = translate invtrans = self.invtrans = ~translate if cassns is not None: self.rassns = invtrans * cassns else: self.rassns = None return self def eval(self, dyn=None): # note, very similar to update case... 
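        # Shared flow with UpdateOp.eval: evalcond() (defined above) picks an
        # index when the WHERE constraints cover one, filters the candidate
        # rows, and returns (test, rtups, seqnums, tuples); below we keep only
        # the sequence numbers of matching rows and erase those tuples.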
indb = self.indb name = self.name target = self.target tuples = target.tuples eqs = self.eqs rassns = self.rassns cond = self.condition cond.uncache() translate = self.translate invtrans = self.invtrans (test, rtups, seqnums, tuples) = evalcond( cond, eqs, target, dyn, rassns, translate, invtrans) # shortcut if not test: return indb.touched = 1 tt = type from types import IntType count = 0 newseqs = list(rtups) #print "rtups", rtups for i in rtups: new = test[i] if tt(new) is not IntType and new is not None: seqnum = seqnums[i] newseqs[count] = seqnum count = count + 1 #print "newseqs", newseqs #print "count", count if count: newseqs = newseqs[:count] target.erase_tuples(newseqs) log = indb.log if log is not None and not log.is_scratch: from semantics import Erase_Tuples op = Erase_Tuples(self.name) op.set_data(newseqs, target) log.log(op) INSFMT = """\ INSERT INTO %s %s %s""" class InsertOp(semantics.SimpleRecursive): def __init__(self, name, optcolids, insertspec): self.name = name self.optcolids = optcolids self.insertspec = insertspec self.target = None # target relation self.collector = None # name map for attribute translation def initargs(self): return (self.name, self.optcolids, self.insertspec) def __repr__(self): return INSFMT % (self.name, self.optcolids, self.insertspec) def relbind(self, db): self.indb = db name = self.name # determine target relation target = self.target = db.get_for_update(name) targetatts = target.attributes() from semantics import kjbuckets kjSet = kjbuckets.kjSet targetset = kjSet(targetatts) # check or set colid bindings colids = self.optcolids if colids is None: colids = self.optcolids = target.attributes() colset = kjSet(colids) ### for now all attributes must be in colset cdiff = colset-targetset if cdiff: raise NameError, "%s: no such attributes in %s" % (cdiff.items(), name) cdiff = targetset-colset ### temporary!!! 
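        # (that is, every column of the target table must receive a value;
        #  partial column lists with defaulted attributes are not supported)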
if cdiff: raise NameError, "%s: not set in insert on %s" % (cdiff.items(), name) # bind the insertspec insertspec = self.insertspec self.insertspec = insertspec = insertspec.relbind(db) # create a collector for result from semantics import TupleCollector collector = self.collector = TupleCollector() # get ordered list of expressions to eval on bound attributes of insertspec resultexps = insertspec.resultexps() if len(resultexps)!=len(colset): raise ValueError, "result and colset of differing length %s:%s" % (colset,resultexps) pairs = map(None, colids, resultexps) for (col,exp) in pairs: collector.addbinding(col, exp) return self def eval(self, dyn=None): resultbts = self.insertspec.eval(dyn) #print "resultbts", resultbts # shortcut if not resultbts: return indb = self.indb indb.touched = 1 (resulttups, resultatts) = self.collector.map(resultbts) #print "resulttups", resulttups if resulttups: target = self.target target.add_tuples(resulttups) #target.regenerate_indices() log = indb.log if log is not None and not log.is_scratch: from semantics import Add_Tuples op = Add_Tuples(self.name) op.set_data(resulttups, target) log.log(op) Insert_dummy_arg = [ ( (1,1), 1 ) ] class InsertValues(semantics.SimpleRecursive): def __init__(self, List): self.list = List def initargs(self): return (self.list,) def __repr__(self): return "VALUES " +` tuple(self.list) ` def resultexps(self): return self.list def relbind(self, db): l = self.list bindings = {} for i in xrange(len(self.list)): li = l[i] l[i] = li.relbind(bindings, db) # do nothing with domain, for now li_domain = li.domain() return self def eval(self, dyn=None): if dyn: from semantics import dynamic_binding dynbt = dynamic_binding(len(dyn), dyn) else: # dummy value to prevent triviality from semantics import kjbuckets dynbt = [kjbuckets.kjDict(Insert_dummy_arg)] #print "bindings", dynbt.assns return dynbt # ?? class InsertSubSelect(semantics.SimpleRecursive): def __init__(self, subsel): self.subsel = subsel def initargs(self): return (self.subsel,) def __repr__(self): return "[subsel] %s" % (self.subsel,) def resultexps(self): # get list of result bindings subsel = self.subsel atts = self.subsel.attributes() # bind each as "result.name" exps = [] from semantics import BoundAttribute for a in atts: exps.append( BoundAttribute("result", a) ) return exps # temp def relbind(self, db): subsel = self.subsel self.subsel = subsel.relbind(db) # do nothing with domain for now #subsel_domain = subsel.domain() return self def eval(self, dyn=None): subsel = self.subsel subsel.uncache() rel = subsel.eval(dyn) tups = rel.rows() from semantics import BoundTuple ### temp from semantics import kjbuckets kjDict = kjbuckets.kjDict for i in xrange(len(tups)): tupsi = tups[i] new = kjDict() for k in tupsi.keys(): new[ ("result", k) ] = tupsi[k] tups[i] = new return tups # ordering for archiving datadefs ddf_order = [CreateTable, CreateIndex, CreateView] # # $Log: operations.py,v $ # Revision 1.4 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.3 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. 
# # Revision 1.2 2002/05/07 23:19:02 richard # Removed circular import (at import time at least) # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # gadfly-1.0.0/gadfly/semantics.py0100644000157700012320000026676607467104371015634 0ustar rjonestech""" SQL semantics :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: semantics.py,v 1.7 2002/05/11 02:59:05 richard Exp $: """ ### trim unused methods. ### make assns use equivalence classes. ### maybe eventually implement disj-conj-eq optimizations ### note: for multithreading x.relbind(...) should ALWAYs return ### a fresh copy of structure (sometimes in-place now). ### note: binding of order by is dubious with archiving, ### should not bind IN PLACE, leave unbound elts alone! import sys, traceback, types from kjbuckets_select import kjbuckets import serialize Tuple = kjbuckets.kjDict Graph = kjbuckets.kjGraph Set = kjbuckets.kjSet ### debug #sys.stderr = sys.stdin # operations on simple tuples, mostly from kjbuckets #def maketuple(thing): # """try to make a tuple from thing. # thing should be a dictionary or sequence of (name, value) # or other tuple.""" # from types import DictType # if type(thing)==DictType: # return Tuple(thing.items() ) # else: return Tuple(thing) def no_ints_nulls(list): """in place remove all ints, Nones from a list (for null handling)""" tt = type nn = None from types import IntType count = 0 for x in list: if tt(x) is not IntType and x is not nn: list[count] = x count = count+1 del list[count:] return list # stuff for bound tuples. class HashJoiner: def __init__(self, bt, relname, attributes, relation, witness): self.relname = relname self.attributes = attributes self.relation = relation self.witness = witness self.bt = bt eqs = bt.eqs #print "relname", relname #print "attributes", attributes #print "relation", relation #print "witness", witness #print "bt", bt transform = self.transform = kjbuckets.kjDict() rbindings = self.rbindings = kjbuckets.kjSet() for a in attributes: b = (relname, a) transform[b] = a rbindings[b] = b self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings) witness = witness.remap(eqs) known = kjbuckets.kjSet(witness.keys()) & rbindings batts = tuple(known.items()) if not batts: atts = () elif len(batts)==1: atts = ( transform[batts[0]], ) else: atts = transform.dump(batts) self.atts = atts self.batts = batts self.transform = transform eqs = bt.eqs #eqs = (rbindings * eqs) self.eqs = eqs = eqs + kjbuckets.kjGraph(rbindings) self.transformG = transformG = eqs * transform assns = self.assns = bt.assns self.rassns = assns.remap( ~transformG ) def relbind(self, db, atts): rel = self.relation #print "rel is ", rel, type(rel) #print dir(rel) if rel.is_view: self.relation = rel.relbind(db, atts) return self def uncache(self): rel = self.relation if rel.is_view: self.relation.uncache() def join(self, subseq): relname = self.relname result = [] assns = self.assns if not subseq: return result # apply equalities to unitary subseq (embedded subq) if len(subseq)==1: subseq0 = subseq[0] subseq0r = subseq0.remap(self.eqs) if subseq0r is None: return [] # inconsistent subseq0 = subseq0 + subseq0r + assns if subseq0.Clean() is None: return [] # inconsistent subseq = [subseq0] rassns = self.rassns #print "rassns", rassns #print "subseq", subseq if rassns is None: #print "inconsistent btup" return [] relation = self.relation #print "assns", assns transformG = self.transformG #print "transformG", transformG transform = self.transform atts = self.atts batts = 
self.batts #print "batts, atts", batts, atts if not batts: #print "cross product", relname tuples = relation.rows() for t in tuples: #print "t is", t if rassns: t = (t + rassns).Clean() if t is None: #print "preselect fails" continue new = t.remap(transformG) #print "new is", new if new is None: #print "transform fails" continue for subst in subseq: #print "subst", subst if subst: add = (subst + new).Clean() else: add = new #print "add is", add if add is not None: result.append(add) else: # hash join #print "hash join" # first try to use an index index = relation.choose_index(atts) #print transform if index is not None: #print "index join", index.name, relname #print index.index.keys() #print "rassns", rassns atts = index.attributes() invtransform = ~transform if len(atts)==1: batts = (invtransform[atts[0]],) else: batts = invtransform.dump(atts) hash_tups = 1 tindex = index.index # memoize converted tuples tindex0 = {} test = tindex.has_key test0 = tindex0.has_key for i in xrange(len(subseq)): subst = subseq[i] #print "substs is", subst its = subst.dump(batts) #print "its", its othersubsts = [] if test0(its): othersubsts = tindex0[its] elif test(its): tups = tindex[its] for t in tups: #print "t before", t t = (t+rassns).Clean() #print "t after", t if t is None: continue new = t.remap(transformG) #print "new", new if new is None: continue othersubsts.append(new) tindex0[its] = othersubsts for other in othersubsts: #print "adding", other, subst add = (other + subst).Clean() if add is not None: result.append(add) # hash join #print "hash join" else: tuples = relation.rows() if len(subseq)simpletuple associations.""" self.eqs = Graph() self.assns = Tuple() for (name, simpletuple) in bindings.items(): # XXXX TODO FIXME. # there _is_ no 'bind()' method! Fortunately, afaics # this constructor is never called with args. self.bind(name, simpletuple) def initargs(self): return () def marshaldata(self): #print "btp marshaldata", self return (self.eqs.items(), self.assns.items(), self.clean, self.closed) def demarshal(self, args): (eitems, aitems, self.clean, self.closed) = args self.eqs = kjbuckets.kjGraph(eitems) self.assns = kjbuckets.kjDict(aitems) def relbind(self, dict, db): """return bindings of self wrt dict rel>att""" result = BoundTuple() e2 = result.eqs a2 = result.assns for ((a,b), (c,d)) in self.eqs.items(): if a is None: try: a = dict[b] except KeyError: raise NameError, `b`+": ambiguous or unknown attribute" if c is None: try: c = dict[d] except KeyError: raise NameError, `d`+": ambiguous or unknown attribute" e2[(a,b)] = (c,d) for ((a,b), v) in self.assns.items(): if a is None: try: a = dict[b] except KeyError: raise NameError, `b`+": ambiguous or unknown attribute" a2[(a,b)] = v result.closed = self.closed result.clean = self.clean return result #def known(self, relname): # """return ([(relname, a1), ...], [a1, ...]) # for attributes ai of relname known in self.""" # atts = [] # batts = [] # for x in self.assns.keys(): # (r,a) = x # if r==relname: # batts.append(x) # atts.append(a) # return (batts, atts) def relorder(self, db, allrels): """based on known constraints, pick an ordering for materializing relations. db is database (ignored currently) allrels is names of all relations to include (list).""" ### not very smart about indices yet!!! 
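# --- illustrative sketch (not part of the original module) ---
# HashJoiner.join above either forms a cross product (no shared bound
# attributes) or performs a hash join, bucketing one side by the values of
# the equated attributes and probing with the other.  The same strategy
# reduced to plain dicts; rows are dictionaries and 'key' is the shared
# attribute (all names here are hypothetical):
def _hash_join(left_rows, right_rows, key):
    # build phase: bucket the right-hand rows by join-key value
    buckets = {}
    for r in right_rows:
        buckets.setdefault(r[key], []).append(r)
    # probe phase: merge each left row with every row in its bucket
    result = []
    for l in left_rows:
        for r in buckets.get(l[key], []):
            merged = l.copy()
            merged.update(r)
            result.append(merged)
    return result
# _hash_join([{"k": 1, "a": "x"}], [{"k": 1, "b": "y"}], "k")
#   == [{"k": 1, "a": "x", "b": "y"}]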
if len(allrels)<2: # doesn't matter return allrels order = [] eqs = self.eqs assns = self.assns kjSet = kjbuckets.kjSet kjGraph = kjbuckets.kjGraph pinned = kjSet() has_index = kjSet() needed = kjSet(allrels) akeys = assns.keys() for (r,a) in akeys: pinned[r]=r # pinned if some value known known_map = kjGraph(akeys) for r in known_map.keys(): rknown = known_map.neighbors(r) if db.has_key(r): rel = db[r] index = rel.choose_index(rknown) if index is not None: has_index[r] = r # has an index! if pinned: pinned = pinned & needed if has_index: has_index = has_index & needed related = kjGraph() for ( (r1, a1), (r2, a2) ) in eqs.items(): related[r1]=r2 # related if equated to other related[r2]=r1 # redundant if closed. if related: related = needed * related * needed chosen = kjSet() pr = kjSet(related) & pinned # choose first victim if has_index: choice = has_index.choose_key() elif pr: choice = pr.choose_key() elif pinned: choice = pinned.choose_key() elif related: choice = related.choose_key() else: return allrels[:] # give up! while pinned or related or has_index: order.append(choice) chosen[choice] = 1 if pinned.has_key(choice): del pinned[choice] if related.has_key(choice): del related[choice] if has_index.has_key(choice): del has_index[choice] nexts = related * chosen if nexts: # prefer a relation related to chosen choice = nexts.choose_key() elif pinned: # otherwise one that is pinned choice = pinned.choose_key() elif related: # otherwise one that relates to something... choice = related.choose_key() others = kjSet(allrels) - chosen if others: order = order + others.items() return order def domain(self): kjSet = kjbuckets.kjSet return kjSet(self.eqs) + kjSet(self.assns) def __repr__(self): result = [] for ( (name, att), value) in self.assns.items(): result.append( "%s.%s=%s" % (name, att, `value`) ) for ( (name, att), (name2, att2) ) in self.eqs.items(): result.append( "%s.%s=%s.%s" % (name, att, name2, att2) ) if self.clean: if not result: return "TRUE" else: result.insert(0, "FALSE") result.sort() return ' & '.join(result) def equate(self, equalities): """add equalities to self, only if not closed. equalities should be seq of ( (name, att), (name, att) ) """ if self.closed: raise ValueError, "cannot add equalities! Closed!" e = self.eqs for (a, b) in equalities: e[a] = b def close(self): """infer equalities, if consistent. only recompute equality closure if not previously closed. return None on inconsistency. """ neweqs = self.eqs if not self.closed: self.eqs = neweqs = (neweqs + ~neweqs).tclosure() # sym, trans closure self.closed = 1 # add trivial equalities to self for x in self.assns.keys(): if not neweqs.member(x,x): neweqs[x] = x newassns = self.assns.remap(neweqs) if newassns is not None and self.clean: self.assns = newassns #self.clean = 1 return self else: self.clean = 0 return None def share_eqs(self): """make clone of self that shares equalities, closure. 
note: will share future side effects to eqs too.""" result = BoundTuple() result.eqs = self.eqs result.closed = self.closed return result def __add__(self, other): """combine self with other, return closure.""" result = self.share_eqs() se = self.eqs oe = other.eqs if (se is not oe) and (se != oe): result.eqs = se + oe result.closed = 0 ra= result.assns = self.assns + other.assns result.clean = result.clean and (ra.Clean() is not None) return result.close() def __and__(self, other): """return closed constraints common to self and other.""" result = BoundTuple() se = self.eqs oe = other.eqs if (se is oe) or (se == oe): result.eqs = self.eqs result.closed = self.closed else: result.eqs = self.eqs & other.eqs result.assns = self.assns & other.assns result.clean = self.clean and other.clean return result.close() def __hash__(self): # note: equalities don't enter into hash computation! # (some may be spurious) self.close() return hash(self.assns)# ^ hash(self.eqs) def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test sa = self.assns oa = other.assns test = cmp(sa, oa) if test: return test kjSet = kjbuckets.kjSet kjGraph = kjbuckets.kjSet se = self.eqs se = kjGraph(se) - kjGraph(kjSet(se)) oe = other.eqs oe = kjGraph(oe) - kjGraph(kjSet(oe)) return cmp(se, oe) class BoundExpression(SimpleRecursive): """superclass for all bound expressions. except where overloaded expressions are binary with self.left and self.right """ contains_aggregate = 0 # default def __init__(self, left, right): self.left = left self.right = right self.contains_aggregate = left.contains_aggregate or right.contains_aggregate def initargs(self): return (self.left, self.right) def uncache(self): """prepare for execution, clear cached data.""" self.left.uncache() self.right.uncache() # eventually add converters... def equate(self,other): """return predicate equating self and other. Overload for special cases, please!""" return NontrivialEqPred(self, other) def attribute(self): return (None, `self`) def le(self, other): """predicate self<=other""" return LessEqPred(self, other) # these should be overridden for 2 const case someday... def lt(self, other): """predicate self list of subtuple""" lexprs = len(exprlist) if lexprs<1: raise ValueError, "aggregate on no expressions?" 
    lassns = len(assignments)
    pairs = list(exprlist)
    for i in xrange(lexprs):
        expr = exprlist[i]
        attributes = [expr.attribute()]*lassns
        values = expr.value(assignments)
        pairs[i] = map(None, attributes, values)
    #for x in pairs:
    #    print "pairs", x
    if lexprs>1:
        newassnpairs = apply(map, (None,)+tuple(pairs))
    else:
        newassnpairs = pairs[0]
    #for x in newassnpairs:
    #    print "newassnpairs", x
    xassns = range(lassns)
    dict = {}
    test = dict.has_key
    for i in xrange(lassns):
        thesepairs = newassnpairs[i]
        thissubassn = assignments[i]
        if test(thesepairs):
            dict[thesepairs].append(thissubassn)
        else:
            dict[thesepairs] = [thissubassn]
    items = dict.items()
    result = list(items)
    kjDict = kjbuckets.kjDict
    if lexprs>1:
        for i in xrange(len(items)):
            (pairs, subassns) = items[i]
            #print "pairs", pairs
            #print "subassns", subassns
            D = kjDict(pairs)
            D[None] = subassns
            result[i] = D
    else:
        for i in xrange(len(items)):
            (pair, subassns) = items[i]
            #print "pair", pair
            #print "subassns", subassns
            result[i] = kjDict( [pair, (None, subassns)] )
    return result

### stuff for order_by

class DescExpr(BoundMinus):
    """special wrapper used only for order by descending
    for things with no -thing operation (eg, strings)"""
    def __init__(self, thing):
        self.thing = thing
        self.contains_aggregate = thing.contains_aggregate
    def value(self, contexts):
        from types import IntType, StringType
        tt = type
        result = self.thing.value(contexts)
        allwrap = None
        allnowrap = None
        for i in xrange(len(contexts)):
            if tt(contexts[i]) is not IntType:
                resulti = result[i]
                # currently assume only value needing wrapping is string
                if tt(resulti) is StringType:
                    if allnowrap is not None:
                        raise ValueError, "(%s, %s) cannot order desc" % (allnowrap, resulti)
                    allwrap = resulti
                    result[i] = descOb(resulti)
                else:
                    if allwrap is not None:
                        raise ValueError, "(%s, %s) cannot order desc" % (allwrap, resulti)
                    allnowrap = resulti
                    result[i] = -resulti
        return result
    def __repr__(self):
        return "DescExpr(%s)" % (self.thing,)
    def orderbind(self, order):
        """order is list of (att, expr)."""
        Class = self.__class__
        return Class(self.thing.orderbind(order))

class SimpleColumn(SimpleRecursive):
    """a simple column name for application to a list of tuples"""
    contains_aggregate = 0
    def __init__(self, name):
        self.name = name
    def relbind(self, dict, db):
        # already bound!
        return self
    def orderbind(self, whatever):
        # already bound!
        return self
    def initargs(self):
        return (self.name,)
    def value(self, simpletuples):
        from types import IntType
        tt = type
        name = self.name
        result = list(simpletuples)
        for i in xrange(len(result)):
            ri = result[i]
            if tt(ri) is not IntType:
                result[i] = ri[name]
            else:
                result[i] = None # ???
        return result
    def __repr__(self):
        return "<SimpleColumn %s>" % (self.name,)

class NumberedColumn(BoundMinus):
    """order by column number"""
    contains_aggregate = 0
    def __init__(self, num):
        self.thing = num
    def __repr__(self):
        return "<NumberedColumn %s>" % (self.thing,)
    def relbind(self, dict, db):
        from types import IntType
        if type(self.thing)!=IntType:
            raise ValueError, `self.thing`+": not a numbered column"
        return self
    def orderbind(self, order):
        return SimpleColumn( order[self.thing-1][0] )

class OrderExpr(BoundMinus):
    """order by expression."""
    def orderbind(self, order):
        expratt = self.thing.attribute()
        for (att, exp) in order:
            if exp.attribute()==expratt:
                return SimpleColumn(att)
        else:
            raise NameError, `self`+": invalid ordering specification"
    def __repr__(self):
        return "<OrderExpr %s>" % (self.thing,)

class descOb:
    """special wrapper only used for sorting in descending order
    should only be compared with other descOb instances.
should only wrap items that cannot be easily "order inverted", (eg, strings). """ def __init__(self, ob): self.ob = ob def __cmp__(self, other): #test = cmp(self.__class__, other.__class__) #if test: return test return -cmp(self.ob,other.ob) def __coerce__(self, other): return (self, other) def __hash__(self): return hash(self.ob) def __repr__(self): return "descOb(%s)" % (self.ob,) def PositionedSort(num, ord): nc = NumberedColumn(num) if ord=="DESC": return DescExpr(nc) return nc def NamedSort(name, ord): oe = OrderExpr(name) if ord=="DESC": return DescExpr(oe) return oe def relbind_sequence(order_list, dict, db): result = list(order_list) for i in xrange(len(order_list)): result[i] = result[i].relbind(dict,db) return result def orderbind_sequence(order_list, order): result = list(order_list) for i in xrange(len(order_list)): result[i] = result[i].orderbind(order) return result def order_tuples(order_list, tuples): lorder_list = len(order_list) ltuples = len(tuples) if lorder_list<1: raise ValueError, "order on empty list?" order_map = list(order_list) for i in xrange(lorder_list): order_map[i] = order_list[i].value(tuples) if len(order_map)>1: order_vector = apply(map, (None,)+tuple(order_map) ) else: order_vector = order_map[0] #G = kjbuckets.kjGraph() pairs = map(None, range(ltuples), tuples) ppairs = map(None, order_vector, pairs) G = kjbuckets.kjGraph(ppairs) #for i in xrange(ltuples): # G[ order_vector[i] ] = (i, tuples[i]) Gkeys = G.keys() Gkeys.sort() result = list(tuples) index = 0 for x in Gkeys: #print x for (i, y) in G.neighbors(x): #print " ", y result[index]=y index = index+1 if index!=ltuples: raise ValueError, \ "TUPLE LOST IN ORDERING COMPUTATION! (%s,%s)" % (ltuples, index) return result class BoundAddition(BoundExpression): """promised addition.""" op = "+" def value(self, contexts): from types import IntType tt = type lvs = self.left.value(contexts) rvs = self.right.value(contexts) for i in xrange(len(contexts)): if tt(contexts[i]) is not IntType: lvs[i] = lvs[i] + rvs[i] return lvs class BoundSubtraction(BoundExpression): """promised subtraction.""" op = "-" def value(self, contexts): from types import IntType tt = type lvs = self.left.value(contexts) rvs = self.right.value(contexts) for i in xrange(len(contexts)): if tt(contexts[i]) is not IntType: lvs[i] = lvs[i] - rvs[i] return lvs class BoundMultiplication(BoundExpression): """promised multiplication.""" op = "*" def value(self, contexts): from types import IntType tt = type lvs = self.left.value(contexts) rvs = self.right.value(contexts) #print lvs for i in xrange(len(contexts)): if tt(contexts[i]) is not IntType: lvs[i] = lvs[i] * rvs[i] return lvs class BoundDivision(BoundExpression): """promised division.""" op = "/" def value(self, contexts): from types import IntType tt = type lvs = self.left.value(contexts) rvs = self.right.value(contexts) for i in xrange(len(contexts)): if tt(contexts[i]) is not IntType: lvs[i] = lvs[i] / rvs[i] return lvs class BoundAttribute(BoundExpression): """bound attribute: initialize with relname=None if implicit.""" contains_aggregate = 0 def __init__(self, rel, name): self.rel = rel self.name = name def initargs(self): return (self.rel, self.name) def relbind(self, dict, db): if self.rel is not None: return self name = self.name try: rel = dict[name] except KeyError: raise NameError, `name` + ": unknown or ambiguous" return BoundAttribute(rel, name) def uncache(self): pass def __repr__(self): return "%s.%s" % (self.rel, self.name) def attribute(self): """return (rename, 
attribute) for self.""" return (self.rel, self.name) def domain(self): return kjbuckets.kjSet([ (self.rel, self.name) ]) def value(self, contexts): """return value of self in context (bound tuple).""" #print "value of ", self, "in", contexts from types import IntType tt = type result = list(contexts) ra = (self.rel, self.name) for i in xrange(len(result)): if tt(result[i]) is not IntType: result[i] = contexts[i][ra] return result def equate(self, other): oc = other.__class__ if oc==BoundAttribute: result = BoundTuple() result.equate([(self.attribute(), other.attribute())]) return BTPredicate(result) elif oc==Constant: result = BoundTuple() result.assns[ self.attribute() ] = other.value([1])[0] return BTPredicate(result) else: return NontrivialEqPred(self, other) class Constant(BoundExpression): contains_aggregate = 0 def __init__(self, value): self.value0 = value def __hash__(self): return hash(self.value0) def initargs(self): return (self.value0,) def domain(self): return kjbuckets.kjSet() def __add__(self, other): if other.__class__==Constant: return Constant(self.value0 + other.value0) return BoundAddition(self, other) def __sub__(self, other): if other.__class__==Constant: return Constant(self.value0 - other.value0) return BoundSubtraction(self, other) def __mul__(self, other): if other.__class__==Constant: return Constant(self.value0 * other.value0) return BoundMultiplication(self, other) def __neg__(self): return Constant(-self.value0) def __div__(self, other): if other.__class__==Constant: return Constant(self.value0 / other.value0) return BoundDivision(self, other) def relbind(self, dict, db): return self def uncache(self): pass def value(self, contexts): """return the constant value associated with self.""" return [self.value0] * len(contexts) def equate(self,other): if other.__class__==Constant: if other.value0 == self.value0: return BTPredicate() #true else: return ~BTPredicate() #false else: return other.equate(self) def attribute(self): """invent a pair to identify a constant""" return ('unbound', `self`) def __repr__(self): return str(self.value0) #return "" % (`self.value0`, id(self)) class TupleCollector: """Translate a sequence of assignments to simple tuples. (for implementing the select list of a SELECT). """ contains_aggregate = 0 contains_nonaggregate = 0 def __init__(self): self.final = None self.order = [] self.attorder = [] self.exporder = [] def initargs(self): return () def marshaldata(self): exps = map(serialize.serialize, self.exporder) return (self.attorder, exps, self.contains_aggregate, self.contains_nonaggregate) def demarshal(self, args): (self.attorder, exps, self.contains_aggregate, self.contains_nonaggregate) = args exporder = self.exporder = map(serialize.deserialize, exps) self.order = map(None, self.attorder, exporder) def uncache(self): for exp in self.exporder: exp.uncache() def domain(self): all=[] for e in self.exporder: all = all+e.domain().items() return kjbuckets.kjSet(all) def __repr__(self): l = [] for (att, exp) in self.order: l.append( "%s as %s" % (exp, att) ) return ', '.join(l) def addbinding(self, attribute, expression): """bind att>expression.""" self.order.append((attribute, expression) ) self.attorder.append(attribute ) self.exporder.append(expression) if expression.contains_aggregate: self.contains_aggregate = 1 else: self.contains_nonaggregate = 1 def map(self, assnlist): """remap btlist by self. 
return (tuplelist, attorder)""" # DON'T eliminate nulls from types import IntType tt = type values = [] for exp in self.exporder: values.append(exp.value(assnlist)) if len(values)>1: valtups = apply(map, (None,) + tuple(values) ) else: valtups = values[0] kjUndump = kjbuckets.kjUndump undumper = tuple(self.attorder) for i in xrange(len(valtups)): test = assnlist[i] if tt(test) is IntType or test is None: valtups[i] = 0 # null/false else: tup = valtups[i] valtups[i] = kjUndump(undumper, tup) return (valtups, self.attorder) def relbind(self, dict, db): """disambiguate missing rel names if possible. also choose output names appropriately.""" # CURRENTLY THIS IS AN "IN PLACE" OPERATION order = self.order attorder = self.attorder exporder = self.exporder known = {} for i in xrange(len(order)): (att, exp) = order[i] #print exp exp = exp.relbind(dict, db) if att is None: # choose a name for this column #print exp (rname, aname) = exp.attribute() if known.has_key(aname): both = rname+"."+aname att = both count = 0 while known.has_key(att): # crank away! count = count+1 att = both+"."+`count` else: att = aname else: if known.has_key(att): raise NameError, `att`+" ambiguous in select list" order[i] = (att, exp) exporder[i] = exp attorder[i] = att known[att] = att return self class BTPredicate(SimpleRecursive): """superclass for bound tuple predicates. Eventually should be modified to use "compile" for speed to generate an "inlined" evaluation function. self(bt) returns bt with additional equality constraints (possible) or None if predicate fails.""" false = 0 constraints = None contains_aggregate = 0 def __init__(self, constraints=None): """default interpretation: True.""" if constraints is not None: self.constraints = constraints.close() def initargs(self): return (self.constraints,) def relbind(self, dict, db): c = self.constraints if c is None: return self return BTPredicate( self.constraints.relbind(dict, db) ) def uncache(self): pass #def evaluate(self, db, relnames): #"""evaluate the predicate over database bindings.""" # pretty simple strategy right now... ### need to do something about all/distinct... #c = self.constraints #if c is None: # c = BoundTuple() #order = c.relorder(db, relnames) #if not order: # raise ValueError, "attempt to evaluate over no relations: "+`relnames` #result = [c] #for r in order: # result = hashjoin(result, r, db[r]) #if self.__class__==BTPredicate: # # if it's just equality conjunction, we're done # return result #else: # # apply additional constraints # return self(result) def domain(self): c = self.constraints kjSet = kjbuckets.kjSet if c is None: return kjSet() return c.domain() def __repr__(self): if self.false: return "FALSE" c = self.constraints if c is None: c = "true" return "[pred](%s)" % c def detrivialize(self): """hook added to allow elimination of trivialities return None if completely true, or simpler form or self, if no simplification is possible.""" if self.false: return self if not self.constraints: return None return self def negated_constraints(self): """equality constraints always false of satisfactory tuple.""" return BoundTuple() # there aren't any def __call__(self, assignments, toplevel=0): """apply self to sequence of assignments return copy of asssignments with false results replaced by 0! Input may have 0's!""" # optimization # if toplevel, the btpred has been evaluated during join. 
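# --- illustrative sketch (not part of the original module) ---
# The docstring just above documents the convention used throughout this
# module: a predicate returns a *copy* of the assignments list with failing
# entries replaced by the integer 0 (never shrinking the list, so positions
# stay aligned); no_ints_nulls() later compacts the survivors.  A minimal
# model of that convention, with a hypothetical helper name:
def _apply_pred(rows, pred):
    # mimic BTPredicate.__call__: zero out failures in place of removal
    result = rows[:]
    for i in xrange(len(rows)):
        row = rows[i]
        if type(row) is type(0):
            continue            # already eliminated by an earlier predicate
        if not pred(row):
            result[i] = 0
    return result
# _apply_pred([{"a": 1}, {"a": 2}], lambda r: r["a"] > 1)
#   == [0, {"a": 2}]   -- no_ints_nulls would then drop the 0.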
if toplevel: return list(assignments) from types import IntType tt = type lbt = len(assignments) if self.false: return [0] * lbt c = self.constraints if c is None or not c: result = assignments[:] # no constraints else: assns = c.assns eqs = c.eqs eqsinteresting = 0 for (a,b) in eqs.items(): if a!=b: eqsinteresting = 1 result = assignments[:] for i in xrange(lbt): this = assignments[i] #print "comparing", self, "to", this if type(this) is IntType: continue this = (this + assns).Clean() if this is None: result[i] = 0 elif eqsinteresting: this = this.remap(eqs) if this is None: result[i] = 0 return result def __and__(self, other): """NOTE: all subclasses must define an __and__!!!""" #print "BTPredicate.__and__", (self, other) if self.__class__==BTPredicate and other.__class__==BTPredicate: c = self.constraints o = other.constraints if c is None: return other if o is None: return self if self.false: return self if other.false: return other # optimization for simple constraints all = (c+o) result = BTPredicate( all ) # all constraints if all is None: result.false = 1 else: result = other & self return result def __or__(self, other): if self.__class__==BTPredicate and other.__class__==BTPredicate: c = self.constraints o = other.constraints if c is None: return self # true dominates if o is None: return other if other.false: return self if self.false: return other if self == other: return self result = BTor_pred([self, other]) return result def __invert__(self): if self.false: return BTPredicate() if not self.constraints: result = BTPredicate() result.false = 1 return result return BTnot_pred(self) def __cmp__(self, other): test = cmp(other.__class__, self.__class__) if test: return test if self.false and other.false: return 0 return cmp(self.constraints, other.constraints) def __hash__(self): if self.false: return 11111 return hash(self.constraints) class BTor_pred(BTPredicate): def __init__(self, members, *othermembers): # replace any OR in members with its members #print "BTor_pred", members members = list(members) + list(othermembers) for m in members[:]: if m.__class__==BTor_pred: members.remove(m) members = members + m.members #print "before", members members = self.members = kjbuckets.kjSet(members).items() #print members for m in members[:]: if m.false: members.remove(m) self.constraints = None # common constraints for m in members: if m.contains_aggregate: self.contains_aggregate = 1 if members: # common constraints are those in all members constraints = members[0].constraints for m in members[1:]: mc = m.constraints if not constraints or not mc: constraints = None break constraints = constraints & mc self.constraints = constraints #print members def initargs(self): return ((),) + tuple(self.members) def relbind(self, dict, db): ms = [] for m in self.members: ms.append( m.relbind(dict, db) ) return BTor_pred(ms) def uncache(self): for m in self.members: m.uncache() def domain(self): all = BTPredicate.domain(self).items() for x in self.members: all = all + x.domain().items() return kjbuckets.kjSet(all) def __repr__(self): c = self.constraints m = self.members mr = map(repr, m) mr.sort() mr = ' | '.join(mr) if not mr: mr = "FALSE_OR" if c: mr = "[disj](%s and %s)" % (c, mr) return mr def detrivialize(self): """hook added to allow elimination of trivialities return None if completely true, or simpler form or self, if no simplification is possible.""" ms = self.members for i in xrange(len(ms)): ms[i] = ms[i].detrivialize() # now suck out subordinate ors someor = None for m in ms: if 
m.__class__== BTor_pred: someor = m ms.remove(m) break if someor is not None: result = someor for m in ms: result = result + m return result.detrivialize() allfalse = 1 for m in ms: if m is None: allfalse=0; break # true member allfalse = allfalse & m.false if allfalse: return ~BTPredicate() # boundary case ms[:] = filter(None, ms) if not ms: return None # all true. ms[:] = kjbuckets.kjSet(ms).items() if len(ms)==1: return ms[0] # or of 1 return self def __call__(self, boundtuples, toplevel=0): # apply common constraints first lbt = len(boundtuples) # boundary case for or is false members = self.members if not members: return [0] * lbt current = BTPredicate.__call__(self, boundtuples, toplevel) # now apply first alternative alt1 = members[0](current) # determine questionables questionables = current[:] rng = xrange(len(current)) from types import IntType for i in rng: if not isinstance(alt1[i], IntType): questionables[i] = 0 # now test other alternatives for m in self.members[1:]: passm = m(questionables) for i in rng: test = passm[i] if not isinstance(test, IntType): questionables[i] = 0 alt1[i] = test return alt1 def negated_constraints(self): """the negated constraints of an OR are the negated constraints of *all* members""" ms = self.members result = ms.negated_constraints() for m in ms[1:]: if not result: return result mc = m.negated_constraints() if not mc: return mc result = result & mc return result def __and__(self, other): """push "and" down""" newmembers = self.members[:] for i in xrange(len(newmembers)): newmembers[i] = newmembers[i] & other return BTor_pred(newmembers) def __or__(self, other): """collapse two ors, otherwise just add new member""" if self.__class__==BTor_pred and other.__class__==BTor_pred: return BTor_pred(self.members+other.members) return BTor_pred(self.members + [other]) def __invert__(self): """translate to and-not""" ms = self.members if not ms: return BTPredicate() # boundary case result = ~ms[0] for m in ms[1:]: result = result & ~m return result def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test kjSet = kjbuckets.kjSet test = cmp(kjSet(self.members), kjSet(other.members)) if test: return test return BTPredicate.__cmp__(self, other) def __hash__(self): return hash(kjbuckets.kjSet(self.members)) class BTnot_pred(BTPredicate): def __init__(self, thing): self.negated = thing self.contains_aggregate = thing.contains_aggregate self.constraints = thing.negated_constraints() def initargs(self): return (self.negated,) def relbind(self, dict, db): return BTnot_pred( self.negated.relbind(dict, db) ) def uncache(self): self.negated.uncache() def domain(self): result = BTPredicate.domain(self) + self.negated.domain() #print "neg domain is", `self`, result return result def __repr__(self): c = self.constraints n = self.negated r = "(NOT %s)" % n if c: r = "[neg](%s & %s)" % (c, r) return r def detrivialize(self): """hook added to allow elimination of trivialities return None if completely true, or simpler form or self, if no simplification is possible.""" # first, fix or/and/not precedence thing = self.negated if thing.__class__ == BTnot_pred: return thing.negated.detrivialize() if thing.__class__ == BTor_pred: # translate to and_not members = thing.members[:] for i in xrange(len(members)): members[i] = ~members[i] result = BTand_pred(members) return result.detrivialize() if thing.__class__ == BTand_pred: # translate to or_not members = thing.members[:] c = thing.constraints # precondition if c is not None: 
members.append(BTPredicate(c)) for i in xrange(len(members)): members[i] = ~members[i] result = BTor_pred(members) return result.detrivialize() self.negated = thing = self.negated.detrivialize() if thing is None: return ~BTPredicate() # uniquely false if thing.false: return None # uniquely true return self def __call__(self, boundtuples, toplevel=0): from types import IntType tt = type current = BTPredicate.__call__(self, boundtuples, toplevel) omit = self.negated(current) for i in xrange(len(current)): if tt(omit[i]) is not IntType: current[i]=0 return current def negated_constraints(self): """the negated constraints of a NOT are the negated constraints of the thing negated.""" return self.negated.constraints def __and__(self, other): """do the obvious thing.""" return BTand_pred([self, other]) def __or__(self, other): """do the obvious thing""" return BTor_pred([self, other]) def __invert__(self): return self.negated def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test test = cmp(self.negated,other.negated) if test: return test return BTPredicate.__cmp__(self,other) def __hash__(self): return hash(self.negated)^787876^hash(self.constraints) class BTand_pred(BTPredicate): def __init__(self, members, precondition=None, *othermembers): #print "BTand_pred", (members, precondition) members = list(members) + list(othermembers) members = self.members = kjbuckets.kjSet(members).items() self.constraints = precondition # common constraints if members: # common constraints are those in any member if precondition is not None: constraints = precondition else: constraints = BoundTuple() for i in xrange(len(members)): m = members[i] mc = m.constraints if mc: #print "constraints", constraints constraints = constraints + mc if constraints is None: break if m.__class__==BTPredicate: members[i] = None # subsumed above members = self.members = filter(None, members) for m in members: if m.contains_aggregate: self.contains_aggregate=1 ### consider propagating constraints down? self.constraints = constraints if constraints is None: self.false = 1 def initargs(self): #print "self.members", self.members #print "self.constraints", self.constraints #return (list(self.members), self.constraints) return ((), self.constraints) + tuple(self.members) def relbind(self, dict, db): ms = [] for m in self.members: ms.append( m.relbind(dict, db) ) c = self.constraints.relbind(dict, db) return BTand_pred(ms, c) def uncache(self): for m in self.members: m.uncache() def domain(self): all = BTPredicate.domain(self).items() for x in self.members: all = all + x.domain().items() return kjbuckets.kjSet(all) def __repr__(self): m = self.members c = self.constraints r = map(repr, m) if self.false: r.insert(0, "FALSE") r = ' AND '.join(r) r = "(%s)" % r if c: r = "[conj](%s and %s)" % (c, r) return r def detrivialize(self): """hook added to allow elimination of trivialities return None if completely true, or simpler form or self, if no simplification is possible.""" # first apply demorgan's law to push ands down # (exponential in worst case). #print "detrivialize" #print self ms = self.members some_or = None c = self.constraints for m in ms: if m.__class__==BTor_pred: some_or = m ms.remove(m) break if some_or is not None: result = some_or if c is not None: some_or = some_or & BTPredicate(c) for m in ms: result = result & m # preserves or/and precedence if result.__class__!=BTor_pred: raise "what the?" 
result = result.detrivialize() #print "or detected, returning" #print result return result for i in xrange(len(ms)): ms[i] = ms[i].detrivialize() ms[:] = filter(None, ms) if not ms: #print "returning boundary case of condition" if c is None: return None else: return BTPredicate(c).detrivialize() ms[:] = kjbuckets.kjSet(ms).items() if len(ms)==1 and c is None: #print "and of 1, returning" #print ms[0] return ms[0] # and of 1 return self def __call__(self, boundtuples, toplevel=0): # apply common constraints first current = BTPredicate.__call__(self, boundtuples, toplevel) for m in self.members: current = m(current) return current def negated_constraints(self): """the negated constraints of an AND are the negated constraints of *any* member""" ms = self.members result = BoundTuple() for m in ms: mc = m.negated_constraints() if mc: result = result + mc return result def __and__(self, other): """push "and" down if other is an or""" if other.__class__==BTor_pred: return other & self c = self.constraints # merge in other and if other.__class__==BTand_pred: allmem = self.members+other.members oc = other.constraints if c is None: c = oc elif oc is not None: c = c+oc return BTand_pred(allmem, c) return BTand_pred(self.members + [other], c) def __or__(self, other): """do the obvious thing.""" return BTor_pred([self, other]) def __invert__(self): """translate to or-not""" ms = self.members if not ms: return ~BTPredicate() # boundary case result = ~ms[0] for m in ms[1:]: result = result | ~m return result def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test kjSet = kjbuckets.kjSet test = cmp(kjSet(self.members), kjSet(other.members)) if test: return test return BTPredicate.__cmp__(self, other) def __hash__(self): return hash(kjbuckets.kjSet(self.members)) class NontrivialEqPred(BTPredicate): """equation of nontrivial expressions.""" def __init__(self, left, right): #print "making pred", self.__class__, left, right # maybe should used reflexivity... 
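# --- illustrative sketch (not part of the original module) ---
# BTand_pred.__invert__ above rewrites NOT(a AND b AND ...) as
# (NOT a) OR (NOT b) OR ..., and BTor_pred.__invert__ does the dual:
# De Morgan's laws, used by detrivialize to push negations inward.
# The same rewrite on a toy nested-tuple representation, where a node is
# ("and", [children]), ("or", [children]), or ("leaf", name):
def _demorgan_not(node):
    (kind, body) = node
    if kind == "leaf":
        return ("not", node)
    inverted = []
    for child in body:
        inverted.append(_demorgan_not(child))
    if kind == "and":
        return ("or", inverted)
    return ("and", inverted)
# _demorgan_not(("and", [("leaf", "a"), ("leaf", "b")]))
#   == ("or", [("not", ("leaf", "a")), ("not", ("leaf", "b"))])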
self.left = left self.right = right self.contains_aggregate = left.contains_aggregate or right.contains_aggregate def initargs(self): return (self.left, self.right) def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test test = cmp(self.right, other.right) if test: return test return cmp(other.left, other.left) def hash(self, other): return hash(self.left) ^ hash(self.right) def relbind(self, dict, db): Class = self.__class__ return Class(self.left.relbind(dict,db), self.right.relbind(dict,db) ) def uncache(self): self.left.uncache() self.right.uncache() def domain(self): return self.left.domain() + self.right.domain() op = "==" def __repr__(self): return "(%s)%s(%s)" % (self.left, self.op, self.right) def detrivialize(self): return self def __call__(self, assigns, toplevel=0): from types import IntType tt = type lv = self.left.value(assigns) rv = self.right.value(assigns) result = assigns[:] for i in xrange(len(assigns)): t = assigns[i] if type(t) is not IntType and lv[i]!=rv[i]: result[i] = 0 return result def negated_constraints(self): return None def __and__(self, other): return BTand_pred( [self, other] ) def __or__(self, other): return BTor_pred( [self, other] ) def __invert__(self): return BTnot_pred(self) class BetweenPredicate(NontrivialEqPred): """e1 BETWEEN e2 AND e3""" def __init__(self, middle, lower, upper): self.middle = middle self.lower = lower self.upper = upper def initargs(self): return (self.middle, self.lower, self.upper) def domain(self): return ( self.middle.domain() + self.lower.domain() + self.upper.domain()) def relbind(self, dict, db): self.middle = self.middle.relbind(dict, db) self.lower = self.lower.relbind(dict, db) self.upper = self.upper.relbind(dict, db) return self def uncache(self): self.middle.uncache() self.upper.uncache() self.lower.uncache() def __repr__(self): return "(%s BETWEEN %s AND %s)" % ( self.middle, self.lower, self.upper) def __hash__(self): return hash(self.middle)^~hash(self.lower)^hash(self.upper)^55 def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test test = cmp(self.lower, other.lower) if test: return test test = cmp(self.middle, other.middle) if test: return test return cmp(self.upper, other.upper) def __call__(self, assigns, toplevel=0): from types import IntType tt = type lowv = self.lower.value(assigns) upv = self.upper.value(assigns) midv = self.middle.value(assigns) result = assigns[:] for i in xrange(len(assigns)): t = assigns[i] if tt(t) is not IntType: midvi = midv[i] if lowv[i]>midvi or upv[i]1: raise ValueError, \ "Quantified predicate requires unit select list: %s" % atts self.att = atts[0] return self fmt = "(%s %s ANY %s)" op = "=" def __repr__(self): return self.fmt % (self.expr, self.op, self.subq) def __call__(self, assigns, toplevel=0): cached_column = self.cached_column cachable = self.cachable expr = self.expr subq = self.subq att = self.att if cachable: if cached_column is None: subqr = subq.eval().rows() cc = self.cached_column = dump_single_column(subqr, att) #print self, "cached", self.cached_column exprvals = expr.value(assigns) kjDict = kjbuckets.kjDict compare = self.compare tt = type from types import IntType result = assigns[:] for i in xrange(len(assigns)): assignsi = assigns[i] if tt(assignsi) is IntType: continue thisval = exprvals[i] testbtup = BoundTuple() testbtup.assns = kjDict(assignsi) if not cachable: subqr = subq.eval(outerboundtuple=testbtup).rows() cc = dump_single_column(subqr, att) #print self, "uncached", cc, thisval 
if not compare(thisval, cc): #print "eliminated", assignsi result[i] = 0 return result def compare(self, value, column): return value in column def __hash__(self): return hash(self.subq) ^ ~hash(self.expr) def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test test = cmp(self.expr, other.expr) if test: return test return cmp(self.subq, other.subq) # "expr IN (subq)" same as "expr = ANY (subq)" InPredicate = QuantEQ class InLits(NontrivialEqPred): """expr IN literals, support dynamic bindings.""" def __init__(self, expr, lits): self.expr = expr self.lits = lits self.cached_lits = None def initargs(self): return (self.expr, self.lits) def uncache(self): self.cached_lits = None def domain(self): d = [] for l in self.lits: d0 = l.domain() if d0: d = d + d0.items() d0 = self.expr.domain() if d: kjSet = kjbuckets.kjSet return d0 + kjSet(d) else: return d0 def relbind(self, dict, db): newlits = [] for l in self.lits: newlits.append(l.relbind(dict, db)) self.lits = newlits self.expr = self.expr.relbind(dict, db) return self fmt = "(%s IN %s)" def __repr__(self): return self.fmt % (self.expr, self.lits) def __call__(self, assigns, toplevel=0): # LITERALS ARE CONSTANT! NEED ONLY LOOK FOR ONE ASSIGN. tt = type from types import IntType litvals = self.cached_lits if litvals is None: assigns0 = [] for asn in assigns: if tt(asn) is not IntType: assigns0.append(asn) break if not assigns0: # all false/unknown return assigns litvals = [] for lit in self.lits: value = lit.value(assigns0) litvals.append(value[0]) self.cached_lits = litvals expr = self.expr exprvals = expr.value(assigns) result = assigns[:] for i in xrange(len(assigns)): assignsi = assigns[i] if tt(assignsi) is IntType: continue thisval = exprvals[i] if thisval not in litvals: #print "eliminated", assignsi result[i] = 0 return result def compare(self, value, column): return value in column def __hash__(self): return 10 ^ hash(self.expr) def __cmp__(self, other): test = cmp(self.__class__, other.__class__) if test: return test test = cmp(self.expr, other.expr) if test: return test return cmp(self.lits, other.lits) class QuantNE(QuantEQ): """Quantified not equal any predicate""" op = "<>" def compare(self, value, column): for x in column: if value!=x: return 1 return 0 ### note: faster NOT IN using QuantNE? 
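# --- illustrative sketch (not part of the original module) ---
# The QuantLT/QuantLE/QuantGE/QuantGT classes defined just below reduce a
# quantified comparison over a subquery column to a single comparison
# against an extremum:
#     x <  ANY column   is equivalent to   x <  max(column)
#     x >= ANY column   is equivalent to   x >= min(column)
# which is why they cache only max(column) or min(column) when the
# subquery does not depend on the outer row.  In miniature:
def _lt_any(x, column):
    return x < max(column)
def _ge_any(x, column):
    return x >= min(column)
# _lt_any(3, [1, 2, 5]) is true (3 < 5); _ge_any(0, [1, 2]) is false.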
class QuantLT(QuantEQ): """Quantified less than any predicate""" op = "<" def uncache(self): self.testval = self.cached = self.cached_column = None def compare(self, value, column): if self.cachable: if self.cached: testval = self.testval else: testval = self.testval = max(column) self.cached = 1 else: testval = max(column) return value < testval class QuantLE(QuantLT): """Quantified less equal any predicate""" op = "<=" def compare(self, value, column): if self.cachable: if self.cached: testval = self.testval else: testval = self.testval = max(column) self.cached = 1 else: testval = max(column) return value <= testval class QuantGE(QuantLT): """Quantified greater equal any predicate""" op = ">=" def compare(self, value, column): if self.cachable: if self.cached: testval = self.testval else: testval = self.testval = min(column) self.cached = 1 else: testval = min(column) return value >= testval class QuantGT(QuantLT): """Quantified greater than any predicate""" op = ">" def compare(self, value, column): if self.cachable: if self.cached: testval = self.testval else: self.testval = testval = min(column) self.cached = 1 else: testval = min(column) return value > testval def dump_single_column(assigns, att): """dump single column assignment""" result = assigns[:] for i in xrange(len(result)): result[i] = result[i][att] return result class LessPred(NontrivialEqPred): op = "<" def __call__(self, assigns, toplevel=0): from types import IntType #print '***********************************************' #print self.left, self.right #print assigns lv = self.left.value(assigns) rv = self.right.value(assigns) result = assigns[:] for i in xrange(len(assigns)): t = assigns[i] if not isinstance(t, IntType) and lv[i] >= rv[i]: result[i] = 0 return result def __inv__(self): return LessEqPred(self.right, self.left) def __hash__(self): return hash(self.left)^hash(self.right) class LessEqPred(LessPred): op = "<=" def __call__(self, assigns, toplevel=0): from types import IntType tt = type lv = self.left.value(assigns) rv = self.right.value(assigns) result = assigns[:] for i in xrange(len(assigns)): t = assigns[i] if not isinstance(t, IntType) and lv[i] > rv[i]: result[i] = 0 return result def __inv__(self): return LessPred(self.right, self.left) class SubQueryExpression(BoundMinus, SimpleRecursive): """sub query expression (subq), must eval to single column, single value""" def __init__(self, subq): self.subq = subq self.att = self.cachable = self.cached = self.cached_value = None def initargs(self): return (self.subq,) def uncache(self): self.cached = self.cached_value = None def domain(self): result = self.subq.unbound() if not result: self.cachable = 1 #print "expr subq domain", result return result def relbind(self, dict, db): subq = self.subq = self.subq.relbind(db, dict) # test that subquery is single column and determine att sl = subq.select_list atts = sl.attorder if len(atts)<>1: raise ValueError, \ "Quantified predicate requires unit select list: %s" % atts self.att = atts[0] return self def __repr__(self): return "(%s)" % self.subq def value(self, contexts): subq = self.subq att = self.att if self.cachable: if self.cached: cached_value = self.cached_value else: self.cached = 1 seval = subq.eval().rows() lse = len(seval) if lse<>1: raise ValueError, \ "const subquery expression must return 1 result: got %s" % lse self.cached_value = cached_value = seval[0][att] #print "const subq cached", cached_value return [cached_value] * len(contexts) from types import IntType tt = type result = contexts[:] kjDict 
= kjbuckets.kjDict for i in xrange(len(contexts)): contextsi = contexts[i] if tt(contextsi) is not IntType: testbtup = BoundTuple() testbtup.assns = kjDict(contextsi) #print "subq exp", testbtup seval = subq.eval(outerboundtuple=testbtup).rows() lse = len(seval) if lse<>1: raise ValueError, \ "dynamic subquery expression must return 1 result: got %s" % lse result[i] = seval[0][att] #print "nonconst subq uncached", result[i], contextsi return result SELECT_TEMPLATE = """\ SELECT %s %s FROM %s WHERE %s GROUP BY %s HAVING %s %s ORDER BY %s %s """ def dynamic_binding(ndynamic, dynamic): """create bindings from dynamic tuple for ndynamic parameters if a tuple is given create one if a list is given create many """ from types import ListType, TupleType if not dynamic: if ndynamic>0: raise ValueError, `ndynamic`+" dynamic parameters unbound" return [kjbuckets.kjDict()] ldyn = len(dynamic) undumper = map(None, [0]*ndynamic, range(ndynamic)) undumper = tuple(undumper) tdyn = type(dynamic) if tdyn is TupleType: ldyn = len(dynamic) if len(dynamic)!=ndynamic: raise ValueError, "%s,%s: wrong number of dynamics" % (ldyn,ndynamic) dynamic = [dynamic] elif tdyn is not ListType: raise TypeError, "dynamic parameters must be list or tuple" else: lens = map(len, dynamic) ndynamic = max(lens) if ndynamic!=min(lens): raise ValueError, "dynamic parameters of inconsistent lengths" undumper = map(None, [0]*ndynamic, range(ndynamic)) undumper = tuple(undumper) result = list(dynamic) kjUndump = kjbuckets.kjUndump for i in xrange(len(dynamic)): dyn = dynamic[i] ldyn = len(dyn) #print undumper, dyn if ldyn==1: dynresult = kjUndump(undumper, dyn[0]) else: dynresult = kjUndump(undumper, dyn) result[i] = dynresult return result class Selector: """For implementing, eg the SQL SELECT statement.""" def __init__(self, alldistinct, select_list, table_reference_list, where_pred, group_list, having_cond, union_select =None, order_by_spec =None, ndynamic=0, # number of dyn params expected ): self.ndynamic = ndynamic self.alldistinct = alldistinct self.select_list = select_list self.table_list = table_reference_list self.where_pred = where_pred self.group_list = group_list self.having_cond = having_cond self.union_select = union_select self.order_by = order_by_spec #self.union_spec = "DISTINCT" # default union mode self.relbindings = None # binding of relations self.unbound_set = None # unbound attributes self.rel_atts = None # graph of alias>attname bound in self self.all_aggregate = 0 if select_list!="*" and not group_list: if select_list.contains_aggregate: ### should restore this check somewhere else! 
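# --- illustrative sketch (not part of the original module) ---
# dynamic_binding above converts positional dynamic parameters into kjDict
# bindings keyed (0, 0), (0, 1), ... via kjUndump; a single tuple yields
# one binding, a list of tuples yields many.  A plain-dict model of the
# same idea, with a hypothetical helper name:
def _bind_positional(params):
    # params: one tuple (single row) or a list of tuples (many rows)
    if type(params) is type(()):
        params = [params]
    result = []
    for row in params:
        binding = {}
        for i in xrange(len(row)):
            binding[(0, i)] = row[i]
        result.append(binding)
    return result
# _bind_positional((10, 20)) == [{(0, 0): 10, (0, 1): 20}]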
#if select_list.contains_nonaggregate: #raise ValueError, "aggregates/nonaggregates don't mix without grouping" self.all_aggregate = 1 if where_pred and where_pred.contains_aggregate: raise ValueError, "aggregate in WHERE" self.query_plan = None def initargs(self): #print self.alldistinct #print self.select_list #print self.table_list #print self.where_pred #print self.having_cond #print self.union_select #print self.group_list #print self.order_by #print self.ndynamic # note: order by requires special handling return (self.alldistinct, self.select_list, self.table_list, self.where_pred, None, self.having_cond, self.union_select, None, self.ndynamic) def marshaldata(self): order_by = self.order_by if order_by: order_by = map(serialize.serialize, order_by) group_list = self.group_list if group_list: group_list = map(serialize.serialize, group_list) #print "marshaldata" #print order_by #print group_list return (order_by, group_list) def demarshal(self, data): (order_by, group_list) = data if order_by: order_by = map(serialize.deserialize, order_by) if group_list: group_list = map(serialize.deserialize, group_list) #print "demarshal" #print order_by #print group_list self.order_by = order_by self.group_list = group_list def unbound(self): result = self.unbound_set if result is None: raise ValueError, "binding not available" return result def uncache(self): wp = self.where_pred hc = self.having_cond sl = self.select_list if wp is not None: wp.uncache() if hc is not None: hc.uncache() sl.uncache() qp = self.query_plan if qp: for joiner in qp: joiner.uncache() def relbind(self, db, outerbindings=None): ad = self.alldistinct sl = self.select_list tl = self.table_list wp = self.where_pred gl = self.group_list hc = self.having_cond us = self.union_select ob = self.order_by test = db.bindings(tl) #print len(test) #for x in test: #print x (attbindings, relbindings, ambiguous, ambiguousatts) = test if outerbindings: # bind in outerbindings where unambiguous for (a,r) in outerbindings.items(): if ((not attbindings.has_key(a)) and (not ambiguousatts.has_key(a)) ): attbindings[a] = r # fix "*" select list if sl=="*": sl = TupleCollector() for (a,r) in attbindings.items(): sl.addbinding(None, BoundAttribute(r,a)) for (dotted, (r,a)) in ambiguous.items(): sl.addbinding(dotted, BoundAttribute(r,a)) sl = sl.relbind(attbindings, db) wp = wp.relbind(attbindings, db) if hc is not None: hc = hc.relbind(attbindings, db) if us is not None: us = us.relbind(db, attbindings) # bind grouping if present if gl: gl = relbind_sequence(gl, attbindings, db) # bind ordering list if present #print ob if ob: ob = relbind_sequence(ob, attbindings, db) ob = orderbind_sequence(ob, sl.order) result = Selector(ad, sl, tl, wp, gl, hc, us, ob) result.relbindings = relbindings result.ndynamic = self.ndynamic result.check_domains() result.plan_query() query_plan = result.query_plan for i in range(len(query_plan)): query_plan[i] = query_plan[i].relbind(db, attbindings) return result def plan_query(self): """generate a query plan (sequence of join operators).""" rel_atts = self.rel_atts # rel>attname where_pred = self.where_pred.detrivialize() #select_list = self.select_list # shortcut if where_pred is None: bt = BoundTuple() else: bt = self.where_pred.constraints if bt is None: bt = BoundTuple() eqs = kjbuckets.kjGraph(bt.eqs) witness = kjbuckets.kjDict() # set all known and unbound atts as witnessed for att in bt.assns.keys(): witness[att] = 1 #print self, "self.unbound_set", self.unbound_set for att in self.unbound_set.items(): 
witness[att] = 1 relbindings = self.relbindings allrels = relbindings.keys() #print relbindings allrels = bt.relorder(relbindings, allrels) #print allrels rel_atts = self.rel_atts plan = [] for rel in allrels: relation = relbindings[rel] ratts = rel_atts.neighbors(rel) h = HashJoiner(bt, rel, ratts, relation, witness) plan.append(h) for a in ratts: ra = (rel, a) witness[ra] = 1 eqs[ra] = ra witness = witness.remap(eqs) self.query_plan = plan def check_domains(self): """determine set of unbound names in self. """ relbindings = self.relbindings sl = self.select_list wp = self.where_pred gl = self.group_list hc = self.having_cond us = self.union_select all = sl.domain().items() if wp is not None: all = all + wp.domain().items() # ignore group_list ??? if hc is not None: all = all + hc.domain().items() kjSet = kjbuckets.kjSet kjGraph = kjbuckets.kjGraph alldomain = kjSet(all) rel_atts = self.rel_atts = kjGraph(all) allnames = kjSet() #print "relbindings", relbindings.keys() for name in relbindings.keys(): rel = relbindings[name] for att in rel.attributes(): allnames[ (name, att) ] = 1 # union compatibility check if us is not None: us.check_domains() myatts = self.attributes() thoseatts = us.attributes() if myatts!=thoseatts: if len(myatts)!=len(thoseatts): raise IndexError, "outer %s, inner %s: union select lists lengths differ"\ % (len(myatts), len(thoseatts)) for p in map(None, myatts, thoseatts): (x,y)=p if x!=y: raise NameError, "%s union names don't match" % (p,) self.unbound_set = alldomain - allnames def attributes(self): return self.select_list.attorder def eval(self, dynamic=None, outerboundtuple=None): """leaves a lot to be desired. dynamic and outerboundtuple are mutually exclusive. dynamic is only pertinent to top levels, outerboundtuple to subqueries""" # only uncache if outerboundtuple is None (not subquery) # ??? 
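# --- illustrative sketch (not part of the original module) ---
# plan_query above orders relations using BoundTuple.relorder, which
# greedily prefers relations that are "pinned" (some attribute value is
# already known) and then relations equated to one already chosen, so each
# HashJoiner can use constrained attributes.  A reduced model of that
# greedy ordering (names hypothetical; assumes at least one relation):
def _greedy_order(relations, pinned, related):
    # relations: list of names; pinned: dict name->1;
    # related: dict name -> list of equated neighbour names
    order = []
    remaining = list(relations)
    remaining.sort()
    # start from any pinned relation, else an arbitrary one
    for name in remaining:
        if pinned.has_key(name):
            current = name
            break
    else:
        current = remaining[0]
    while remaining:
        order.append(current)
        remaining.remove(current)
        if not remaining:
            break
        nxt = None
        # prefer a relation equated to one already chosen
        for done in order:
            for n in related.get(done, []):
                if n in remaining:
                    nxt = n
                    break
            if nxt:
                break
        if nxt is None:
            nxt = remaining[0]
        current = nxt
    return order
# _greedy_order(["r", "s", "t"], {"s": 1}, {"s": ["t"]}) == ["s", "t", "r"]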
if outerboundtuple is None: self.uncache() query_plan = self.query_plan where_pred = self.where_pred.detrivialize() select_list = self.select_list # shortcut if where_pred is not None and where_pred.false: return store.Relation0(select_list.attorder, []) #print "where_pred", where_pred if where_pred is None or where_pred.constraints is None: assn0 = assn1 = kjbuckets.kjDict() else: assn1 = self.where_pred.constraints.assns assn0 = assn1 = kjbuckets.kjDict(assn1) # erase stored results from possible previous evaluation ndynamic = self.ndynamic if outerboundtuple is not None: assn1 = assn1 + outerboundtuple.assns elif ndynamic: dyn = dynamic_binding(ndynamic, dynamic) if len(dyn)!=1: raise ValueError, "only one dynamic subst for selection allowed" dyn = dyn[0] assn1 = assn1 + dyn #print "dynamic", bt #print "assn1", assn1 # check unbound names unbound_set = self.unbound_set #print "unbound", unbound_set #print unbound_set #print self.rel_atts for pair in unbound_set.items(): if not assn1.has_key(pair): raise KeyError, `pair`+": unbound in selection" assn1 = (unbound_set * assn1) + assn0 #print "assn1 now", assn1 substseq = [assn1] for h in query_plan: #print "***" #for x in substseq: #print x #print "***" substseq = h.join(substseq) if not substseq: break #print "***" #for x in substseq: #print x #print "***" # apply the rest of the where predicate at top level if substseq and where_pred is not None: #where_pred.uncache() substseq = where_pred(substseq, 1) # eliminate zeros/nulls substseq = no_ints_nulls(substseq) # apply grouping if present group_list = self.group_list if substseq and group_list: substseq = aggregate(substseq, group_list) having_cond = self.having_cond #print having_cond if having_cond is not None: #having_cond.uncache() substseq = no_ints_nulls(having_cond(substseq)) elif self.all_aggregate: # universal group substseq = [kjbuckets.kjDict( [(None, substseq)] ) ] (tups, attorder) = select_list.map(substseq) # do UNION if present union_select = self.union_select if union_select is not None: tups = union_select.eval(tups, dynamic, outerboundtuple) # apply DISTINCT if appropriate if self.alldistinct=="DISTINCT": tups = kjbuckets.kjSet(tups).items() # apply ordering if present ob = self.order_by if ob: tups = order_tuples(ob, tups) return store.Relation0(attorder, tups) def __repr__(self): ndyn = "" if self.ndynamic: ndyn = "\n[%s dynamic parameters]" % self.ndynamic result = SELECT_TEMPLATE % ( self.alldistinct, self.select_list, self.table_list, self.where_pred, self.group_list, self.having_cond, #union_spec, self.union_select, self.order_by, ndyn ) return result class Union(SimpleRecursive): """union clause.""" def __init__(self, alldistinct, selection): self.alldistinct = alldistinct self.selection = selection def initargs(self): return (self.alldistinct, self.selection) def unbound(self): return self.selection.unbound() def relbind(self, db, outer=None): self.selection = self.selection.relbind(db, outer) return self def check_domains(self): self.selection.check_domains() def attributes(self): return self.selection.attributes() def eval(self, assns, dyn=None, outer=None): r = self.selection.eval(dyn, outer) rows = r.rows() allrows = rows + assns if self.alldistinct=="DISTINCT": allrows = kjbuckets.kjSet(allrows).items() return allrows def __repr__(self): return "\nUNION %s %s " % (self.alldistinct, self.selection) class Intersect(Union): def eval(self, assns, dyn=None, outer=None): r = self.selection.eval(dyn, outer) rows = r.rows() kjSet = kjbuckets.kjSet allrows = 
class Intersect(Union):
    def eval(self, assns, dyn=None, outer=None):
        r = self.selection.eval(dyn, outer)
        rows = r.rows()
        kjSet = kjbuckets.kjSet
        allrows = (kjSet(assns) & kjSet(rows)).items()
        return allrows
    op = "INTERSECT"
    def __repr__(self):
        return "\n%s %s" % (self.op, self.selection)

class Except(Union):
    def eval(self, assns, dyn=None, outer=None):
        r = self.selection.eval(dyn, outer)
        rows = r.rows()
        kjSet = kjbuckets.kjSet
        allrows = (kjSet(assns) - kjSet(rows)).items()
        return allrows
    op = "EXCEPT"

class Parse_Context:
    """contextual information for parsing

       p.param() returns a new sequence number for external parameter.
    """
    # not serializable
    parameter_index = 0

    # no __init__ yet

    def param(self):
        temp = self.parameter_index
        self.parameter_index = temp+1
        return temp

    def ndynamic(self):
        return self.parameter_index
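# ---------------------------------------------------------------------------
# [editor's sketch, not in the original source] Parse_Context above hands out
# the positional indices used for '?' dynamic parameters: each '?' met during
# the parse calls param() and receives the next index, and ndynamic() reports
# how many substitutions the finished statement expects.  For example:
#
#     context = Parse_Context()
#     first = context.param()     # 0, for the first '?'
#     second = context.param()    # 1, for the second '?'
#     count = context.ndynamic()  # 2 dynamic parameters in all
# ---------------------------------------------------------------------------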
""" if not isinstance(ob, types.InstanceType): # base type return ob args1 = ob.initargs() args1 = tuple(map(serialize, args1)) args2 = ob.marshaldata() return (ob.__class__.__name__, (args1, args2)) def deserialize(description): """ Dual of serialize """ # base type if not isinstance(description, types.TupleType) or len(description) != 2: return description # pull out the class name and marshal data (name, desc) = description # TODO: these doesn't actually appear to be possible if name == "tuple": # tuple case return desc if name == "list": # list case: map deserialize across desc return map(deserialize, desc) # all other cases are classes of semantics import semantics klass = getattr(semantics, name) (args1, args2) = desc args1 = tuple(map(deserialize, args1)) ob = apply(klass, args1) ob.demarshal(args2) return ob # invariant: # deserialize(serialize(ob)) returns semantic copy of ob # serialize(ob) is marshallable # ie, # args1 = ob.initargs() # init args # args1d = map(serialize, args1) # serialized # args2 = ob.marshaldata() # marshalable addl info # # assert args1d, args2 are marshallable # args1copy = map(deserialize, args1) # ob2 = ob.__class__(args1copy) # ob2 = ob2.demarshal(args2) # # assert ob2 is semantic copy of ob # # $Log: serialize.py,v $ # Revision 1.4 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # gadfly-1.0.0/gadfly/server.py0100644000157700012320000003743407467201542015136 0ustar rjonestech import sys, socket, select, gadfly from gfsocket import reply_exception, reply_success, Packet_Reader, certify # general error ServerError = "ServerError" # no such prepared name PreparedNameError = "PreparedNameError" # actions # shut down the server (admin policy only) # arguments = () # shutdown the server with no checkpoint SHUTDOWN = "SHUTDOWN" # restart the server (admin only) # arguments = () # restart the server (recover) # no checkpoint RESTART = "RESTART" # checkpoint the server (admin only) # arguments = () # checkpoint the server CHECKPOINT = "CHECKPOINT" # exec prepared statement # arguments = (prepared_name_string, dyn=None) # execute the prepared statement with dynamic args. # autocommit. EXECUTE_PREPARED = "EXECUTE_PREPARED" # exec any statement (only if not disabled) # arguments = (statement_string, dyn=None) # execute the statement with dynamic args. # autocommit. EXECUTE_STATEMENT = "EXECUTE_STATEMENT" ACTIONS = [SHUTDOWN, RESTART, CHECKPOINT, EXECUTE_PREPARED, EXECUTE_STATEMENT] class Server: """database server: listen for commands""" verbose = 1 # wait X minutes on each server loop select_timeout = 60*5 # do a checkpoint each X times thru server loop check_loop = 5 # for now works like finger/http # == each command is a separate connection. # all sql commands constitute separate transactions # which are automatically committed upon success. # for now commands come in as # 1 length (marshalled int) # 2 (password, data) (marshalled tuple) # responses come back as # 1 length (marshalled int) # 2 results (marshalled value) def __init__(self, port, db, dr, pw, startup=None): self.port = port self.db = db self.dr = dr self.pw = pw self.startup = startup self.connection = None self.socket = None # prepared cursors dictionary. 
gadfly-1.0.0/gadfly/server.py0100644000157700012320000003743407467201542015136 0ustar rjonestech
import sys, socket, select, gadfly
from gfsocket import reply_exception, reply_success, Packet_Reader, certify

# general error
ServerError = "ServerError"

# no such prepared name
PreparedNameError = "PreparedNameError"

# actions

# shut down the server (admin policy only)
# arguments = ()
# shutdown the server with no checkpoint
SHUTDOWN = "SHUTDOWN"

# restart the server (admin only)
# arguments = ()
# restart the server (recover)
# no checkpoint
RESTART = "RESTART"

# checkpoint the server (admin only)
# arguments = ()
# checkpoint the server
CHECKPOINT = "CHECKPOINT"

# exec prepared statement
# arguments = (prepared_name_string, dyn=None)
# execute the prepared statement with dynamic args.
# autocommit.
EXECUTE_PREPARED = "EXECUTE_PREPARED"

# exec any statement (only if not disabled)
# arguments = (statement_string, dyn=None)
# execute the statement with dynamic args.
# autocommit.
EXECUTE_STATEMENT = "EXECUTE_STATEMENT"

ACTIONS = [SHUTDOWN, RESTART, CHECKPOINT, EXECUTE_PREPARED, EXECUTE_STATEMENT]
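# ---------------------------------------------------------------------------
# [editor's sketch, not in the original source] The Server below reads each
# request as a length-prefixed marshalled packet (see the framing comments in
# the class body: first a marshalled int length, then the marshalled
# payload).  Client-side framing could therefore look roughly like this; the
# helper name is illustrative and gfsocket owns the real protocol details:

def _sketch_send_packet(sock, payload):
    import marshal
    body = marshal.dumps(payload)
    sock.send(marshal.dumps(len(body)))  # 1: length (marshalled int)
    sock.send(body)                      # 2: payload (marshalled value)
# ---------------------------------------------------------------------------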
class Server:
    """database server: listen for commands"""

    verbose = 1

    # wait X minutes on each server loop
    select_timeout = 60*5

    # do a checkpoint each X times thru server loop
    check_loop = 5

    # for now works like finger/http
    # == each command is a separate connection.
    # all sql commands constitute separate transactions
    # which are automatically committed upon success.
    # for now commands come in as
    #     1 length (marshalled int)
    #     2 (password, data) (marshalled tuple)
    # responses come back as
    #     1 length (marshalled int)
    #     2 results (marshalled value)

    def __init__(self, port, db, dr, pw, startup=None):
        self.port = port
        self.db = db
        self.dr = dr
        self.pw = pw
        self.startup = startup
        self.connection = None
        self.socket = None
        # prepared cursors dictionary.
        self.cursors = {}
        self.policies = {}
        self.admin_policy = None

    def start(self):
        """after init, listen for commands."""
        from gfsocket import READY, ERROR, unpack_certified_data
        verbose = self.verbose
        socket = self.socket
        connection = self.connection
        policies = self.policies
        admin_policy = self.admin_policy
        pending_connects = {}
        while 1:
            try:
                # main loop
                if self.check_loop < 0:
                    self.check_loop=5
                for i in xrange(self.check_loop):
                    if verbose:
                        print "main loop on", socket, connection
                    # checkpoint loop
                    sockets = [socket]
                    if pending_connects:
                        sockets = sockets + pending_connects.keys()
                    # wait for availability
                    if verbose:
                        print "server: waiting for connection(s)"
                    (readables, dummy, errors) = select.select(\
                        sockets, [], sockets[:], self.select_timeout)
                    if socket in errors:
                        raise ServerError, \
                            "listening socket in error state: aborting"
                    # clean up error connection sockets
                    for s in errors:
                        del pending_connects[s]
                        s.close()
                    # get a new connection, if available
                    if socket in readables:
                        readables.remove(socket)
                        (conn, addr) = socket.accept()
                        if 1 or verbose:
                            print "connect %s" % (addr,)
                        reader = Packet_Reader(conn)
                        pending_connects[conn] = reader
                    # poll readable pending connections, if possible
                    for conn in readables:
                        reader = pending_connects[conn]
                        mode = reader.mode
                        if not mode==READY:
                            if mode == ERROR:
                                # shouldn't happen
                                try:
                                    conn.close()
                                    del pending_connects[conn]
                                except:
                                    pass
                                continue
                            else:
                                try:
                                    reader.poll()
                                finally:
                                    pass # AFTER DEBUG CHANGE THIS!
                    # in blocking mode, service ready request,
                    # commit on no error
                    for conn in pending_connects.keys():
                        reader = pending_connects[conn]
                        mode = reader.mode
                        if mode == ERROR:
                            try:
                                del pending_connects[conn]
                                conn.close()
                            except:
                                pass
                        elif mode == READY:
                            try:
                                del pending_connects[conn]
                                data = reader.data
                                (actor_name, cert, md) = \
                                    unpack_certified_data(data)
                                # find the policy for this actor
                                if not policies.has_key(actor_name):
                                    if verbose:
                                        print "no such policy: "+actor_name
                                    reply_exception(NameError,
                                        "no such policy: "+actor_name, conn)
                                    policy = None
                                else:
                                    if verbose:
                                        print "executing for", actor_name
                                    policy = policies[actor_name]
                                    policy.action(cert, md, conn)
                            except SHUTDOWN:
                                if policy is admin_policy:
                                    print \
                                        "shutdown on admin policy: terminating"
                                    connection.close()
                                    socket.close()
                                    # NORMAL TERMINATION:
                                    return
                            except RESTART:
                                if policy is admin_policy:
                                    print \
                                        "restart from admin policy: restarting connection"
                                    connection.restart()
                            except CHECKPOINT:
                                if policy is admin_policy:
                                    print \
                                        "checkpoint from admin policy: checkpointing now."
                                    connection.checkpoint()
                            except:
                                tb = sys.exc_traceback
                                info = "%s %s" % (sys.exc_type,
                                    str(sys.exc_value))
                                if verbose:
                                    from traceback import print_tb
                                    print_tb(tb)
                                print "error in executing action: "+info
                                reply_exception(
                                    ServerError, "exception: "+info, conn)
                    #break # stop after first request serviced!
            except:
                # except of main while 1 try statement
                tb = sys.exc_traceback
                ty = sys.exc_type
                va = sys.exc_value
                print "UNEXPECTED EXCEPTION ON MAINLOOP"
                from traceback import print_tb
                print_tb(tb)
                print "exception:", ty, va
            if not pending_connects:
                pending_connects = {}
                print "server: checkpointing"
                connection.checkpoint()

    def init(self):
        self.getconnection()
        self.startup_load()
        # get socket last in case of failure earlier
        self.getsocket()

    HOST = ""
    BACKLOG = 5

    def getsocket(self):
        """get the listening socket"""
        verbose = self.verbose
        import socket, sys
        if verbose:
            print "initializing listener socket"
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if verbose:
                print "trying to set REUSEADDR",\
                    sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR)
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        except:
            if verbose:
                print "set of REUSEADDR failed", sys.exc_type, sys.exc_value
            pass
        sock.bind((self.HOST, self.port))
        sock.listen(self.BACKLOG)
        self.socket = sock
        return sock

    def getconnection(self):
        """get the db connection"""
        from gadfly import gadfly
        c = self.connection = gadfly(self.db, self.dr)
        # don't automatically checkpoint upon commit
        c.autocheckpoint = 0

    def startup_load(self):
        """setup the policies and load startup module"""
        admin_policy = self.get_admin_policy()
        module_name = self.startup
        if module_name:
            module = __import__(module_name)
            # startup(admin_policy, connection, Server_instance)
            test = module.startup(admin_policy, self.connection, self)
            if test is not None:
                self.policies = test
        self.policies["admin"] = admin_policy

    def get_admin_policy(self):
        """return the admin policy for privileged access."""
        p = self.admin_policy = Policy(
            "admin", self.pw, self.connection, queries=1)
        return p
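# ---------------------------------------------------------------------------
# [editor's sketch, not in the original source] startup_load above imports
# self.startup as a module and calls module.startup(admin_policy, connection,
# server); if that returns a dictionary it becomes the server's policy table,
# and "admin" is always (re)added.  A minimal startup module could look like
# the following (module, policy, and statement names are illustrative only):
#
#     # file mystartup.py
#     def startup(admin_policy, connection, server):
#         from gadfly.server import Policy   # assumed import path
#         guest = Policy("guest", None, connection, queries=0)
#         # named accesses must be strings (see Policy.__setitem__ below)
#         guest["all_frequents"] = "select * from frequents"
#         return {"guest": guest}
# ---------------------------------------------------------------------------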
class Policy:
    """security policy"""

    verbose = 0

    # allow arbitrary sql statements
    general_queries = 0

    # dictionary of named accesses as strings
    named_accesses = None

    # dictionary of prepared named accesses
    prepared_cursors = None

    def __init__(self, name, password, connection, queries=0):
        """create a policy (name, password, connection)

        name is the name of the policy
        password is the access policy (None for no password)
        connection is the database connection.
        set queries to allow general accesses (unrestricted)
        """
        if self.verbose:
            print "policy.__init__", name
        self.general_queries = queries
        self.name = name
        self.password = password
        self.connection = connection
        self.socket = None
        self.named_accesses = {}
        self.prepared_cursors = {}

    def __setitem__(self, name, value):
        if self.verbose:
            print "policy", self.name, ":", (name, value)
        from types import StringType
        if type(name) is not StringType or type(value) is not StringType:
            raise ValueError, "cursor names and contents must be strings"
        self.named_accesses[name] = value

    def execute_named(self, name, params=None):
        """execute a named (prepared) sql statement"""
        if self.verbose:
            print "policy", self.name, "executes", name, params
        na = self.named_accesses
        pc = self.prepared_cursors
        con = self.connection
        if not na.has_key(name):
            raise PreparedNameError, "unknown access name: %s" % name
        stat = na[name]
        if pc.has_key(name):
            # get prepared query
            cursor = pc[name]
        else:
            # prepare a new cursor
            pc[name] = cursor = con.cursor()
        return self.execute(cursor, stat, params)

    def execute(self, cursor, statement, params=None):
        """execute a statement in a cursor"""
        if self.verbose:
            print "policy", self.name, "executes", statement, params
        cursor.execute(statement, params)
        # immediate commit!
        self.connection.commit()
        try:
            result = cursor.fetchall()
            description = cursor.description
            result = (description, result)
        except:
            result = None
        return result

    def execute_any_statement(self, statement, params=None):
        """execute any statement."""
        if self.verbose:
            print "policy", self.name, "executes", statement, params
        con = self.connection
        cursor = con.cursor()
        return self.execute(cursor, statement, params)

    def action(self, certificate, datastring, socket):
        """perform a database/server action after checking certificate"""
        verbose = self.verbose
        if verbose:
            print "policy", self.name, "action..."
        # make sure the certificate checks out
        if not self.certify(datastring, certificate, self.password):
            raise ServerError, "password certification failure"
        # unpack the datastring
        from marshal import loads
        test = loads(datastring)
        #if verbose:
        #    print "data is", test
        (action, moredata) = test
        import sys
        if action in ACTIONS:
            action = "policy_"+action
            myaction = getattr(self, action)
            try:
                data = apply(myaction, moredata+(socket,))
                #self.reply_success(data)
            # pass up server level requests as exceptions
            except SHUTDOWN, detail:
                raise SHUTDOWN, detail
            except RESTART, detail:
                raise RESTART, detail
            except CHECKPOINT, detail:
                raise CHECKPOINT, detail
            except:
                tb = sys.exc_traceback
                exceptiondata = "%s\n%s" % (sys.exc_type, str(sys.exc_value))
                if verbose:
                    from traceback import print_tb
                    print_tb(tb)
                self.reply_exception(ServerError,
                    "unexpected exception: "+exceptiondata, socket)
                raise ServerError, exceptiondata
        else:
            raise ServerError, "unknown action: "+`action`

    def certify(self, datastring, certificate, password):
        # hook for subclassing
        return certify(datastring, certificate, password)

    def policy_SHUTDOWN(self, socket):
        self.reply_success("attempting server shutdown", socket)
        raise SHUTDOWN, "please shut down the server"

    def policy_RESTART(self, socket):
        self.reply_success("attempting server restart", socket)
        raise RESTART, "please restart the server"

    def policy_CHECKPOINT(self, socket):
        self.reply_success("attempting server checkpoint", socket)
        raise CHECKPOINT, "please checkpoint the server"

    def policy_EXECUTE_PREPARED(self, name, dyn, socket):
        try:
            result = self.execute_named(name, dyn)
            self.reply_success(result, socket)
        except PreparedNameError, detail:
            self.reply_exception(PreparedNameError,
                "no such prepared statement: "+name, socket)

    def policy_EXECUTE_STATEMENT(self, stat, dyn, socket):
        if not self.general_queries:
            self.reply_exception(ServerError,
                "general statements disallowed on this policy", socket)
            raise ServerError, "illegal statement attempt for: "+self.name
        result = self.execute_any_statement(stat, dyn)
        self.reply_success(result, socket)

    def reply_exception(self, exc, info, socket):
        # hook for subclassing
        reply_exception(exc, info, socket)

    def reply_success(self, data, socket):
        # hook for subclassing
        reply_success(data, socket)
gadfly-1.0.0/gadfly/sql.py0100644000157700012320000000256407467104371014425 0ustar rjonestech
'''
defines the SQL grammar and behaviour for gadfly

:Author: Aaron Watters
:Maintainers: http://gadfly.sf.net/
:Copyright: Aaron Robert Watters, 1994
:Id: $Id: sql.py,v 1.5 2002/05/11 02:59:05 richard Exp $:
'''

def getSQL():
    from grammar import DeclareTerminals
    from gadfly import kjParser
    infile = 'gadfly.sql_mar'
    try:
        SQLG = kjParser.UnMarshalGram(infile)
    except ImportError:
        raise ImportError, "Couldn't find sql_mar.py - has setup.py been run?"
    DeclareTerminals(SQLG)
    return SQLG

#
# $Log: sql.py,v $
# Revision 1.5 2002/05/11 02:59:05 richard
# Added info into module docstrings.
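# ---------------------------------------------------------------------------
# [editor's note, not in the original source] getSQL above rehydrates the SQL
# grammar from the machine-generated module that follows (sql_mar.py): tokens
# lists the keyword, nonterminal and terminal symbols; punct and comments
# configure the lexer; RuleTups pairs each production name with its rule; and
# MaxStates, reducts and moveTos appear to encode the generated parser's
# state tables.  The file is regenerated by the build (hence the "has
# setup.py been run?" hint above), so none of it is hand-edited.
# ---------------------------------------------------------------------------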
# Fixed docco of kwParsing to reflect new grammar "marshalling".
# Fixed bug in gadfly.open - most likely introduced during sql loading
# re-work (though looking back at the diff from back then, I can't see how it
# wasn't different before, but it musta been ;)
# A buncha new unit test stuff.
#
# Revision 1.4 2002/05/08 00:49:00 anthonybaxter
# El Grande Grande reindente! Ran reindent.py over the whole thing.
# Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2.
#
# Revision 1.3 2002/05/07 07:55:34 anthonybaxter
# more helpful message.
#
# Revision 1.2 2002/05/07 07:06:11 richard
# Cleaned up sql grammar compilation some more.
# Split up the BigList into its components too.
#
# Revision 1.1.1.1 2002/05/06 07:31:10 richard
#
#
gadfly-1.0.0/gadfly/sql_mar.py0100644000157700012320000015420407467213507015265 0ustar rjonestech
tokens = [(-6, 'INTO'), (-7, 'boolean_factor'), (-7, 'primary'), (-7, 'column_constraint_definition'), (-7, 'drop_index_statement'), (-6, 'COUNT'), (-6, 'EXISTS'), (-6, 'CREATE'), (-7, 'sortspeclist'), (-6, 'INSERT'), (-6, 'DESC'), (-7, 'optorder_by'), (-6, 'INTEGER'), (-6, 'AS'), (-6, 'IN'), (-6, 'INTERSECT'), (-7, 'search_condition'), (-6, 'HAVING'), (-7, 'sliteral'), (-6, 'AND'), (-6, 'UPDATE'), (-7, 'character_string_type'), (-8, 'character_string_literal'), (-6, 'FROM'), (-6, 'AVG'), (-7, 'column_name'), (-7, 'approximate_numeric_type'), (-6, 'NOT'), (-6, 'ANY'), (-7, 'statement'), (-7, 'factor'), (-7, 'literal'), (-7, 'set_function_reference'), (-7, 'table_reference_list'), (-6, 'ORDER'), (-7, 'statement_list'), (-6, 'FLOAT'), (-7, 'create_view_statement'), (-7, 'insert_spec'), (-6, 'INDEX'), (-7, 'select_sublist'), (-7, 'predicate'), (-7, 'term'), (-7, 'boolean_primary'), (-6, 'GROUP'), (-6, '?'), (-7, 'insert_statement'), (-6, 'SELECT'), (-7, 'drop_table_statement'), (-7, 'comparison_predicate'), (-6, 'DROP'), (-7, 'select_statement'), (-6, '+'), (-6, ')'), (-6, '/'), (-6, 'UNION'), (-6, '-'), (-6, 'DISTINCT'), (-7, 'data_type'), (-7, 'colids'), (-7, 'assn'), (-7, 'delete_statement_searched'), (-6, 'VARCHAR'), (-6, ';'), (-7, 'opt_ord'), (-7, 'update_statement_searched'), (-7, 'litlist'), (-6, 'VIEW'), (-6, '='), (-7, 'table_name'), (-7, 'create_table_statement'), (-7, 'exists_predicate'), (-7, 'create_index_statement'), (-7, 'column_definition'), (-7, 'boolean_term'), (-7, 'assns'), (-7, 'optgroup'), (-6, 'UNIQUE'), (-7, 'colelts'), (-6, 'WHERE'), (-7, 'alldistinct'), (-7, 'optunion'), (-6, 'BETWEEN'), (-6, 'TABLE'), (-8, 'user_defined_name'), (-6, 'MAX'), (-7, 'exact_numeric_type'), (-6, 'ALL'), (-6, 'ON'), (-6, 'BY'), (-7, 'opthaving'), (-7, 'column_alias'), (-6, 'DELETE'), (-6, 'OR'), (-7, 'optdefault'), (-7, 'selectsubs'), (-7, 'sort_specification'), (-6, 'VALUES'), (-6, ','), (-6, 'SUM'), (-7, 'optwhere'), (-7, 'drop_view_statement'), (-7, 'sub_query'), (-6, 'MIN'), (-7, 'optcolconstraints'), (-7, 'namelist'), (-8, 'numeric_literal'), (-7, 'aggregate'), (-7, 'optcolids'), (-6, '*'), (-6, '('), (-6, '.'), (-6, 'EXCEPT'), (-6, 'MEDIAN'), (-6, 'SET'), (-7, 'allany'), (-7, 'colnamelist'), (-7, 'colelt'), (-8, '*'), (-7, 'expression'), (-7, 'column_identifier'), (-6, '>'), (-6, '<'), (-6, 'ASC'), (-7, 'select_list'), (-7, 'optnamelist')]
punct = '.,*;=<>{}()?+-/'
comments = ['--.*']
RuleTups = [('stat1', ((-7, 'statement_list'), [(-7, 'statement')])), ('statn', ((-7, 'statement_list'), [(-7, 'statement'), (-6, ';'), (-7, 'statement_list')])), ('dropindexstat', ((-7, 'statement'), [(-7, 'drop_index_statement')])), ('createindexstat', ((-7, 
'statement'), [(-7, 'create_index_statement')])), ('selstat', ((-7, 'statement'), [(-7, 'select_statement')])), ('insstat', ((-7, 'statement'), [(-7, 'insert_statement')])), ('createtablestat', ((-7, 'statement'), [(-7, 'create_table_statement')])), ('droptablestat', ((-7, 'statement'), [(-7, 'drop_table_statement')])), ('delstat', ((-7, 'statement'), [(-7, 'delete_statement_searched')])), ('updatestat', ((-7, 'statement'), [(-7, 'update_statement_searched')])), ('createviewstat', ((-7, 'statement'), [(-7, 'create_view_statement')])), ('dropviewstat', ((-7, 'statement'), [(-7, 'drop_view_statement')])), ('dropview', ((-7, 'drop_view_statement'), [(-6, 'DROP'), (-6, 'VIEW'), (-8, 'user_defined_name')])), ('createview', ((-7, 'create_view_statement'), [(-6, 'CREATE'), (-6, 'VIEW'), (-8, 'user_defined_name'), (-7, 'optnamelist'), (-6, 'AS'), (-7, 'select_statement')])), ('optnamelist0', ((-7, 'optnamelist'), [])), ('optnamelistn', ((-7, 'optnamelist'), [(-6, '('), (-7, 'namelist'), (-6, ')')])), ('dropindex', ((-7, 'drop_index_statement'), [(-6, 'DROP'), (-6, 'INDEX'), (-8, 'user_defined_name')])), ('createindex', ((-7, 'create_index_statement'), [(-6, 'CREATE'), (-6, 'INDEX'), (-8, 'user_defined_name'), (-6, 'ON'), (-8, 'user_defined_name'), (-6, '('), (-7, 'namelist'), (-6, ')')])), ('createuniqueindex', ((-7, 'create_index_statement'), [(-6, 'CREATE'), (-6, 'UNIQUE'), (-6, 'INDEX'), (-8, 'user_defined_name'), (-6, 'ON'), (-8, 'user_defined_name'), (-6, '('), (-7, 'namelist'), (-6, ')')])), ('names1', ((-7, 'namelist'), [(-8, 'user_defined_name')])), ('namesn', ((-7, 'namelist'), [(-7, 'namelist'), (-6, ','), (-8, 'user_defined_name')])), ('update', ((-7, 'update_statement_searched'), [(-6, 'UPDATE'), (-8, 'user_defined_name'), (-6, 'SET'), (-7, 'assns'), (-7, 'optwhere')])), ('assn1', ((-7, 'assns'), [(-7, 'assn')])), ('assnn', ((-7, 'assns'), [(-7, 'assns'), (-6, ','), (-7, 'assn')])), ('assn', ((-7, 'assn'), [(-7, 'column_identifier'), (-6, '='), (-7, 'expression')])), ('deletefrom', ((-7, 'delete_statement_searched'), [(-6, 'DELETE'), (-6, 'FROM'), (-8, 'user_defined_name'), (-7, 'optwhere')])), ('droptable', ((-7, 'drop_table_statement'), [(-6, 'DROP'), (-6, 'TABLE'), (-8, 'user_defined_name')])), ('createtable', ((-7, 'create_table_statement'), [(-6, 'CREATE'), (-6, 'TABLE'), (-8, 'user_defined_name'), (-6, '('), (-7, 'colelts'), (-6, ')')])), ('colelts1', ((-7, 'colelts'), [(-7, 'colelt')])), ('coleltsn', ((-7, 'colelts'), [(-7, 'colelts'), (-6, ','), (-7, 'colelt')])), ('coleltid', ((-7, 'colelt'), [(-7, 'column_definition')])), ('coleltconstraint', ((-7, 'colelt'), [(-7, 'column_constraint_definition')])), ('coldef', ((-7, 'column_definition'), [(-7, 'column_identifier'), (-7, 'data_type'), (-7, 'optdefault'), (-7, 'optcolconstraints')])), ('optdef0', ((-7, 'optdefault'), [])), ('optcolconstr0', ((-7, 'optcolconstraints'), [])), ('stringtype', ((-7, 'data_type'), [(-7, 'character_string_type')])), ('exnumtype', ((-7, 'data_type'), [(-7, 'exact_numeric_type')])), ('appnumtype', ((-7, 'data_type'), [(-7, 'approximate_numeric_type')])), ('integer', ((-7, 'exact_numeric_type'), [(-6, 'INTEGER')])), ('float', ((-7, 'approximate_numeric_type'), [(-6, 'FLOAT')])), ('varchar', ((-7, 'character_string_type'), [(-6, 'VARCHAR')])), ('varcharn', ((-7, 'character_string_type'), [(-6, 'VARCHAR'), (-6, '('), (-8, 'numeric_literal'), (-6, ')')])), ('insert1', ((-7, 'insert_statement'), [(-6, 'INSERT'), (-6, 'INTO'), (-7, 'table_name'), (-7, 'optcolids'), (-7, 'insert_spec')])), ('optcolids0', 
((-7, 'optcolids'), [])), ('optcolids1', ((-7, 'optcolids'), [(-6, '('), (-7, 'colids'), (-6, ')')])), ('colids1', ((-7, 'colids'), [(-7, 'column_identifier')])), ('colidsn', ((-7, 'colids'), [(-7, 'colids'), (-6, ','), (-7, 'column_identifier')])), ('insert_values', ((-7, 'insert_spec'), [(-6, 'VALUES'), (-6, '('), (-7, 'litlist'), (-6, ')')])), ('insert_query', ((-7, 'insert_spec'), [(-7, 'sub_query')])), ('litlist1', ((-7, 'litlist'), [(-7, 'sliteral')])), ('litlistn', ((-7, 'litlist'), [(-7, 'litlist'), (-6, ','), (-7, 'sliteral')])), ('sliteral0', ((-7, 'sliteral'), [(-7, 'literal')])), ('sliteralp', ((-7, 'sliteral'), [(-6, '+'), (-7, 'literal')])), ('sliterals', ((-7, 'sliteral'), [(-7, 'sliteral'), (-6, '+'), (-7, 'literal')])), ('sliterald', ((-7, 'sliteral'), [(-7, 'sliteral'), (-6, '-'), (-7, 'literal')])), ('sliteralm', ((-7, 'sliteral'), [(-6, '-'), (-7, 'literal')])), ('subselect', ((-7, 'sub_query'), [(-6, 'SELECT'), (-7, 'alldistinct'), (-7, 'select_list'), (-6, 'FROM'), (-7, 'table_reference_list'), (-7, 'optwhere'), (-7, 'optgroup'), (-7, 'opthaving'), (-7, 'optunion')])), ('selectx', ((-7, 'select_statement'), [(-7, 'sub_query'), (-7, 'optorder_by')])), ('ad0', ((-7, 'alldistinct'), [])), ('adall', ((-7, 'alldistinct'), [(-6, 'ALL')])), ('addistinct', ((-7, 'alldistinct'), [(-6, 'DISTINCT')])), ('where0', ((-7, 'optwhere'), [])), ('where1', ((-7, 'optwhere'), [(-6, 'WHERE'), (-7, 'search_condition')])), ('group0', ((-7, 'optgroup'), [])), ('group1', ((-7, 'optgroup'), [(-6, 'GROUP'), (-6, 'BY'), (-7, 'colnamelist')])), ('colnames1', ((-7, 'colnamelist'), [(-7, 'column_name')])), ('colnamesn', ((-7, 'colnamelist'), [(-7, 'colnamelist'), (-6, ','), (-7, 'column_name')])), ('having0', ((-7, 'opthaving'), [])), ('having1', ((-7, 'opthaving'), [(-6, 'HAVING'), (-7, 'search_condition')])), ('union0', ((-7, 'optunion'), [])), ('union1', ((-7, 'optunion'), [(-6, 'UNION'), (-7, 'alldistinct'), (-7, 'sub_query')])), ('except1', ((-7, 'optunion'), [(-6, 'EXCEPT'), (-7, 'sub_query')])), ('intersect1', ((-7, 'optunion'), [(-6, 'INTERSECT'), (-7, 'sub_query')])), ('order0', ((-7, 'optorder_by'), [])), ('order1', ((-7, 'optorder_by'), [(-6, 'ORDER'), (-6, 'BY'), (-7, 'sortspeclist')])), ('sortspec1', ((-7, 'sortspeclist'), [(-7, 'sort_specification')])), ('sortspecn', ((-7, 'sortspeclist'), [(-7, 'sortspeclist'), (-6, ','), (-7, 'sort_specification')])), ('sortint', ((-7, 'sort_specification'), [(-8, 'numeric_literal'), (-7, 'opt_ord')])), ('sortcol', ((-7, 'sort_specification'), [(-7, 'column_name'), (-7, 'opt_ord')])), ('optord0', ((-7, 'opt_ord'), [])), ('optordasc', ((-7, 'opt_ord'), [(-6, 'ASC')])), ('optorddesc', ((-7, 'opt_ord'), [(-6, 'DESC')])), ('trl1', ((-7, 'table_reference_list'), [(-8, 'user_defined_name')])), ('trln', ((-7, 'table_reference_list'), [(-8, 'user_defined_name'), (-6, ','), (-7, 'table_reference_list')])), ('trl1a', ((-7, 'table_reference_list'), [(-8, 'user_defined_name'), (-8, 'user_defined_name')])), ('trlna', ((-7, 'table_reference_list'), [(-8, 'user_defined_name'), (-8, 'user_defined_name'), (-6, ','), (-7, 'table_reference_list')])), ('trl1as', ((-7, 'table_reference_list'), [(-8, 'user_defined_name'), (-6, 'AS'), (-8, 'user_defined_name')])), ('trlnas', ((-7, 'table_reference_list'), [(-8, 'user_defined_name'), (-6, 'AS'), (-8, 'user_defined_name'), (-6, ','), (-7, 'table_reference_list')])), ('selectstar', ((-7, 'select_list'), [(-6, '*')])), ('selectsome', ((-7, 'select_list'), [(-7, 'selectsubs')])), ('select1', ((-7, 'selectsubs'), [(-7, 
'select_sublist')])), ('selectn', ((-7, 'selectsubs'), [(-7, 'selectsubs'), (-6, ','), (-7, 'select_sublist')])), ('selectit', ((-7, 'select_sublist'), [(-7, 'expression')])), ('selectname', ((-7, 'select_sublist'), [(-7, 'expression'), (-6, 'AS'), (-7, 'column_alias')])), ('colalias', ((-7, 'column_alias'), [(-8, 'user_defined_name')])), ('search1', ((-7, 'search_condition'), [(-7, 'boolean_term')])), ('searchn', ((-7, 'search_condition'), [(-7, 'boolean_term'), (-6, 'OR'), (-7, 'search_condition')])), ('bool1', ((-7, 'boolean_term'), [(-7, 'boolean_factor')])), ('booln', ((-7, 'boolean_term'), [(-7, 'boolean_factor'), (-6, 'AND'), (-7, 'boolean_term')])), ('bf1', ((-7, 'boolean_factor'), [(-7, 'boolean_primary')])), ('notbf', ((-7, 'boolean_factor'), [(-6, 'NOT'), (-7, 'boolean_primary')])), ('bp1', ((-7, 'boolean_primary'), [(-7, 'predicate')])), ('bps', ((-7, 'boolean_primary'), [(-6, '('), (-7, 'search_condition'), (-6, ')')])), ('predicate1', ((-7, 'predicate'), [(-7, 'comparison_predicate')])), ('predicateeq', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '='), (-7, 'expression')])), ('predicatelt', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '<'), (-7, 'expression')])), ('predicategt', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '>'), (-7, 'expression')])), ('predicatele', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '<'), (-6, '='), (-7, 'expression')])), ('predicatege', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '>'), (-6, '='), (-7, 'expression')])), ('predicatene', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, '<'), (-6, '>'), (-7, 'expression')])), ('predbetween', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, 'BETWEEN'), (-7, 'expression'), (-6, 'AND'), (-7, 'expression')])), ('prednotbetween', ((-7, 'comparison_predicate'), [(-7, 'expression'), (-6, 'NOT'), (-6, 'BETWEEN'), (-7, 'expression'), (-6, 'AND'), (-7, 'expression')])), ('predexists', ((-7, 'predicate'), [(-7, 'exists_predicate')])), ('exists', ((-7, 'exists_predicate'), [(-6, 'EXISTS'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqeq', ((-7, 'predicate'), [(-7, 'expression'), (-6, '='), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqne', ((-7, 'predicate'), [(-7, 'expression'), (-6, '<'), (-6, '>'), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqlt', ((-7, 'predicate'), [(-7, 'expression'), (-6, '<'), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqgt', ((-7, 'predicate'), [(-7, 'expression'), (-6, '>'), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqle', ((-7, 'predicate'), [(-7, 'expression'), (-6, '<'), (-6, '='), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predqge', ((-7, 'predicate'), [(-7, 'expression'), (-6, '>'), (-6, '='), (-7, 'allany'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('nnall', ((-7, 'allany'), [(-6, 'ALL')])), ('nnany', ((-7, 'allany'), [(-6, 'ANY')])), ('predin', ((-7, 'predicate'), [(-7, 'expression'), (-6, 'IN'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('prednotin', ((-7, 'predicate'), [(-7, 'expression'), (-6, 'NOT'), (-6, 'IN'), (-6, '('), (-7, 'sub_query'), (-6, ')')])), ('predinlits', ((-7, 'predicate'), [(-7, 'expression'), (-6, 'IN'), (-6, '('), (-7, 'litlist'), (-6, ')')])), ('prednotinlits', ((-7, 'predicate'), [(-7, 'expression'), (-6, 'NOT'), (-6, 'IN'), (-6, '('), (-7, 'litlist'), (-6, ')')])), ('subqexpr', ((-7, 'expression'), [(-6, '('), (-7, 'sub_query'), (-6, ')')])), ('exp1', ((-7, 
'expression'), [(-7, 'term')])), ('expplus', ((-7, 'expression'), [(-7, 'expression'), (-6, '+'), (-7, 'term')])), ('expminus', ((-7, 'expression'), [(-7, 'expression'), (-6, '-'), (-7, 'term')])), ('term1', ((-7, 'term'), [(-7, 'factor')])), ('termtimes', ((-7, 'term'), [(-7, 'term'), (-6, '*'), (-7, 'factor')])), ('termdiv', ((-7, 'term'), [(-7, 'term'), (-6, '/'), (-7, 'factor')])), ('factor1', ((-7, 'factor'), [(-7, 'primary')])), ('plusfactor', ((-7, 'factor'), [(-6, '+'), (-7, 'factor')])), ('minusfactor', ((-7, 'factor'), [(-6, '-'), (-7, 'factor')])), ('primary1', ((-7, 'primary'), [(-7, 'column_name')])), ('primarylit', ((-7, 'primary'), [(-7, 'literal')])), ('primaryexp', ((-7, 'primary'), [(-6, '('), (-7, 'expression'), (-6, ')')])), ('primaryset', ((-7, 'primary'), [(-7, 'set_function_reference')])), ('stringlit', ((-7, 'literal'), [(-8, 'character_string_literal')])), ('stringstring', ((-7, 'literal'), [(-7, 'literal'), (-8, 'character_string_literal')])), ('numlit', ((-7, 'literal'), [(-8, 'numeric_literal')])), ('countstar', ((-7, 'set_function_reference'), [(-6, 'COUNT'), (-6, '('), (-6, '*'), (-6, ')')])), ('distinctcount', ((-7, 'set_function_reference'), [(-6, 'COUNT'), (-6, '('), (-6, 'DISTINCT'), (-7, 'expression'), (-6, ')')])), ('allcount', ((-7, 'set_function_reference'), [(-6, 'COUNT'), (-6, '('), (-7, 'expression'), (-6, ')')])), ('distinctset', ((-7, 'set_function_reference'), [(-7, 'aggregate'), (-6, '('), (-6, 'DISTINCT'), (-7, 'expression'), (-6, ')')])), ('allset', ((-7, 'set_function_reference'), [(-7, 'aggregate'), (-6, '('), (-7, 'expression'), (-6, ')')])), ('average', ((-7, 'aggregate'), [(-6, 'AVG')])), ('maximum', ((-7, 'aggregate'), [(-6, 'MAX')])), ('minimum', ((-7, 'aggregate'), [(-6, 'MIN')])), ('summation', ((-7, 'aggregate'), [(-6, 'SUM')])), ('median', ((-7, 'aggregate'), [(-6, 'MEDIAN')])), ('dynamic', ((-7, 'literal'), [(-6, '?')])), ('columnname1', ((-7, 'column_name'), [(-7, 'column_identifier')])), ('columnname2', ((-7, 'column_name'), [(-7, 'table_name'), (-6, '.'), (-7, 'column_identifier')])), ('tablename1', ((-7, 'table_name'), [(-8, 'user_defined_name')])), ('columnid1', ((-7, 'column_identifier'), [(-8, 'user_defined_name')]))] MaxStates = 312 reducts = [(88, 79, 141), (158, 118, 78), (73, 97, 43), (231, 53, 104), (136, 93, 128), (251, 118, 47), (149, 19, 101), (145, 17, 97), (126, 109, 155), (181, 121, 147), (170, 98, 40), (272, 44, 87), (132, 121, 138), (57, 15, 157), (107, 98, 75), (51, 93, 139), (133, 53, 126), (152, 112, 112), (133, 27, 126), (91, 53, 134), (278, 99, 58), (178, 34, 143), (284, 53, 122), (136, 68, 128), (91, 14, 134), (245, 17, 85), (273, 112, 65), (79, 52, 157), (43, 53, 133), (181, 13, 147), (270, 63, 18), (133, 52, 126), (307, 93, 117), (284, 17, 122), (298, 53, 71), (266, 34, 109), (136, 34, 128), (68, 98, 92), (178, 68, 143), (161, 98, 23), (137, 118, 129), (128, 79, 131), (128, 19, 131), (28, 103, 59), (282, 52, 54), (208, 17, 144), (77, 17, 22), (274, 63, 64), (150, 112, 99), (132, 56, 138), (51, 68, 139), (235, 112, 105), (224, 17, 102), (178, 19, 143), (137, 44, 129), (48, 52, 136), (181, 53, 147), (88, 19, 141), (151, 55, 103), (93, 15, 135), (209, 93, 146), (181, 14, 147), (137, 19, 129), (274, 53, 64), (77, 53, 22), (136, 13, 128), (223, 53, 96), (275, 55, 68), (128, 118, 131), (88, 44, 141), (53, 93, 127), (8, 99, 58), (57, 109, 157), (132, 23, 138), (274, 55, 64), (63, 56, 142), (27, 56, 60), (88, 118, 141), (272, 79, 87), (74, 111, 156), (223, 112, 96), (57, 27, 157), (56, 121, 153), (9, 118, 7), 
(271, 98, 41), (263, 15, 107), (136, 63, 128), (310, 118, 118), (133, 15, 126), (263, 55, 107), (50, 79, 137), (302, 44, 114), (132, 98, 138), (179, 63, 145), (51, 122, 139), (46, 68, 140), (79, 15, 157), (109, 118, 79), (127, 17, 132), (63, 121, 142), (300, 17, 123), (51, 112, 139), (302, 118, 114), (28, 56, 59), (304, 17, 116), (183, 15, 84), (57, 55, 157), (167, 53, 35), (57, 52, 157), (138, 63, 82), (235, 15, 105), (171, 98, 37), (75, 63, 61), (63, 82, 142), (214, 112, 67), (268, 63, 108), (250, 98, 55), (127, 53, 132), (137, 79, 129), (7, 118, 5), (43, 112, 133), (56, 82, 153), (43, 44, 133), (50, 54, 137), (283, 19, 124), (239, 118, 106), (209, 109, 146), (57, 97, 156), (59, 19, 154), (56, 53, 153), (249, 53, 52), (50, 118, 137), (210, 17, 86), (127, 82, 132), (49, 34, 130), (111, 63, 21), (50, 19, 137), (49, 17, 130), (219, 53, 49), (28, 113, 59), (301, 93, 111), (93, 52, 135), (299, 17, 125), (162, 15, 24), (50, 44, 137), (56, 17, 153), (49, 122, 130), (283, 15, 124), (209, 13, 146), (210, 53, 86), (133, 14, 126), (174, 13, 15), (63, 14, 142), (212, 34, 83), (169, 53, 36), (299, 53, 125), (178, 112, 143), (127, 56, 132), (93, 27, 135), (63, 17, 142), (127, 121, 132), (59, 44, 154), (302, 112, 114), (212, 44, 83), (178, 122, 143), (49, 93, 130), (268, 93, 108), (179, 54, 145), (150, 63, 99), (274, 112, 64), (128, 93, 131), (151, 34, 103), (136, 112, 128), (132, 44, 138), (307, 63, 117), (57, 23, 157), (88, 93, 141), (153, 17, 95), (71, 118, 26), (283, 17, 124), (136, 23, 128), (136, 79, 128), (137, 34, 129), (162, 98, 24), (151, 44, 103), (28, 52, 59), (181, 27, 147), (88, 34, 141), (209, 19, 146), (235, 17, 105), (46, 55, 140), (28, 5, 59), (57, 56, 157), (151, 112, 103), (312, 93, 119), (126, 82, 155), (139, 17, 61), (48, 121, 136), (239, 17, 106), (62, 110, 150), (215, 56, 51), (63, 109, 142), (128, 34, 131), (48, 56, 136), (172, 63, 27), (153, 53, 95), (57, 93, 157), (150, 55, 99), (51, 27, 139), (209, 82, 146), (149, 93, 101), (147, 15, 62), (166, 98, 33), (231, 34, 104), (27, 22, 60), (136, 19, 128), (166, 53, 33), (266, 19, 109), (93, 82, 135), (50, 13, 137), (201, 118, 76), (255, 15, 113), (46, 14, 140), (126, 53, 155), (43, 23, 133), (273, 55, 65), (49, 14, 130), (307, 118, 117), (93, 56, 135), (91, 109, 134), (272, 34, 87), (266, 118, 109), (275, 63, 68), (136, 118, 128), (312, 112, 119), (128, 44, 131), (208, 112, 144), (79, 98, 157), (235, 118, 105), (159, 63, 80), (9, 63, 7), (273, 63, 65), (209, 68, 146), (299, 118, 125), (128, 54, 131), (14, 118, 9), (133, 98, 126), (56, 15, 153), (266, 44, 109), (145, 44, 97), (126, 17, 155), (88, 54, 141), (310, 53, 118), (149, 34, 101), (79, 53, 157), (49, 44, 130), (209, 53, 146), (59, 93, 154), (178, 55, 143), (56, 109, 153), (139, 63, 61), (308, 55, 66), (249, 98, 52), (79, 23, 157), (209, 54, 146), (133, 13, 126), (127, 122, 132), (132, 13, 138), (301, 19, 111), (133, 23, 126), (49, 118, 130), (48, 44, 136), (53, 53, 127), (50, 93, 137), (221, 118, 98), (43, 118, 133), (301, 44, 111), (151, 53, 103), (53, 14, 127), (91, 55, 134), (19, 63, 2), (79, 56, 157), (50, 34, 137), (132, 15, 138), (49, 19, 130), (53, 17, 127), (224, 34, 102), (308, 63, 66), (301, 118, 111), (235, 34, 105), (43, 98, 133), (187, 47, 44), (208, 56, 144), (120, 98, 19), (209, 14, 146), (161, 15, 23), (178, 63, 143), (302, 34, 114), (88, 13, 141), (204, 53, 34), (278, 110, 58), (206, 98, 29), (281, 98, 53), (208, 53, 144), (136, 44, 128), (53, 121, 127), (51, 63, 139), (128, 13, 131), (132, 52, 138), (310, 17, 118), (93, 17, 135), (7, 63, 5), (53, 19, 127), 
(296, 63, 72), (179, 112, 145), (209, 56, 146), (43, 52, 133), (138, 112, 82), (133, 82, 126), (35, 118, 57), (127, 13, 132), (214, 63, 67), (75, 112, 61), (51, 55, 139), (300, 15, 123), (57, 98, 157), (56, 22, 153), (224, 53, 102), (278, 45, 58), (127, 109, 132), (46, 54, 140), (63, 22, 142), (133, 63, 126), (223, 118, 96), (132, 27, 138), (137, 13, 129), (27, 109, 60), (10, 118, 4), (50, 27, 137), (72, 118, 12), (307, 34, 117), (301, 63, 111), (138, 17, 82), (300, 34, 123), (13, 63, 8), (75, 17, 61), (309, 63, 70), (57, 10, 157), (138, 55, 82), (160, 63, 77), (48, 15, 136), (133, 19, 126), (152, 118, 112), (137, 53, 129), (116, 53, 30), (309, 53, 70), (183, 118, 84), (126, 123, 155), (63, 54, 142), (263, 44, 107), (136, 98, 128), (138, 53, 82), (75, 53, 61), (133, 44, 126), (162, 79, 24), (302, 15, 114), (27, 47, 60), (161, 34, 23), (139, 53, 61), (142, 118, 48), (46, 63, 140), (74, 97, 156), (235, 19, 105), (207, 53, 20), (133, 118, 126), (179, 17, 145), (312, 53, 119), (263, 118, 107), (70, 118, 16), (78, 63, 61), (47, 110, 148), (300, 93, 123), (304, 63, 116), (137, 52, 129), (147, 44, 62), (56, 63, 153), (212, 118, 83), (209, 112, 146), (133, 79, 126), (162, 44, 24), (50, 15, 137), (8, 22, 58), (278, 56, 58), (79, 79, 157), (88, 27, 141), (278, 22, 58), (53, 122, 127), (179, 121, 145), (57, 36, 157), (49, 13, 130), (51, 15, 139), (6, 118, 10), (53, 112, 127), (280, 98, 50), (63, 63, 142), (126, 68, 155), (162, 118, 24), (63, 55, 142), (183, 79, 84), (179, 82, 145), (137, 27, 129), (57, 111, 156), (185, 118, 63), (27, 84, 60), (79, 121, 157), (128, 52, 131), (134, 98, 94), (209, 122, 146), (176, 118, 13), (281, 52, 53), (56, 55, 153), (88, 52, 141), (106, 63, 74), (57, 54, 157), (79, 13, 157), (157, 98, 81), (289, 112, 110), (137, 23, 129), (132, 82, 138), (235, 44, 105), (289, 63, 110), (132, 34, 138), (43, 34, 133), (179, 14, 145), (56, 13, 153), (57, 19, 157), (271, 53, 41), (48, 19, 136), (79, 10, 157), (312, 34, 119), (300, 118, 123), (78, 34, 61), (48, 109, 136), (149, 15, 101), (255, 93, 113), (278, 103, 58), (208, 14, 144), (283, 112, 124), (77, 63, 22), (185, 15, 63), (93, 79, 135), (181, 56, 147), (207, 98, 20), (250, 56, 55), (28, 47, 59), (57, 118, 157), (88, 112, 141), (284, 55, 122), (132, 93, 138), (149, 44, 101), (134, 23, 94), (231, 55, 104), (179, 122, 145), (272, 15, 87), (53, 82, 127), (48, 98, 136), (46, 22, 140), (144, 53, 45), (139, 112, 61), (88, 15, 141), (93, 44, 135), (250, 53, 55), (273, 98, 65), (178, 27, 143), (224, 55, 102), (48, 82, 136), (28, 99, 59), (43, 19, 133), (181, 63, 147), (151, 63, 103), (57, 79, 157), (128, 15, 131), (152, 55, 112), (168, 53, 39), (240, 118, 17), (49, 98, 130), (239, 112, 106), (150, 19, 99), (208, 82, 144), (301, 34, 111), (136, 56, 128), (179, 118, 145), (128, 122, 131), (277, 118, 56), (268, 53, 108), (181, 55, 147), (274, 17, 64), (46, 122, 140), (208, 121, 144), (140, 63, 42), (108, 98, 79), (49, 112, 130), (224, 63, 102), (50, 52, 137), (8, 103, 58), (93, 19, 135), (153, 112, 95), (235, 53, 105), (63, 118, 142), (126, 27, 155), (56, 122, 153), (245, 34, 85), (161, 79, 23), (119, 53, 28), (178, 14, 143), (169, 98, 36), (48, 13, 136), (56, 112, 153), (16, 118, 3), (209, 63, 146), (272, 118, 87), (289, 55, 110), (63, 112, 142), (53, 52, 127), (51, 121, 139), (105, 63, 25), (63, 122, 142), (298, 63, 71), (181, 118, 147), (282, 53, 54), (178, 17, 143), (214, 34, 67), (57, 123, 157), (221, 15, 98), (8, 24, 58), (304, 118, 116), (79, 12, 157), (210, 112, 86), (302, 55, 114), (28, 22, 59), (127, 52, 132), (79, 54, 157), (49, 
15, 130), (19, 118, 2), (53, 55, 127), (14, 63, 9), (128, 98, 131), (8, 85, 58), (185, 34, 63), (135, 98, 93), (191, 19, 100), (299, 112, 125), (178, 53, 143), (115, 53, 31), (126, 13, 155), (88, 98, 141), (308, 53, 66), (307, 53, 117), (179, 98, 145), (289, 17, 110), (79, 14, 157), (231, 93, 104), (48, 122, 136), (79, 93, 157), (158, 98, 78), (46, 53, 140), (181, 82, 147), (191, 15, 100), (178, 82, 143), (136, 27, 128), (43, 13, 133), (208, 68, 144), (59, 23, 154), (93, 98, 135), (119, 98, 28), (59, 98, 154), (296, 53, 72), (181, 68, 147), (50, 23, 137), (183, 112, 84), (242, 98, 32), (42, 118, 1), (51, 53, 139), (133, 93, 126), (48, 23, 136), (263, 93, 107), (212, 17, 83), (278, 85, 58), (137, 14, 129), (136, 52, 128), (93, 13, 135), (93, 118, 135), (210, 63, 86), (120, 53, 19), (91, 17, 134), (239, 55, 106), (300, 44, 123), (161, 44, 23), (263, 34, 107), (59, 56, 154), (183, 34, 84), (5, 118, 0), (300, 19, 123), (201, 98, 76), (50, 56, 137), (304, 112, 116), (275, 34, 68), (8, 5, 58), (49, 79, 130), (78, 118, 61), (161, 118, 23), (93, 34, 135), (209, 55, 146), (153, 55, 95), (57, 13, 157), (283, 55, 124), (150, 53, 99), (60, 110, 151), (239, 34, 106), (181, 122, 147), (165, 53, 38), (239, 63, 106), (63, 34, 142), (127, 112, 132), (181, 112, 147), (158, 63, 78), (311, 112, 115), (133, 17, 126), (43, 93, 133), (245, 53, 85), (57, 12, 157), (43, 27, 133), (165, 98, 38), (138, 44, 82), (77, 112, 22), (150, 17, 99), (310, 112, 118), (132, 79, 138), (284, 118, 122), (153, 63, 95), (129, 23, 91), (277, 34, 56), (147, 34, 62), (46, 17, 140), (139, 55, 61), (50, 98, 137), (93, 93, 135), (48, 54, 136), (127, 68, 132), (210, 79, 86), (224, 112, 102), (56, 68, 153), (137, 56, 129), (231, 112, 104), (49, 121, 130), (133, 121, 126), (28, 106, 59), (301, 53, 111), (128, 23, 131), (65, 23, 88), (59, 13, 154), (239, 15, 106), (310, 55, 118), (107, 118, 75), (77, 44, 22), (28, 85, 59), (132, 118, 138), (46, 121, 140), (91, 82, 134), (57, 63, 157), (208, 122, 144), (109, 98, 79), (255, 19, 113), (63, 68, 142), (126, 63, 155), (93, 54, 135), (278, 24, 58), (48, 93, 136), (308, 112, 66), (126, 55, 155), (255, 44, 113), (46, 98, 140), (273, 53, 65), (46, 82, 140), (307, 15, 117), (145, 112, 97), (248, 118, 69), (275, 112, 68), (191, 112, 100), (128, 56, 131), (79, 109, 157), (255, 118, 113), (181, 17, 147), (126, 15, 155), (152, 17, 112), (46, 79, 140), (57, 34, 157), (239, 53, 106), (132, 19, 138), (43, 54, 133), (312, 55, 119), (278, 5, 58), (88, 56, 141), (281, 56, 53), (91, 68, 134), (278, 47, 58), (133, 55, 126), (20, 63, 73), (13, 118, 8), (50, 82, 137), (77, 55, 22), (179, 93, 145), (278, 109, 58), (255, 53, 113), (129, 98, 91), (304, 19, 116), (138, 34, 82), (178, 23, 143), (59, 82, 154), (300, 112, 123), (127, 19, 132), (183, 17, 84), (183, 63, 84), (282, 98, 54), (268, 15, 108), (245, 79, 85), (49, 54, 130), (75, 15, 61), (93, 109, 135), (138, 15, 82), (235, 63, 105), (59, 63, 154), (142, 63, 48), (51, 79, 139), (228, 110, 121), (304, 44, 116), (79, 63, 157), (179, 109, 145), (235, 55, 105), (43, 79, 133), (208, 52, 144), (179, 15, 145), (63, 79, 142), (79, 122, 157), (183, 55, 84), (127, 118, 132), (126, 10, 155), (56, 79, 153), (302, 17, 114), (309, 34, 70), (178, 56, 143), (43, 121, 133), (57, 17, 157), (302, 19, 114), (70, 63, 16), (50, 109, 137), (263, 63, 107), (161, 112, 23), (53, 34, 127), (75, 44, 61), (15, 63, 6), (127, 55, 132), (59, 123, 154), (126, 79, 155), (59, 17, 154), (50, 14, 137), (298, 34, 71), (191, 34, 100), (231, 63, 104), (59, 52, 154), (20, 118, 73), (59, 14, 154), (49, 
109, 130), (311, 55, 115), (208, 63, 144), (162, 63, 24), (132, 68, 138), (185, 55, 63), (51, 56, 139), (6, 63, 10), (50, 17, 137), (53, 27, 127), (147, 63, 62), (56, 44, 153), (209, 34, 146), (185, 63, 63), (28, 84, 59), (8, 109, 58), (299, 44, 125), (210, 118, 86), (208, 79, 144), (160, 118, 77), (229, 110, 120), (289, 34, 110), (56, 19, 153), (106, 118, 74), (210, 44, 86), (50, 53, 137), (278, 84, 58), (266, 63, 109), (248, 34, 69), (49, 52, 130), (312, 63, 119), (51, 23, 139), (245, 112, 85), (149, 17, 101), (78, 44, 61), (284, 44, 122), (91, 44, 134), (272, 53, 87), (126, 34, 155), (235, 93, 105), (132, 112, 138), (301, 15, 111), (79, 118, 157), (310, 93, 118), (242, 53, 32), (132, 122, 138), (249, 52, 52), (51, 98, 139), (208, 15, 144), (179, 52, 145), (248, 63, 69), (77, 79, 22), (181, 54, 147), (88, 82, 141), (231, 19, 104), (310, 34, 118), (49, 23, 130), (275, 118, 68), (149, 53, 101), (8, 45, 58), (191, 93, 100), (245, 44, 85), (272, 17, 87), (179, 27, 145), (255, 112, 113), (57, 47, 156), (255, 55, 113), (91, 19, 134), (245, 118, 85), (284, 19, 122), (212, 79, 83), (231, 44, 104), (223, 55, 96), (137, 82, 129), (46, 23, 140), (302, 93, 114), (128, 82, 131), (43, 55, 133), (18, 118, 11), (128, 17, 131), (88, 14, 141), (311, 44, 115), (245, 15, 85), (153, 44, 95), (181, 44, 147), (46, 34, 140), (178, 109, 143), (215, 52, 51), (307, 19, 117), (88, 17, 141), (126, 54, 155), (128, 14, 131), (126, 98, 155), (268, 19, 108), (93, 63, 135), (283, 93, 124), (209, 15, 146), (147, 55, 62), (224, 19, 102), (282, 56, 54), (43, 109, 133), (311, 118, 115), (240, 63, 17), (139, 34, 61), (221, 93, 98), (53, 56, 127), (140, 118, 42), (181, 79, 147), (91, 79, 134), (152, 34, 112), (270, 118, 18), (137, 17, 129), (93, 55, 135), (239, 93, 106), (208, 19, 144), (59, 112, 154), (178, 98, 143), (128, 121, 131), (50, 121, 137), (308, 98, 66), (274, 15, 64), (281, 53, 53), (49, 82, 130), (311, 19, 115), (115, 98, 31), (43, 63, 133), (56, 14, 153), (224, 118, 102), (181, 19, 147), (214, 118, 67), (308, 15, 66), (144, 98, 45), (209, 118, 146), (79, 112, 157), (48, 55, 136), (51, 34, 139), (161, 63, 23), (298, 118, 71), (107, 63, 75), (56, 34, 153), (68, 23, 92), (209, 44, 146), (105, 118, 25), (289, 19, 110), (53, 118, 127), (179, 56, 145), (59, 79, 154), (191, 63, 100), (162, 112, 24), (301, 17, 111), (299, 93, 125), (221, 53, 98), (57, 68, 157), (135, 23, 93), (212, 53, 83), (289, 44, 110), (185, 112, 63), (59, 109, 154), (53, 79, 127), (299, 34, 125), (268, 112, 108), (133, 112, 126), (127, 54, 132), (108, 63, 79), (127, 63, 132), (171, 53, 37), (221, 17, 98), (63, 93, 142), (266, 55, 109), (137, 93, 129), (210, 34, 86), (48, 68, 136), (307, 17, 117), (289, 118, 110), (136, 15, 128), (275, 15, 68), (145, 53, 97), (289, 53, 110), (304, 55, 116), (15, 118, 6), (56, 54, 153), (161, 55, 23), (128, 27, 131), (214, 15, 67), (126, 121, 155), (93, 68, 135), (304, 53, 116), (157, 118, 81), (127, 93, 132), (46, 109, 140), (145, 55, 97), (79, 19, 157), (304, 93, 116), (300, 63, 123), (59, 27, 154), (153, 34, 95), (179, 55, 145), (208, 98, 144), (300, 55, 123), (127, 34, 132), (159, 98, 80), (51, 82, 139), (304, 34, 116), (179, 68, 145), (179, 53, 145), (51, 109, 139), (263, 112, 107), (52, 98, 90), (283, 63, 124), (133, 122, 126), (152, 53, 112), (223, 15, 96), (57, 62, 157), (91, 121, 134), (209, 79, 146), (48, 34, 136), (308, 17, 66), (208, 23, 144), (191, 118, 100), (88, 23, 141), (311, 34, 115), (139, 118, 61), (61, 23, 89), (311, 53, 115), (79, 36, 157), (239, 44, 106), (215, 98, 51), (79, 34, 157), (139, 44, 
61), (159, 118, 80), (93, 122, 135), (280, 53, 50), (63, 53, 142), (52, 23, 90), (93, 112, 135), (91, 54, 134), (79, 68, 157), (172, 118, 27), (88, 121, 141), (43, 15, 133), (46, 15, 140), (56, 93, 153), (28, 24, 59), (147, 112, 62), (136, 121, 128), (79, 62, 157), (128, 109, 131), (223, 63, 96), (181, 93, 147), (311, 93, 115), (178, 52, 143), (51, 13, 139), (88, 109, 141), (224, 93, 102), (311, 15, 115), (153, 118, 95), (27, 106, 60), (77, 34, 22), (239, 19, 106), (220, 98, 46), (126, 112, 155), (77, 118, 22), (59, 54, 154), (187, 97, 44), (284, 34, 122), (46, 112, 140), (209, 121, 146), (91, 34, 134), (133, 68, 126), (132, 54, 138), (312, 118, 119), (43, 122, 133), (27, 24, 60), (128, 53, 131), (93, 23, 135), (145, 93, 97), (162, 34, 24), (126, 118, 155), (46, 52, 140), (151, 15, 103), (179, 23, 145), (116, 98, 30), (51, 52, 139), (310, 19, 118), (132, 63, 138), (88, 22, 141), (312, 44, 119), (136, 14, 128), (8, 110, 58), (136, 53, 128), (266, 53, 109), (191, 44, 100), (79, 123, 157), (126, 93, 155), (152, 15, 112), (209, 52, 146), (77, 15, 22), (310, 44, 118), (312, 19, 119), (311, 17, 115), (63, 13, 142), (63, 44, 142), (126, 19, 155), (48, 112, 136), (91, 93, 134), (178, 44, 143), (78, 55, 61), (127, 23, 132), (170, 53, 40), (145, 34, 97), (79, 27, 157), (251, 63, 47), (27, 5, 60), (206, 53, 29), (178, 13, 143), (48, 14, 136), (43, 82, 133), (255, 17, 113), (272, 63, 87), (132, 17, 138), (57, 121, 157), (191, 55, 100), (132, 109, 138), (160, 98, 77), (93, 121, 135), (72, 63, 12), (28, 45, 59), (272, 55, 87), (127, 27, 132), (307, 112, 117), (149, 55, 101), (48, 17, 136), (49, 68, 130), (178, 54, 143), (266, 112, 109), (284, 15, 122), (149, 63, 101), (91, 15, 134), (208, 118, 144), (48, 79, 136), (201, 63, 76), (245, 55, 85), (57, 53, 157), (136, 122, 128), (283, 118, 124), (48, 27, 136), (63, 27, 142), (208, 44, 144), (48, 53, 136), (273, 34, 65), (126, 14, 155), (91, 63, 134), (27, 52, 60), (220, 53, 46), (255, 63, 113), (137, 15, 129), (78, 15, 61), (224, 15, 102), (274, 118, 64), (126, 44, 155), (310, 63, 118), (209, 98, 146), (137, 63, 129), (88, 55, 141), (151, 19, 103), (176, 63, 13), (43, 14, 133), (59, 53, 154), (43, 68, 133), (299, 55, 125), (162, 55, 24), (168, 98, 39), (63, 52, 142), (126, 56, 155), (53, 68, 127), (231, 17, 104), (149, 118, 101), (57, 82, 157), (150, 34, 99), (283, 53, 124), (74, 47, 156), (53, 98, 127), (128, 55, 131), (43, 17, 133), (93, 53, 135), (277, 63, 56), (56, 27, 153), (224, 44, 102), (126, 122, 155), (181, 15, 147), (311, 63, 115), (128, 63, 131), (137, 98, 129), (147, 53, 62), (277, 53, 56), (151, 118, 103), (208, 13, 144), (308, 118, 66), (137, 55, 129), (88, 63, 141), (46, 93, 140), (48, 118, 136), (126, 23, 155), (73, 47, 43), (41, 13, 14), (79, 17, 157), (275, 53, 68), (91, 122, 134), (284, 112, 122), (273, 17, 65), (74, 110, 156), (49, 27, 130), (221, 63, 98), (296, 34, 72), (212, 15, 83), (179, 44, 145), (302, 63, 114), (266, 15, 109), (266, 17, 109), (181, 98, 147), (223, 34, 96), (309, 118, 70), (245, 63, 85), (91, 52, 134), (183, 53, 84), (145, 63, 97), (138, 118, 82), (221, 34, 98), (268, 118, 108), (46, 13, 140), (78, 112, 61), (255, 34, 113), (75, 118, 61), (268, 17, 108), (57, 44, 157), (283, 34, 124), (150, 15, 99), (59, 34, 154), (127, 98, 132), (16, 63, 3), (191, 53, 100), (215, 53, 51), (79, 55, 157), (91, 27, 134), (307, 44, 117), (263, 53, 107), (179, 19, 145), (53, 23, 127), (137, 121, 129), (185, 17, 63), (273, 15, 65), (50, 55, 137), (219, 98, 49), (299, 15, 125), (49, 53, 130), (152, 63, 112), (43, 56, 133), (46, 56, 140), 
(79, 82, 157), (210, 15, 86), (138, 79, 82), (145, 118, 97), (181, 52, 147), (59, 55, 154), (178, 93, 143), (221, 112, 98), (8, 84, 58), (28, 109, 59), (308, 34, 66), (108, 118, 79), (179, 79, 145), (304, 15, 116), (137, 112, 129), (48, 63, 136), (212, 112, 83), (185, 53, 63), (57, 110, 156), (57, 14, 157), (109, 63, 79), (10, 63, 4), (132, 14, 138), (248, 53, 69), (151, 93, 103), (27, 110, 60), (179, 13, 145), (162, 17, 24), (181, 34, 147), (51, 118, 139), (35, 63, 57), (27, 99, 60), (50, 63, 137), (268, 34, 108), (111, 118, 21), (51, 54, 139), (153, 15, 95), (8, 56, 58), (56, 98, 153), (53, 13, 127), (137, 122, 129), (150, 118, 99), (46, 44, 140), (63, 98, 142), (266, 93, 109), (91, 118, 134), (268, 44, 108), (46, 19, 140), (75, 55, 61), (46, 118, 140), (150, 44, 99), (221, 44, 98), (274, 34, 64), (63, 15, 142), (91, 56, 134), (71, 63, 26), (152, 93, 112), (208, 54, 144), (127, 14, 132), (208, 55, 144), (209, 27, 146), (299, 63, 125), (128, 112, 131), (88, 122, 141), (18, 63, 11), (289, 93, 110), (157, 63, 81), (231, 15, 104), (133, 56, 126), (91, 23, 134), (139, 15, 61), (27, 45, 60), (310, 15, 118), (57, 112, 157), (272, 112, 87), (209, 17, 146), (162, 53, 24), (56, 52, 153), (127, 79, 132), (278, 106, 58), (208, 93, 144), (152, 44, 112), (136, 55, 128), (132, 53, 138), (284, 63, 122), (278, 113, 58), (28, 110, 59), (150, 93, 99), (88, 53, 141), (151, 17, 103), (57, 122, 157), (263, 19, 107), (167, 98, 35), (59, 68, 154), (183, 44, 84), (273, 118, 65), (307, 55, 117), (149, 112, 101), (209, 23, 146), (27, 103, 60), (59, 15, 154), (49, 56, 130), (51, 14, 139), (214, 53, 67), (63, 19, 142), (210, 55, 86), (8, 52, 58), (312, 15, 119), (250, 52, 55), (152, 19, 112), (181, 23, 147), (50, 68, 137), (208, 34, 144), (208, 109, 144), (278, 52, 58), (289, 15, 110), (91, 112, 134), (301, 55, 111), (133, 34, 126), (127, 44, 132), (8, 113, 58), (181, 109, 147), (59, 121, 154), (231, 118, 104), (208, 27, 144), (212, 55, 83), (49, 63, 130), (147, 118, 62), (178, 121, 143), (212, 63, 83), (53, 15, 127), (49, 55, 130), (77, 98, 22), (91, 13, 134), (59, 122, 154), (204, 98, 34), (51, 17, 139), (58, 110, 149), (249, 56, 52), (221, 55, 98), (136, 17, 128), (312, 17, 119), (178, 118, 143), (178, 15, 143), (302, 53, 114), (59, 10, 154), (50, 112, 137), (268, 55, 108), (191, 17, 100), (53, 44, 127), (67, 110, 152), (8, 47, 58), (50, 122, 137), (56, 118, 153), (283, 44, 124), (179, 34, 145), (214, 55, 67), (46, 27, 140), (56, 56, 153), (137, 68, 129), (136, 82, 128), (147, 17, 62), (178, 79, 143), (51, 44, 139), (8, 106, 58), (126, 52, 155), (93, 14, 135), (296, 118, 72), (284, 93, 122), (223, 44, 96), (223, 17, 96), (300, 53, 123), (161, 17, 23), (53, 63, 127), (91, 98, 134), (59, 118, 154), (78, 17, 61), (128, 68, 131), (127, 15, 132), (78, 53, 61), (263, 17, 107), (299, 19, 125), (56, 23, 153), (88, 68, 141), (145, 15, 97), (27, 85, 60), (63, 23, 142), (301, 112, 111), (132, 55, 138), (79, 44, 157), (51, 19, 139), (161, 53, 23), (27, 113, 60), (75, 34, 61)] moveTos = [(130, 107, 64), (95, 57, 130), (170, 110, 205), (146, 110, 190), (94, 24, 47), (94, 40, 129), (130, 22, 46), (55, 103, 62), (287, 24, 47), (154, 41, 149), (199, 106, 63), (94, 2, 43), (287, 2, 43), (192, 106, 63), (235, 52, 99), (95, 31, 50), (233, 120, 59), (99, 107, 64), (148, 99, 60), (90, 22, 46), (104, 31, 50), (29, 85, 58), (99, 85, 58), (37, 60, 77), (198, 99, 60), (29, 107, 64), (89, 84, 57), (130, 42, 53), (166, 94, 204), (227, 24, 47), (297, 47, 8), (123, 99, 60), (109, 10, 157), (86, 57, 123), (227, 2, 43), (66, 84, 57), (246, 25, 
273), (217, 31, 250), (263, 52, 99), (261, 120, 59), (92, 113, 67), (54, 110, 92), (197, 30, 49), (104, 74, 153), (99, 52, 54), (112, 103, 62), (236, 2, 43), (189, 22, 46), (290, 102, 303), (29, 5, 44), (121, 98, 175), (247, 30, 49), (154, 120, 59), (89, 69, 45), (92, 25, 48), (37, 84, 79), (118, 53, 172), (73, 108, 102), (225, 56, 217), (247, 16, 275), (253, 31, 281), (136, 54, 89), (190, 102, 222), (123, 24, 47), (76, 96, 107), (189, 42, 53), (131, 53, 181), (99, 5, 44), (69, 23, 101), (85, 13, 122), (102, 38, 140), (100, 106, 63), (198, 5, 44), (123, 2, 43), (92, 56, 55), (237, 28, 228), (29, 52, 54), (110, 84, 79), (78, 98, 110), (1, 61, 13), (237, 106, 63), (233, 110, 66), (197, 52, 54), (233, 115, 262), (192, 1, 145), (112, 56, 55), (55, 113, 67), (97, 53, 133), (247, 85, 58), (130, 45, 56), (197, 5, 44), (236, 99, 60), (198, 113, 67), (92, 103, 62), (90, 45, 56), (261, 110, 66), (54, 120, 59), (258, 106, 63), (95, 42, 53), (112, 25, 48), (108, 64, 158), (1, 20, 3), (103, 59, 143), (29, 95, 61), (26, 46, 7), (198, 2, 43), (197, 107, 64), (247, 5, 44), (154, 102, 97), (227, 99, 60), (29, 30, 49), (139, 100, 185), (247, 27, 148), (112, 113, 67), (95, 106, 63), (53, 109, 90), (154, 110, 154), (148, 24, 47), (247, 49, 151), (24, 84, 40), (200, 24, 47), (99, 30, 49), (80, 68, 112), (94, 99, 60), (41, 110, 84), (287, 99, 60), (197, 85, 58), (45, 111, 87), (86, 106, 63), (104, 119, 155), (148, 2, 43), (199, 28, 228), (102, 47, 8), (247, 52, 54), (86, 24, 47), (199, 99, 60), (108, 10, 157), (90, 5, 44), (103, 120, 144), (29, 45, 56), (195, 110, 225), (197, 56, 55), (148, 106, 63), (112, 52, 54), (237, 87, 229), (216, 31, 249), (99, 103, 62), (200, 106, 63), (203, 84, 120), (154, 42, 53), (86, 2, 43), (200, 45, 56), (112, 5, 44), (173, 84, 79), (244, 84, 138), (189, 120, 59), (29, 103, 62), (95, 24, 47), (198, 69, 45), (198, 106, 63), (95, 2, 43), (254, 22, 46), (197, 25, 48), (81, 84, 113), (104, 69, 45), (154, 22, 46), (55, 85, 58), (104, 71, 152), (252, 31, 215), (137, 54, 89), (194, 122, 199), (26, 37, 6), (130, 31, 50), (112, 107, 64), (104, 84, 57), (197, 113, 67), (92, 30, 49), (66, 47, 8), (261, 22, 46), (233, 42, 53), (247, 25, 48), (248, 15, 276), (189, 6, 146), (236, 28, 228), (183, 98, 211), (26, 29, 5), (55, 52, 54), (66, 31, 50), (66, 32, 51), (261, 32, 51), (237, 2, 43), (130, 120, 59), (55, 5, 44), (256, 98, 252), (100, 99, 60), (112, 85, 58), (261, 42, 53), (89, 32, 51), (54, 45, 56), (233, 22, 46), (189, 41, 149), (1, 4, 19), (180, 53, 209), (247, 56, 55), (123, 30, 49), (237, 24, 47), (99, 113, 67), (194, 68, 197), (237, 99, 60), (66, 119, 96), (267, 110, 293), (100, 24, 47), (26, 102, 20), (154, 45, 56), (100, 2, 43), (186, 106, 63), (123, 106, 63), (1, 47, 8), (295, 84, 57), (254, 45, 56), (92, 85, 58), (109, 64, 160), (8, 57, 27), (55, 56, 55), (92, 107, 64), (247, 103, 62), (236, 106, 63), (189, 110, 154), (263, 56, 100), (112, 30, 49), (96, 53, 132), (29, 113, 67), (26, 48, 9), (99, 56, 55), (26, 51, 10), (197, 103, 62), (260, 102, 288), (92, 5, 44), (225, 66, 256), (278, 57, 27), (269, 102, 294), (95, 99, 60), (155, 27, 196), (29, 25, 48), (227, 106, 63), (104, 49, 151), (54, 22, 46), (225, 52, 216), (162, 56, 100), (154, 52, 54), (230, 110, 260), (199, 2, 43), (247, 43, 150), (99, 25, 48), (200, 28, 228), (295, 69, 45), (130, 110, 66), (188, 84, 79), (177, 56, 100), (55, 30, 93), (278, 87, 28), (261, 45, 56), (94, 106, 63), (287, 106, 63), (90, 110, 92), (218, 53, 251), (192, 2, 43), (29, 56, 55), (104, 99, 60), (92, 52, 54), (200, 113, 67), (95, 52, 54), (290, 47, 
8), (156, 106, 109), (106, 98, 156), (148, 113, 67), (214, 90, 248), (237, 107, 64), (125, 53, 179), (100, 30, 49), (40, 110, 83), (200, 42, 53), (198, 119, 232), (39, 84, 82), (233, 119, 263), (123, 103, 62), (86, 119, 125), (236, 103, 62), (32, 84, 72), (112, 2, 43), (247, 106, 63), (29, 32, 51), (190, 47, 8), (86, 52, 54), (295, 120, 59), (92, 99, 60), (216, 45, 56), (233, 87, 229), (301, 56, 100), (95, 5, 44), (237, 85, 58), (156, 96, 201), (261, 119, 289), (112, 24, 47), (233, 2, 43), (247, 69, 45), (200, 56, 55), (175, 84, 207), (66, 45, 56), (188, 120, 220), (143, 98, 188), (148, 56, 55), (197, 106, 63), (86, 85, 58), (3, 84, 21), (54, 31, 50), (54, 32, 51), (154, 119, 194), (237, 52, 54), (227, 103, 62), (95, 107, 64), (117, 36, 168), (117, 21, 167), (198, 25, 48), (1, 92, 17), (192, 30, 49), (95, 85, 58), (237, 5, 44), (148, 25, 48), (297, 102, 309), (252, 45, 56), (200, 25, 48), (192, 16, 223), (55, 2, 43), (258, 52, 216), (102, 102, 142), (104, 110, 154), (287, 103, 62), (86, 107, 64), (55, 24, 47), (258, 66, 285), (199, 30, 49), (117, 62, 170), (200, 5, 44), (185, 44, 213), (86, 30, 49), (227, 56, 55), (104, 6, 146), (94, 25, 48), (76, 69, 45), (287, 25, 48), (199, 107, 64), (123, 113, 67), (55, 99, 60), (194, 53, 132), (246, 116, 274), (1, 46, 7), (192, 107, 64), (100, 5, 44), (200, 103, 62), (12, 0, 33), (99, 106, 63), (280, 56, 254), (148, 103, 62), (216, 22, 46), (287, 56, 55), (148, 49, 151), (198, 103, 62), (130, 69, 45), (100, 52, 54), (247, 1, 145), (29, 106, 63), (227, 25, 48), (192, 85, 58), (95, 30, 49), (200, 110, 66), (241, 98, 175), (173, 117, 206), (112, 69, 45), (154, 31, 50), (236, 113, 67), (83, 73, 116), (199, 85, 58), (90, 69, 45), (103, 84, 79), (197, 28, 228), (254, 31, 282), (155, 52, 99), (227, 113, 67), (155, 121, 200), (194, 121, 200), (112, 99, 60), (252, 22, 46), (186, 56, 217), (101, 33, 139), (192, 52, 54), (123, 56, 55), (92, 2, 43), (130, 84, 57), (192, 49, 151), (199, 52, 54), (112, 32, 51), (4, 83, 24), (261, 31, 50), (26, 61, 13), (200, 2, 43), (236, 25, 48), (287, 113, 67), (189, 71, 152), (266, 56, 100), (199, 5, 44), (94, 113, 67), (192, 27, 148), (123, 25, 48), (11, 39, 30), (233, 107, 64), (26, 20, 3), (154, 74, 153), (194, 82, 198), (236, 56, 55), (100, 107, 64), (104, 120, 59), (238, 110, 269), (237, 30, 49), (282, 22, 88), (94, 32, 51), (233, 32, 51), (233, 31, 50), (89, 22, 46), (247, 45, 56), (227, 52, 54), (54, 84, 57), (252, 18, 280), (225, 106, 63), (294, 53, 307), (100, 25, 48), (192, 113, 67), (66, 110, 66), (204, 104, 242), (26, 35, 42), (217, 45, 56), (244, 33, 272), (199, 113, 67), (94, 5, 44), (236, 85, 58), (123, 107, 64), (182, 84, 210), (280, 52, 253), (66, 102, 97), (1, 37, 6), (94, 52, 54), (287, 52, 54), (92, 106, 63), (189, 119, 155), (236, 107, 64), (123, 85, 58), (124, 53, 178), (92, 42, 53), (76, 8, 106), (110, 120, 80), (227, 5, 44), (76, 120, 59), (104, 45, 56), (82, 88, 114), (173, 120, 117), (237, 22, 46), (197, 99, 60), (236, 5, 44), (253, 45, 56), (214, 17, 247), (186, 52, 216), (123, 52, 54), (192, 56, 55), (29, 2, 43), (20, 34, 36), (177, 52, 99), (154, 6, 146), (29, 24, 47), (198, 30, 49), (227, 107, 64), (186, 66, 218), (227, 85, 58), (266, 52, 99), (192, 25, 48), (130, 119, 180), (148, 30, 49), (215, 22, 88), (99, 24, 47), (100, 113, 67), (200, 30, 49), (197, 87, 229), (99, 2, 43), (54, 69, 45), (236, 52, 54), (123, 5, 44), (287, 107, 64), (189, 31, 50), (247, 99, 60), (303, 53, 310), (94, 107, 64), (1, 102, 20), (75, 100, 105), (205, 106, 243), (95, 56, 55), (130, 32, 51), (199, 103, 62), (202, 53, 
240), (86, 25, 48), (55, 106, 63), (66, 120, 59), (260, 47, 8), (94, 30, 49), (192, 119, 155), (287, 30, 49), (148, 107, 64), (89, 120, 59), (1, 51, 10), (90, 31, 50), (90, 32, 51), (237, 113, 67), (104, 22, 46), (98, 91, 135), (154, 69, 45), (1, 48, 9), (200, 107, 64), (192, 103, 62), (269, 47, 8), (194, 52, 99), (189, 74, 221), (200, 85, 58), (197, 2, 43), (102, 97, 141), (199, 24, 47), (148, 85, 58), (95, 25, 48), (227, 30, 49), (247, 74, 153), (29, 99, 60), (94, 103, 62), (233, 84, 57), (86, 56, 55), (203, 105, 241), (78, 79, 104), (301, 52, 99), (104, 42, 53), (241, 53, 270), (197, 24, 47), (233, 45, 56), (26, 4, 19), (192, 24, 47), (217, 22, 46), (95, 113, 67), (26, 47, 8), (200, 52, 54), (164, 110, 203), (138, 84, 183), (87, 120, 126), (184, 33, 212), (148, 52, 54), (112, 106, 63), (247, 2, 43), (156, 25, 108), (199, 56, 55), (237, 56, 55), (287, 42, 53), (247, 24, 47), (192, 43, 150), (236, 30, 49), (233, 69, 45), (83, 3, 115), (101, 84, 138), (55, 25, 48), (185, 76, 214), (225, 45, 56), (154, 84, 57), (253, 22, 46), (37, 120, 80), (287, 85, 58), (258, 56, 217), (87, 84, 79), (86, 113, 67), (198, 52, 54), (100, 103, 62), (243, 53, 271), (199, 69, 45), (237, 25, 48), (148, 5, 44), (261, 69, 45), (98, 84, 134), (99, 22, 46), (104, 5, 44), (258, 18, 219), (4, 39, 22), (92, 110, 66), (233, 106, 63), (227, 45, 56), (11, 83, 31), (89, 103, 62), (237, 115, 267), (237, 110, 66), (66, 103, 62), (104, 27, 148), (199, 121, 236), (199, 120, 59), (55, 84, 57), (194, 27, 196), (287, 45, 56), (94, 45, 56), (261, 106, 63), (117, 58, 166), (104, 52, 54), (211, 84, 138), (247, 119, 155), (258, 102, 286), (92, 31, 50), (189, 99, 60), (186, 45, 56), (202, 98, 175), (95, 110, 66), (154, 106, 63), (123, 45, 56), (254, 106, 63), (148, 42, 53), (104, 85, 58), (29, 31, 50), (100, 120, 59), (55, 69, 45), (198, 22, 46), (247, 113, 67), (292, 47, 8), (192, 41, 149), (197, 119, 231), (222, 53, 255), (200, 22, 46), (104, 107, 64), (112, 84, 57), (29, 40, 52), (148, 22, 46), (86, 110, 66), (99, 32, 51), (99, 31, 50), (122, 47, 8), (198, 42, 53), (155, 68, 197), (184, 84, 138), (189, 32, 51), (192, 6, 146), (198, 85, 58), (236, 45, 56), (130, 99, 60), (92, 84, 57), (1, 70, 15), (287, 22, 46), (130, 24, 47), (1, 65, 14), (94, 22, 46), (227, 42, 53), (90, 2, 43), (192, 110, 154), (250, 22, 88), (66, 113, 67), (125, 52, 99), (86, 103, 62), (104, 16, 147), (90, 24, 47), (199, 115, 234), (154, 32, 51), (89, 113, 67), (104, 30, 49), (237, 120, 59), (130, 2, 43), (76, 84, 57), (136, 109, 90), (31, 84, 71), (54, 106, 63), (94, 42, 53), (227, 22, 46), (268, 52, 99), (248, 112, 279), (11, 67, 32), (259, 52, 99), (33, 69, 73), (53, 54, 89), (155, 122, 199), (26, 101, 18), (29, 119, 68), (197, 32, 51), (197, 31, 50), (154, 1, 145), (148, 45, 56), (33, 84, 74), (189, 24, 47), (95, 120, 59), (163, 84, 120), (123, 42, 53), (66, 56, 55), (236, 22, 46), (247, 32, 51), (247, 31, 50), (153, 93, 192), (89, 56, 55), (189, 2, 43), (89, 99, 60), (100, 110, 92), (246, 84, 57), (86, 120, 59), (66, 25, 48), (123, 22, 46), (252, 56, 217), (186, 22, 46), (89, 25, 48), (236, 42, 53), (92, 69, 45), (117, 12, 165), (83, 84, 79), (293, 102, 306), (233, 28, 228), (198, 45, 56), (233, 99, 60), (29, 84, 57), (148, 120, 59), (89, 107, 64), (100, 22, 46), (274, 98, 295), (200, 120, 59), (95, 45, 56), (154, 113, 67), (123, 110, 66), (125, 56, 100), (66, 107, 64), (95, 32, 51), (20, 11, 35), (29, 120, 59), (138, 13, 182), (186, 18, 219), (73, 110, 103), (261, 99, 60), (268, 56, 100), (100, 42, 137), (104, 43, 150), (86, 45, 56), (96, 56, 100), (66, 
85, 58), (29, 124, 69), (259, 56, 100), (236, 115, 265), (236, 110, 66), (189, 106, 63), (89, 85, 58), (112, 31, 50), (104, 32, 51), (198, 120, 59), (99, 84, 57), (227, 110, 66), (154, 99, 60), (192, 42, 53), (64, 110, 95), (99, 69, 45), (54, 2, 43), (66, 52, 54), (295, 25, 308), (68, 13, 98), (50, 22, 88), (199, 42, 53), (90, 99, 60), (54, 24, 47), (26, 72, 16), (89, 52, 54), (26, 7, 4), (237, 45, 56), (148, 41, 149), (76, 106, 109), (26, 9, 12), (94, 110, 66), (287, 110, 66), (90, 106, 63), (92, 24, 47), (104, 103, 62), (192, 22, 46), (4, 77, 23), (89, 5, 44), (61, 98, 94), (252, 52, 216), (199, 22, 46), (55, 32, 51), (55, 31, 50), (29, 69, 45), (163, 105, 202), (130, 106, 63), (5, 63, 26), (66, 5, 44), (148, 6, 146), (247, 84, 57), (137, 109, 90), (123, 120, 59), (200, 115, 238), (192, 120, 59), (143, 53, 187), (86, 22, 46), (257, 53, 284), (194, 56, 100), (66, 69, 45), (199, 25, 48), (276, 47, 8), (94, 85, 58), (246, 69, 45), (84, 84, 120), (44, 110, 86), (36, 89, 76), (104, 25, 48), (66, 22, 46), (197, 69, 45), (156, 84, 57), (95, 22, 46), (100, 45, 56), (86, 42, 53), (154, 2, 43), (54, 99, 60), (154, 24, 47), (104, 56, 55), (198, 110, 66), (189, 1, 145), (236, 120, 59), (261, 2, 43), (225, 47, 8), (192, 45, 56), (227, 120, 59), (258, 22, 46), (265, 110, 292), (237, 42, 53), (90, 120, 59), (213, 89, 246), (156, 69, 45), (232, 19, 261), (177, 53, 208), (248, 55, 278), (26, 65, 14), (261, 24, 47), (112, 119, 162), (189, 45, 56), (199, 45, 56), (281, 22, 88), (138, 98, 184), (34, 84, 75), (66, 30, 49), (94, 120, 59), (287, 120, 59), (86, 109, 124), (197, 84, 57), (155, 56, 100), (189, 84, 57), (247, 71, 152), (225, 31, 215), (89, 30, 127), (233, 24, 47), (189, 5, 44), (139, 79, 104), (41, 125, 85), (211, 33, 245), (99, 110, 92), (86, 31, 50), (86, 32, 51), (112, 120, 59), (130, 25, 48), (236, 84, 57), (227, 69, 45), (90, 25, 48), (156, 120, 59), (292, 102, 305), (278, 80, 297), (54, 85, 58), (199, 119, 235), (112, 45, 56), (200, 99, 60), (54, 107, 64), (114, 84, 164), (189, 113, 67), (130, 56, 55), (287, 69, 45), (76, 25, 108), (94, 56, 55), (123, 84, 57), (199, 87, 229), (279, 47, 8), (113, 110, 163), (247, 120, 59), (154, 30, 49), (218, 98, 252), (90, 56, 55), (154, 16, 193), (235, 56, 100), (122, 102, 20), (30, 84, 70), (258, 31, 215), (123, 69, 45), (286, 53, 300), (17, 23, 34), (189, 25, 48), (247, 6, 146), (54, 5, 44), (200, 68, 237), (37, 75, 78), (55, 45, 56), (92, 22, 46), (66, 42, 53), (197, 120, 59), (192, 99, 60), (287, 84, 57), (219, 52, 253), (261, 30, 49), (94, 84, 57), (237, 103, 62), (90, 84, 57), (100, 56, 55), (66, 24, 47), (285, 98, 252), (89, 2, 43), (247, 41, 149), (104, 1, 145), (189, 56, 55), (130, 113, 67), (89, 24, 47), (232, 56, 100), (288, 53, 302), (122, 51, 176), (258, 47, 8), (8, 80, 29), (104, 41, 149), (237, 32, 51), (237, 31, 50), (4, 67, 25), (90, 113, 67), (236, 69, 45), (108, 123, 159), (225, 22, 46), (233, 30, 49), (54, 52, 54), (227, 84, 57), (226, 110, 258), (66, 2, 43), (154, 49, 151), (231, 52, 99), (99, 120, 59), (261, 84, 57), (198, 84, 57), (100, 31, 50), (100, 32, 51), (112, 42, 53), (261, 85, 58), (259, 19, 287), (194, 14, 195), (253, 106, 63), (78, 100, 111), (123, 31, 50), (199, 110, 66), (66, 99, 60), (237, 119, 268), (1, 29, 5), (189, 69, 45), (261, 107, 64), (248, 81, 277), (256, 53, 283), (21, 114, 37), (154, 5, 44), (26, 92, 17), (233, 85, 58), (112, 22, 46), (154, 27, 148), (148, 84, 57), (117, 26, 171), (293, 47, 8), (189, 103, 62), (23, 39, 39), (192, 74, 153), (83, 78, 118), (247, 110, 154), (289, 56, 100), (86, 99, 60), (110, 60, 
161), (217, 106, 63), (148, 71, 152), (148, 69, 45), (92, 119, 96), (261, 5, 44), (173, 73, 116), (95, 119, 131), (200, 69, 45), (83, 117, 119), (233, 52, 54), (54, 30, 91), (197, 110, 66), (197, 115, 230), (26, 70, 15), (154, 107, 64), (75, 79, 104), (236, 24, 47), (1, 101, 18), (192, 31, 50), (192, 32, 51), (233, 5, 44), (104, 106, 63), (90, 103, 62), (131, 52, 99), (261, 52, 54), (239, 56, 100), (68, 52, 99), (92, 45, 56), (306, 53, 312), (130, 103, 62), (55, 22, 46), (199, 32, 51), (189, 43, 150), (154, 85, 58), (104, 2, 43), (148, 119, 155), (130, 30, 49), (1, 72, 16), (155, 82, 198), (231, 56, 100), (200, 119, 239), (1, 9, 12), (1, 7, 4), (287, 32, 51), (287, 31, 50), (94, 31, 50), (95, 69, 45), (233, 113, 67), (197, 42, 53), (94, 69, 45), (90, 30, 128), (104, 24, 47), (154, 56, 55), (26, 50, 11), (55, 110, 92), (199, 31, 50), (86, 69, 45), (210, 98, 244), (200, 87, 229), (55, 107, 64), (237, 84, 57), (54, 103, 62), (258, 45, 56), (227, 32, 51), (227, 31, 50), (180, 56, 100), (261, 113, 67), (1, 50, 11), (154, 25, 48), (22, 84, 38), (197, 22, 46), (92, 120, 59), (29, 110, 66), (289, 52, 99), (84, 105, 121), (92, 32, 51), (96, 52, 99), (25, 84, 41), (104, 113, 67), (83, 120, 117), (189, 30, 49), (155, 14, 195), (8, 87, 28), (99, 45, 56), (196, 82, 227), (154, 47, 8), (233, 56, 55), (86, 84, 57), (112, 110, 66), (247, 107, 64), (246, 120, 59), (247, 22, 46), (236, 31, 50), (236, 32, 51), (261, 25, 48), (109, 123, 159), (237, 69, 45), (199, 68, 233), (38, 88, 81), (123, 32, 51), (148, 43, 191), (186, 31, 215), (117, 86, 169), (89, 45, 56), (249, 22, 88), (279, 102, 298), (68, 56, 100), (261, 56, 55), (131, 56, 100), (234, 110, 264), (86, 5, 44), (264, 102, 291), (233, 25, 48), (247, 42, 53), (154, 71, 152), (95, 84, 57), (285, 53, 299), (99, 99, 60), (89, 106, 63), (123, 119, 177), (193, 53, 224), (233, 103, 62), (141, 110, 186), (225, 102, 257), (121, 53, 174), (198, 107, 64), (287, 5, 44), (154, 43, 150), (189, 85, 58), (145, 19, 189), (236, 87, 229), (89, 110, 92), (95, 103, 62), (305, 53, 311), (55, 120, 59), (66, 106, 63), (197, 45, 56), (239, 52, 99), (130, 5, 44), (118, 98, 173), (192, 5, 44), (173, 3, 115), (261, 103, 62), (199, 84, 57), (100, 69, 45), (130, 52, 54), (198, 56, 55), (192, 84, 57), (162, 52, 99), (189, 107, 64), (252, 106, 63), (225, 18, 219), (200, 84, 57), (90, 52, 54), (236, 119, 266), (29, 109, 65), (54, 113, 67), (192, 69, 45), (192, 71, 152), (148, 110, 154), (189, 27, 148), (99, 42, 136), (227, 119, 259), (154, 103, 62), (89, 31, 50), (130, 85, 58), (54, 25, 48), (264, 47, 8), (29, 22, 46), (198, 31, 50), (198, 32, 51), (90, 85, 58), (100, 84, 57), (219, 56, 254), (276, 102, 296), (100, 85, 58), (291, 53, 304), (216, 106, 63), (90, 107, 64), (148, 31, 50), (148, 32, 51), (94, 119, 68), (287, 119, 301), (189, 52, 54), (200, 31, 50), (200, 32, 51), (232, 52, 99), (189, 49, 151), (29, 42, 53), (196, 14, 226), (198, 24, 47), (180, 52, 99), (262, 110, 290), (54, 56, 55)] Root = (-7, 'statement_list') CaseSensitivity = 0 gadfly-1.0.0/gadfly/store.py0100644000157700012320000013503307467104371014760 0ustar rjonestech""" Storage objects :Author: Aaron Watters :Maintainers: http://gadfly.sf.net/ :Copyright: Aaron Robert Watters, 1994 :Id: $Id: store.py,v 1.7 2002/05/11 02:59:05 richard Exp $: """ verbosity = 0 import sys, os, md5, types, marshal from kjbuckets_select import kjbuckets import serialize class StorageError(Exception): ''' error on checking of data integrity ''' # use md5 checksum (stub if md5 unavailable?) 
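# A possible reading of the "stub" idea above (added sketch, not original
# code): were the md5 module ever unavailable, the import at the top could be
# guarded and checksum() degraded to a truncation-only check.  The fallback
# below is hypothetical and left commented out so the real definition that
# follows stays authoritative.
#
#   try:
#       import md5
#       def checksum(string): return md5.new(string).digest()
#   except ImportError:
#       def checksum(string): return "len:%d" % len(string)  # detects truncation only
#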
def checksum(string):
    return md5.new(string).digest()

def recursive_dump(data, prefix="["):
    """for debugging"""
    if isinstance(data, types.StringType):
        #print prefix, data
        return
    p2 = prefix+"["
    try:
        for x in data:
            recursive_dump(x, p2)
    except:
        print prefix, data

def checksum_dump(data, file):
    """checksum and dump marshallable data to file"""
    storage = marshal.dumps(data)
    checkpair = (checksum(storage), storage)
    marshal.dump(checkpair, file)

def checksum_undump(file):
    """undump marshallable data from file, checksum"""
    checkpair = marshal.load(file)
    (check, storage) = checkpair
    if checksum(storage)!=check:
        raise StorageError, "data load checksum fails"
    data = marshal.loads(storage)
    return data

def backup_file(filename, backupname):
    """backup file, if unopenable ignore"""
    try:
        f = open(filename, "rb")
    except:
        return
    data = f.read()
    f.close()
    f = open(backupname, "wb")
    f.write(data)
    f.close()

def del_file(filename):
    """delete file, ignore errors"""
    try:
        os.unlink(filename)
    except:
        pass

class Database0:
    """ Quick and dirty in core database representation. """
    # db.log is not None == use db.log to log modifications
    # set for verbose prints
    verbose = verbosity
    # set for read only copy
    readonly = 0
    # set for temp/scratch db copy semantics
    is_scratch = 0
    # set to add introspective tables
    introspect = 1

    def __init__(self, shadowing=None, log=None):
        """ Dictionary of relations. """
        verbose = self.verbose
        self.shadowing = shadowing
        self.log = log
        self.touched = 0
        if log:
            self.is_scratch = log.is_scratch
        if shadowing and not log:
            raise ValueError, "shadowing db requires log"
        if verbose:
            print "Database0 init"
            if log:
                log.verbose = 1
        if shadowing:
            # shadow structures of shadowed db
            self.rels = shadow_dict(shadowing.rels, Relation0.unshadow)
            self.datadefs = shadow_dict(shadowing.datadefs)
            self.indices = shadow_dict(shadowing.indices)
        else:
            self.rels = {}
            self.datadefs = {}
            self.indices = {}
        if self.introspect:
            self.set_introspection()

    def set_introspection(self):
        import introspection
        self["dual"] = introspection.DualView()
        self["__table_names__"] = introspection.RelationsView()
        self["__datadefs__"] = introspection.DataDefsView()
        self["__indices__"] = introspection.IndicesView()
        self["__columns__"] = introspection.ColumnsView()
        self["__indexcols__"] = introspection.IndexAttsView()

    def reshadow(self, db, dblog):
        """(re)make self into shadow of db with dblog"""
        self.shadowing = db
        self.log = dblog
        self.rels = shadow_dict(db.rels, Relation0.unshadow)
        self.datadefs = shadow_dict(db.datadefs)
        self.indices = shadow_dict(db.indices)

    def clear(self):
        """I'm not sure if database has circular structure, so this added"""
        self.shadowing = None
        self.log = None
        self.rels = {}
        self.datadefs = {}
        self.indices = {}

    def commit(self):
        """commit shadowed changes"""
        verbose = self.verbose
        if self.shadowing and self.touched:
            # log commit handled elsewhere
            #log = self.log
            #if log and not log.is_scratch:
                #if verbose: print "committing log"
                #self.log.commit(verbose)
            if verbose: print "committing rels"
            self.rels.commit(verbose)
            if verbose: print "committing datadefs"
            self.datadefs.commit(verbose)
            if verbose: print "committing indices"
            self.indices.commit(verbose)
            st = self.shadowing.touched
            if not st:
                if verbose: print "setting touched", self.touched
                self.shadowing.touched = self.touched
            elif verbose:
                print "shadowed database is touched"
        elif verbose:
            print "db0: commit on nonshadow instance"

    def __setitem__(self, name, relation):
        """bind a name (uppercased) to tuples as a relation."""
        if self.indices.has_key(name):
            raise
NameError, "cannot set index" self.rels[ name.upper() ] = relation if self.verbose: print "db0 sets rel", name def add_index(self, name, index): if self.rels.has_key(name): raise NameError, `name`+": is relation" self.indices[name] = index if self.verbose: print "db0 sets index", name def drop_index(self, name): if self.verbose: print "db0 drops index", name del self.indices[name] def __getitem__(self, name): if self.verbose: print "db0 gets rel", name return self.rels[name.upper()] def get_for_update(self, name): """note: does not imply updates, just possibility of them""" verbose = self.verbose if verbose: print "db0 gets rel for update", name shadowing = self.shadowing gotit = 0 name = name.upper() rels = self.rels if shadowing: if rels.is_shadowed(name): test = rels[name] # do we really have a shadow or a db copy? if test.is_shadow: gotit = 1 if not gotit: if shadowing.has_relation(name): test = shadowing.get_for_update(name) else: # uncommitted whole relation test = rels[name] gotit = 1 else: test = rels[name] gotit = 1 if self.readonly: raise ValueError, "cannot update, db is read only" elif test.is_view: raise ValueError, "VIEW %s cannot be updated" % name elif shadowing and not gotit: if verbose: print "db0: making shadow for", name if test.is_shadow: return test shadow = Relation0(()) shadow = shadow.shadow(test, self.log, name, self) rels[name] = shadow return shadow else: return test def __delitem__(self, name): if self.verbose: print "db0 drops rel", name del self.rels[name.upper()] def relations(self): return self.rels.keys() def has_relation(self, name): return self.rels.has_key(name) def getdatadefs(self): result = self.datadefs.values() # sort to make create tables first, eg result.sort() return result def add_datadef(self, name, defn, logit=1): """only log the datadef if logit is set, else ignore redefinitions""" dd = self.datadefs if logit and dd.has_key(name): raise KeyError, `name`+": already defined" if logit: self.touched = 1 dd[name] = defn def has_datadef(self, name): return self.datadefs.has_key(name) def drop_datadef(self, name): if self.verbose: print "db0 drops datadef",name dd = self.datadefs #print dd.keys() if not dd.has_key(name): raise KeyError, `name`+": no such element" del dd[name] def __repr__(self): l = [] l.append("INDICES: "+`self.indices.keys()`) for (name, ddef) in self.datadefs.items(): l.append("data definition %s::\n%s" % (name, ddef)) for (name, rel) in self.rels.items(): l.append(name + ":") l.append(rel.irepr()) return '\n\n'.join(l) def bindings(self, fromlist): """return (attdict, reldict, amb, ambatts) from fromlist = [(name,alias)...] 
where reldict: alias > tuplelist attdict: attribute_name > unique_relation amb: dict of dottedname > (rel, att) ambatts: dict of ambiguous_name > witness_alias """ rels = self.rels ambiguous_atts = {} ambiguous = {} relseen = {} attbindings = {} relbindings = {} for (name,alias) in fromlist: name = name.upper() alias = alias.upper() if relseen.has_key(alias): raise NameError, `alias` + ": bound twice in from list" relseen[alias]=alias try: therel = rels[name] except KeyError: raise NameError, `name` + " no such relation in DB" relbindings[alias] = therel for attname in therel.attributes(): if not ambiguous_atts.has_key(attname): if attbindings.has_key(attname): oldrel = attbindings[attname] oldbind = (oldrel, attname) ambiguous["%s.%s"%oldbind] = oldbind del attbindings[attname] ambiguous_atts[attname]=alias newbind = (alias, attname) ambiguous["%s.%s"%newbind] = newbind else: attbindings[attname] = alias else: newbind = (alias, attname) ambiguous["%s.%s"%newbind] = newbind return (attbindings, relbindings, ambiguous, ambiguous_atts) class File_Storage0: """quick and dirty file storage mechanism. relation names in directory/dbname.gfd contains a white separated list of relation names relations in directory/relname.grl contains sequence of marshalled tuples reps prefixed by marshalled list of atts """ verbose = verbosity def __init__(self, dbname, directory): """directory must exist.""" if self.verbose: print "fs0 init:", dbname, directory self.dbname = dbname self.directory = directory self.relation_implementation = Relation0 self.recovery_mode = 0 def load(self, forscratch=0): # if logfile is present, need to recover # error condition: fail to load relation, ddf, but no log file! logfile = self.logfilename() blogfile = self.backup_logfilename() verbose = self.verbose if verbose: print "fs0 load, checking", logfile try: testlog = open(logfile, "rb") if verbose: print "fs0: opened", testlog testlog.close() testlog = open(blogfile, "rb") testlog.close() testlog = None except: recovery_mode = self.recovery_mode = 0 if verbose: print "recovery not needed" else: recovery_mode = self.recovery_mode = 1 if verbose: print "FS0 RECOVERY MODE LOAD!" resultdb = Database0() resultdb.is_scratch = forscratch commands = self.get_initstatements() for command in commands: if verbose: print "fs0 evals", command command.relbind(resultdb) command.eval() for name in resultdb.relations(): if verbose: print "fs0 loads rel", name rel = resultdb[name] if rel.is_view: # don't need to load views continue rel.set_empty() try: data = self.get_relation(name) except StorageError, detail: raise StorageError, "load failure %s: %s" % (name, detail) attsin = tuple(data.attributes()) attsout = tuple(rel.attributes()) if attsin!=attsout: raise StorageError, "rel %s: atts %s don't match %s" % ( name, attsin, attsout) rel.add_tuples( data.rows() ) # in sync! 
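# Load sequence, summarized (added comment): the stored data-definition
# statements are replayed first to recreate empty relations, each .grl file
# is then unpacked and its attribute tuple checked against the recreated
# schema, and the rows are installed with add_tuples.  Relations are marked
# untouched below so a later checkpoint rewrites only what actually changes.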
            rel.touched = 0
        # db in sync
        resultdb.touched = 0
        # do recovery, if needed
        if recovery_mode:
            if verbose: print "fs0 recovering from logfile", logfile
            # restart the log file only if db is not scratch
            restart = not forscratch
            Log = DB_Logger(logfile, blogfile)
            if verbose: Log.verbose=1
            Log.recover(resultdb, restart)
            # do a checkpoint
            self.recovery_mode = 0
            if restart and not forscratch:
                Log.shutdown()
                Log = None
                del_file(logfile)
                if verbose: print "FS0: dumping database"
                self.dump(resultdb)
                Log = resultdb.log = DB_Logger(logfile, blogfile)
                Log.startup()
        elif not forscratch:
            Log = DB_Logger(logfile, blogfile)
            Log.startup()
            resultdb.log = Log
        return resultdb

    def relfilename(self, name):
        #return "%s/%s.grl" % (self.directory, name)
        return os.path.join(self.directory, name+".grl")

    def backup_relfilename(self, name):
        #return "%s/%s.brl" % (self.directory, name)
        return os.path.join(self.directory, name+".brl")

    def relfile(self, name, mode="rb"):
        if self.recovery_mode:
            return self.getfile_fallback(
                self.backup_relfilename(name), self.relfilename(name), mode)
        else:
            name = self.relfilename(name)
            return open(name, mode)

    def getfile_fallback(self, first, second, mode):
        try:
            return open(first, mode)
        except:
            return open(second, mode)

    def get_relation(self, name):
        f = self.relfile(name, "rb")
        rel = self.relation_implementation(())
        try:
            rel.load(f)
        except StorageError:
            if self.recovery_mode:
                f = open(self.relfilename(name), "rb")
                rel.load(f)
            else:
                raise StorageError, \
                    "fs: could not unpack backup rel file or rel file in recovery mode: "+name
        return rel

    def dbfilename(self):
        #return "%s/%s.gfd" % (self.directory, self.dbname)
        return os.path.join(self.directory, self.dbname+".gfd")

    def backup_dbfilename(self):
        #return "%s/%s.bfd" % (self.directory, self.dbname)
        return os.path.join(self.directory, self.dbname+".bfd")

    def logfilename(self):
        #return "%s/%s.gfl" % (self.directory, self.dbname)
        return os.path.join(self.directory, self.dbname+".gfl")

    def backup_logfilename(self):
        #return "%s/%s.glb" % (self.directory, self.dbname)
        return os.path.join(self.directory, self.dbname+".glb")

    def get_initstat_file(self, mode):
        if self.recovery_mode:
            return self.getfile_fallback(
                self.backup_dbfilename(), self.dbfilename(), mode)
        else:
            return open(self.dbfilename(), mode)

    def get_initstatements(self):
        f = self.get_initstat_file("rb")
        if self.verbose: print "init statement from file", f
        try:
            data = checksum_undump(f)
        except StorageError:
            if self.recovery_mode:
                f = open(self.dbfilename(), "rb")
                data = checksum_undump(f)
            else:
                raise StorageError, \
                    "could not unpack ddf backup or ddf file in recovery mode: "+self.dbname
        f.close()
        stats = map(serialize.deserialize, data)
        return stats

    def dump(self, db):
        """perform a checkpoint (no active transactions!)"""
        # db should be non-shadowing db
        # first thing: back up the log
        backup_file(self.logfilename(), self.backup_logfilename())
        verbose = self.verbose
        if verbose: print "fs0: checkpointing db"
        if db.is_scratch or db.readonly:
            # don't need to do anything.
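            # Checkpoint protocol, summarized (added comment): dump() backs up
            # the log and any touched relation/datadef files, rewrites them,
            # and only then deletes the log and the backups.  Deleting the log
            # file is the effective commit point; a crash before that leaves
            # the log in place, and the next load() replays it against the
            # preserved copies.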
if verbose: print "fs0: scratch or readonly, returning" return log = db.log if log: log.commit() if verbose: print "DEBUG LOG TRACE" log.dump() log.shutdown() if db.touched: if verbose: print "fs0: db touched, backing up ddf file" backup_file(self.dbfilename(), self.backup_dbfilename()) relations = db.relations() for r in relations: rel = db[r] #print r if rel.touched: if verbose: print "fs0: backing up touched rel", r backup_file(self.relfilename(r), self.backup_relfilename(r)) for r in relations: if verbose: print "fs0: dumping relations now" self.dumprelation(r, db[r]) if verbose: print "fs0: dumping datadefs now" self.dumpdatadefs(db) # del of logfile signals successful commit. if verbose: print "fs0: successful dump, deleting log file" logfilename = self.logfilename() blogfilename = self.backup_logfilename() del_file(logfilename) del_file(blogfilename) if db.touched: if verbose: print "fs0: deleting backup ddf file" del_file(self.backup_dbfilename()) db.touched = 0 for r in relations: rel = db[r] if rel.touched: if verbose: print "fs0: deleting rel backup", r del_file(self.backup_relfilename(r)) rel.touched = 0 if verbose: print "fs0: restarting db log" log = db.log = DB_Logger(logfilename, blogfilename) log.startup() if verbose: print "fs0: dump complete" self.recovery_mode = 0 def dumprelation(self, name, rel, force=0): """set force to ignore the "touch" flag.""" # ignore self.backup_mode if (force or rel.touched) and not rel.is_view: fn = self.relfilename(name) if self.verbose: print "dumping touched rel", name, "to", fn f = open(fn, "wb") rel.dump(f) def dumpdatadefs(self, db, force=0): """set force to ignore the touch flag""" # ignore self.backup_mode if not (force or db.touched): return fn = self.dbfilename() f = open(fn, "wb") datadefs = db.getdatadefs() datadefsd = map(serialize.serialize, datadefs) #for (defn, ser) in map(None, datadefs, datadefsd): #print defn #print ser #dumps(ser) ### debug test checksum_dump(datadefsd, f) f.close() class Relation0: """quick and dirty in core relation representation. self.tuples contains tuples or 0 if erased. tuples must not move (to preserve indices) unless indices regenerate. """ is_view = 0 # Relation0 is not a view def __init__(self, attribute_names, tuples=None, filter=None): self.indices = kjbuckets.kjGraph() self.index_list = [] self.attribute_names = attribute_names if tuples is None: tuples = [] self.filter = filter self.set_empty() self.add_tuples(tuples) # indices map attname > indices containing att # relation to shadow and log (if non-null) self.log = None self.name = None # anonymous by default self.is_shadow = 0 self.touched = 0 def shadow(self, otherrelation, log, name, inshadowdb): """return structural replica of otherrelation (as self) for non-updatable relation (eg, view) may return otherrelation""" if otherrelation.is_view: # for now, assume VIEWS CANNOT BE UPDATED return otherrelation self.is_shadow = 1 self.shadow_of_shadow = otherrelation.is_shadow self.log = log self.name = name # don't make any updates permanent if set. 
self.tuples = otherrelation.tuples[:] self.attribute_names = otherrelation.attribute_names self.filter = otherrelation.filter for index in otherrelation.index_list: copy = index.copy() name = copy.name self.add_index(copy, recordtuples=0) # record in shadowdb, but don't log it inshadowdb.add_index(name, copy) #inshadowdb.add_datadef(name, copy, logit=0) self.touched = otherrelation.touched return self def unshadow(self): """make self into a replacement for shadowed, return self.""" if self.is_shadow: self.log = None self.is_shadow = self.shadow_of_shadow return self def dump(self, file): attributes = tuple(self.attributes()) rows = self.rows() newrows = rows[:] count = 0 for i in xrange(len(rows)): this = rows[i] if this is not None and not isinstance(this, types.IntType): newrows[count] = rows[i].dump(attributes) count = count + 1 newrows = newrows[:count] newrows.append(attributes) checksum_dump(newrows, file) def load(self, file): """checksum must succeed.""" rows = checksum_undump(file) attributes = rows[-1] self.attribute_names = attributes rows = rows[:-1] undump = kjbuckets.kjUndump for i in xrange(len(rows)): rows[i] = undump(attributes, rows[i]) self.set_empty() self.add_tuples(rows) # in sync with disk copy! self.touched = 0 def add_index(self, index, recordtuples=1): """unset recordtuples if the index is initialized already.""" # does not "touch" the relation index_list = self.index_list indices = self.indices atts = index.attributes() for a in atts: indices[a] = index if recordtuples: (tuples, seqnums) = self.rows(1) index.clear() if tuples: index.add_tuples(tuples, seqnums) index_list.append(index) def drop_index(self, index): # does not "touch" the relation name = index.name if verbosity: print "rel.drop_index", index print "...", self.indices, self.index_list indices = self.indices for a in index.attributes(): # contorted since one index be clone of the other. aindices = indices.neighbors(a) for ind in aindices: if ind.name == name: indices.delete_arc(a, ind) theind = ind # the (non-clone) index ought to have been found above... 
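        # Note (added): if the loop above finds no index arc matching the
        # name, `theind` is never bound and the remove() below raises
        # NameError rather than a descriptive error -- callers are expected
        # to drop only indices that actually exist.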
self.index_list.remove(theind) def choose_index(self, attributes): """choose an index including subset of attributes or None""" kjSet = kjbuckets.kjSet atts = kjSet(attributes) #print "choosing index", atts indices = (atts * self.indices).values() choice = None for index in indices: indexatts = index.attributes() #print "index atts", indexatts iatts = kjSet(indexatts) if iatts.subset(atts): if choice is None: #print "chosen", index.name choice = index lchoice = len(choice.attributes()) else: if index.unique or lchoice tuples self.index = {} self.dseqnums = {} def __repr__(self): un = "" if self.unique: un="UNIQUE " return "%sindex %s on %s" % (un, self.name, self.atts) def copy(self): """make a fast structural copy of self""" result = Index(self.name, self.atts, unique=self.unique) rindex = result.index rdseqnums = result.dseqnums myindex = self.index mydseqnums = self.dseqnums for k in myindex.keys(): rindex[k] = myindex[k][:] for k in mydseqnums.keys(): rdseqnums[k] = mydseqnums[k][:] return result def attributes(self): return self.atts def matches(self, tuple, translate=None): """return (tuples, seqnums) for tuples matching tuple (with possible translations""" if translate: tuple = translate * tuple atts = self.atts dump = tuple.dump(atts) index = self.index if index.has_key(dump): return (index[dump], self.dseqnums[dump]) else: return ((), ()) def clear(self): self.index = {} self.dseqnums = {} def add_tuples(self, tuples, seqnums): unique = self.unique atts = self.atts index = self.index dseqnums = self.dseqnums test = index.has_key for i in xrange(len(tuples)): tup = tuples[i] seqnum = seqnums[i] dump = tup.dump(atts) #print self.name, dump if test(dump): bucket = index[dump] #print "self", self #print "unique", unique #print "bucket", bucket if unique and bucket: raise StorageError, "uniqueness violation: %s %s" %( dump, self) bucket.append(tup) dseqnums[dump].append(seqnum) else: index[dump] = [tup] dseqnums[dump] = [seqnum] def erase_tuples(self, seqnums, all_tuples): # all_tuples must be internal rel tuple list atts = self.atts index = self.index dseqnums = self.dseqnums for seqnum in seqnums: tup = all_tuples[seqnum] dump = tup.dump(atts) index[dump].remove(tup) dseqnums[dump].remove(seqnum) class shadow_dict: """shadow dictionary. defer & remember updates.""" verbose = verbosity def __init__(self, shadowing, value_transform=None): self.shadowed = shadowing shadow = self.shadow = {} self.touched = {} for key in shadowing.keys(): shadow[key] = shadowing[key] self.value_transform = value_transform # defeats inheritance! careful! 
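        # Explanation (added comment): values/items/keys/has_key are rebound
        # straight to the private shadow dictionary for speed, so a subclass
        # that overrides any of them will never be consulted -- hence the
        # "defeats inheritance" warning above.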
self.values = shadow.values self.items = shadow.items self.keys = shadow.keys self.has_key = shadow.has_key def is_shadowed(self, name): return self.touched.has_key(name) def __len__(self): return len(self.shadow) def commit(self, verbose=0): """apply updates to shadowed.""" verbose = verbose or self.verbose if self.touched: shadowed = self.shadowed shadow = self.shadow value_transform = self.value_transform keys = shadowed.keys() if verbose: print "shadowdict oldkeys", keys for k in keys: del shadowed[k] keys = shadow.keys() if verbose: print "shadowdict newkeys", keys for k in shadow.keys(): value = shadow[k] if value_transform is not None: value = value_transform(value) shadowed[k] = value self.touched = {} def __getitem__(self, key): return self.shadow[key] def __setitem__(self, key, item): if not isinstance(key, types.StringType): raise "nonstring", key if item is None: raise "none set", (key, item) self.touched[key] = 1 self.shadow[key] = item def __delitem__(self, key): self.touched[key] = 1 del self.shadow[key] # stored mutations on relations class Add_Tuples: """stored rel.add_tuples(tuples)""" def __init__(self, name): self.to_rel = name self.indb = None def initargs(self): return (self.to_rel,) def set_data(self, tuples, rel): """store self.data as tuple with tuple[-1] as to_rel, rest data""" attributes = tuple(rel.attributes()) ltuples = len(tuples) data = list(tuples) for i in xrange(ltuples): tdata = tuples[i].dump(attributes) data[i] = tdata self.data = tuple(data) def __repr__(self): datarep = map(repr, self.data) datarep = '\n '.join(datarep) return "add tuples to %s\n %s\n\n" % (self.to_rel, datarep) def marshaldata(self): return self.data def demarshal(self, data): self.data = data def relbind(self, db): self.indb = db def eval(self, dyn=None): """apply operation to db""" db = self.indb data = self.data name = self.to_rel rel = db[name] attributes = tuple(rel.attributes()) tuples = list(data) undump = kjbuckets.kjUndump for i in xrange(len(tuples)): tuples[i] = undump(attributes, tuples[i]) rel.add_tuples(tuples) class Erase_Tuples(Add_Tuples): """stored rel.erase_tuples(seqnums)""" def set_data(self, seqnums, rel): seqnums = list(seqnums) self.data = tuple(seqnums) def __repr__(self): return "Erase seqnums in %s\n %s\n\n" % (self.to_rel, self.data) def eval(self, dyn=None): db = self.indb seqnums = self.data name = self.to_rel rel = db[name] rel.erase_tuples(seqnums) class Reset_Tuples(Add_Tuples): """stored rel.reset_tuples(tups, seqnums)""" def set_data(self, tups, seqnums, rel): attributes = tuple(rel.attributes()) dtups = list(tups) for i in xrange(len(dtups)): dtups[i] = dtups[i].dump(attributes) self.data = (tuple(dtups), tuple(seqnums)) def __repr__(self): (dtups, seqnums) = self.data pairs = map(None, seqnums, dtups) datarep = map(repr, pairs) datarep = ' \n'.join(datarep) return "Reset tuples in %s\n %s\n\n" % (self.to_rel, datarep) def eval(self, dyn=None): db = self.indb (dtups, seqnums) = self.data tups = list(dtups) rel = db[self.to_rel] attributes = tuple(rel.attributes()) undump = kjbuckets.kjUndump for i in xrange(len(dtups)): tups[i] = undump(attributes, dtups[i]) rel.reset_tuples(tups, seqnums) # Log entry tags START = "START" COMMIT = "COMMIT" ABORT = "ABORT" UNREADABLE = "UNREADABLE" class Transaction_Logger: """quick and dirty Log implementation per transaction.""" verbose = verbosity def __init__(self, db_log, transactionid, is_scratch=0): self.db_log = db_log self.transactionid = transactionid # ignore all operations if set self.is_scratch = 
is_scratch self.dirty = 0 self.deferred = [] def reset(self): self.deferred = [] def __repr__(self): return "Transaction_Logger(%s, %s, %s)" % ( self.db_log, self.transactionid, self.is_scratch) def log(self, operation): verbose = self.verbose tid = self.transactionid if not self.is_scratch: self.deferred.append(operation) if verbose: print "tid logs", tid, operation def flush(self): verbose = self.verbose if not self.is_scratch: tid = self.transactionid deferred = self.deferred self.deferred = [] if self.db_log: for operation in deferred: self.db_log.log(operation, tid) self.dirty = 1 elif verbose: print "scratch log ignored", tid, operation def commit(self, verbose=0): verbose = self.verbose or verbose tid = self.transactionid if verbose: print "committing trans log", tid if self.is_scratch: if verbose: print "scratch commit ignored", tid return if not self.dirty: if verbose: print "nondirty commit", tid return self.flush() self.db_log.commit(verbose, tid) if verbose: print "transaction is considered recoverable", tid # def __setattr__(self, attr, value): # if attr == 'dirty': # if hasattr(self, 'db_log'): # print 'TL(%x): %s set!'%(id(self), attr), `value`, `self.db_log` # else: # print 'TL(%x): %s set!'%(id(self), attr), `value`, 'no db_log' # elif attr == 'db_log': # # import traceback;traceback.print_stack() # print 'TL(%x): %s set!'%(id(self), attr), `value` # self.__dict__[attr] = value class DB_Logger: """quick and dirty global db logger.""" verbose = verbosity is_scratch = 0 def __init__(self, filename, backupname): self.filename = filename # backup name is never kept open: existence indicates log in use. self.backupname = backupname self.file = None self.dirty = 0 if self.verbose: print id(self), "created DB_Logger on", self.filename def __repr__(self): return "DB_Logger(%s)" % self.filename def startup(self): if self.verbose: print id(self), "preparing", self.filename # open happens automagically #self.file = open(self.filename, "wb") self.clear() self.dirty = 0 def shutdown(self): if self.verbose: print id(self), "shutting down log", self.filename file = self.file if file: file.close() self.file = None def clear(self): if self.verbose: print id(self), "clearing" self.shutdown() del_file(self.filename) def restart(self): if self.verbose: print id(self), "restarting log file", self.filename if self.file is not None: self.file.close() self.file = open(self.filename, "ab") dummy = open(self.backupname, "ab") dummy.close() self.dirty = 0 def clear_log_file(self): if self.verbose: print id(self), "clearing logfile", self.filename if self.file is not None: self.file.close() self.file = None del_file(self.filename) del_file(self.backupname) self.dirty = 0 def log(self, operation, transactionid=None): """transactionid of None means no transaction: immediate.""" file = self.file if file is None: self.restart() file = self.file verbose = self.verbose serial = serialize.serialize(operation) data = (transactionid, serial) if verbose: print id(self), "logging:", transactionid print operation checksum_dump(data, file) self.dirty = 1 def commit(self, verbose=0, transactionid=None): """add commit, if appropriate, flush.""" verbose = self.verbose or verbose if not self.dirty and transactionid is None: if verbose: print "commit not needed", transactionid return elif verbose: print "attempting commit", transactionid if transactionid is not None: self.log( COMMIT, transactionid ) if verbose: print "committed", transactionid if verbose: print "flushing", self.filename self.file.flush() self.dirty = 
0 def recover(self, db, restart=1): verbose = self.verbose filename = self.filename if verbose: print "attempting recovery from", self.filename file = self.file if file is not None: if verbose: print "closing file" self.file.close() self.file = None if verbose: print "opens should generate an error if no recovery needed" try: file = open(filename, "rb") file2 = open(self.backupname, "rb") except: if verbose: print "no recovery needed:", filename print sys.exc_type, sys.exc_value sys.exc_traceback = None return file2.close() if verbose: print "log found, recovering from", filename records = self.read_records(file) if verbose: print "scan for commit records" commits = {} for (i, (tid, op)) in records: if op==COMMIT: if verbose: print "transaction", tid, "commit at", i commits[tid] = i elif verbose: print i, tid, "operation\n", op if verbose: print commits, "commits total" if verbose: print "applying commited operations, in order" committed = commits.has_key for (i, (tid, op)) in records: if tid is None or (committed(tid) and commits[tid]>i): if isinstance(op, types.StringType): if verbose: print "skipping marker", tid, op if verbose: print "executing for", tid, i print op #### Note: silently eat errors unless verbose ### (eg in case of table recreation...) ### There should be a better way to do this!!! try: op.relbind(db) op.eval() except: if verbose: print "error", sys.exc_type, sys.exc_value print "binding or evaluating logged operation:" print op elif verbose: print "uncommitted operation", tid, i op if verbose: print "recovery successful: clearing log file" self.clear() if restart: if verbose: print "recreating empty log file" self.startup() def read_records(self, file): """return log record as (index, (tid, op)) list""" verbose = self.verbose if verbose: print "reading log records to error" records = {} count = 0 while 1: try: data = checksum_undump(file) except: if verbose: print "record read terminated with error", len(records) print sys.exc_type, sys.exc_value break (transactionid, serial) = data operation = serialize.deserialize(serial) records[count] = (transactionid, operation) if verbose: print count, ": read for", transactionid print operation count = count+1 if verbose: print len(records), "records total" records = records.items() records.sort() return records def dump(self): verbose = self.verbose self.shutdown() print "dumping log" self.verbose = 1 try: file = open(self.filename, "rb") except: print "DUMP FAILED, cannot open", self.filename else: self.read_records(file) self.verbose = verbose self.restart() # # $Log: store.py,v $ # Revision 1.7 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.6 2002/05/08 00:49:00 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.5 2002/05/08 00:31:52 richard # More cleanup. # # Revision 1.4 2002/05/07 23:19:02 richard # Removed circular import (at import time at least) # # Revision 1.3 2002/05/07 14:36:10 anthonybaxter # oops. silliness fixed # # Revision 1.2 2002/05/07 04:38:03 anthonybaxter # remove annoying try/except that was hiding an error. 
#
# Revision 1.1.1.1  2002/05/06 07:31:09  richard
#
#
#
gadfly-1.0.0/kjbuckets/0040755000157700012320000000000007512763043013765 5ustar rjonestech
gadfly-1.0.0/kjbuckets/2.0/0040755000157700012320000000000007512763043014264 5ustar rjonestech
gadfly-1.0.0/kjbuckets/2.0/kjbuckets.pyd0100644000157700012320000016000007465607564016775 0ustar rjonestech
[kjbuckets.pyd: binary content of the precompiled Windows extension module omitted]
[... binary content of kjbuckets.pyd (compiled Windows Python extension DLL) omitted ...]
gadfly-1.0.0/kjbuckets/2.0/semantics.py-patch0100644000157700012320000000574607465430476017732 0ustar rjonestech
*** sqlsem.py-orig	Tue Oct  6 22:35:53 1998
--- sqlsem.py	Thu Jun 14 16:31:03 2001
***************
*** 17,28 ****
  ###
  
  # use kjbuckets builtin if available
  try:
      import kjbuckets
  except ImportError:
      import kjbuckets0
      kjbuckets = kjbuckets0
! Tuple = kjbuckets.kjDict
  Graph = kjbuckets.kjGraph
  Set = kjbuckets.kjSet
--- 17,31 ----
  ###
  
  # use kjbuckets builtin if available
+ pyd=0
  try:
      import kjbuckets
+     pyd=1
  except ImportError:
      import kjbuckets0
      kjbuckets = kjbuckets0
! 
! Tuple = kjbuckets.kjDict
  Graph = kjbuckets.kjGraph
  Set = kjbuckets.kjSet
***************
*** 1279,1284 ****
--- 1282,1288 ----
      def map(self, assnlist):
          """remap btlist by self.
             return (tuplelist, attorder)"""
          # DON'T eliminate nulls
+         #kjbuckets.debug()
          from types import IntType
          tt = type
          values = []
***************
*** 1296,1302 ****
                  valtups[i] = 0 # null/false
              else:
                  tup = valtups[i]
!                 valtups[i] = kjUndump(undumper, tup)
          return (valtups, self.attorder)
  
      def relbind(self, dict, db):
--- 1300,1311 ----
                  valtups[i] = 0 # null/false
              else:
                  tup = valtups[i]
!                 if pyd:
!                     valtups[i] = kjUndump((undumper, tup))
!                 else:
!                     valtups[i] = kjUndump(undumper, tup)
! 
!         #kjbuckets.debug(0)
          return (valtups, self.attorder)
  
      def relbind(self, dict, db):
***************
*** 2505,2519 ****
          undumper = map(None, [0]*ndynamic, range(ndynamic))
          undumper = tuple(undumper)
          result = list(dynamic)
!         kjUndump = kjbuckets.kjUndump
          for i in xrange(len(dynamic)):
              dyn = dynamic[i]
              ldyn = len(dyn)
!             #print undumper, dyn
              if ldyn==1:
!                 dynresult = kjUndump(undumper, dyn[0])
              else:
!                 dynresult = kjUndump(undumper, dyn)
              result[i] = dynresult
          return result
--- 2514,2541 ----
          undumper = map(None, [0]*ndynamic, range(ndynamic))
          undumper = tuple(undumper)
          result = list(dynamic)
! 
!         #Ad Modified
!         #kjUndump = kjbuckets.kjUndump
!         kjUndump = kjbuckets.kjUndump
! 
          for i in xrange(len(dynamic)):
              dyn = dynamic[i]
              ldyn = len(dyn)
! 
!             if ldyn==1:
!                 if pyd:
!                     dynresult = kjUndump((undumper, dyn[0]))
!                 else:
!                     dynresult = kjUndump(undumper, dyn[0])
              else:
!                 if pyd:
!                     #Ad convert before calling C module if not error non-unary tuple
!                     dyntuple=tuple(dyn)
!                     dynresult = kjUndump(undumper, dyntuple)
!                 else:
!                     dynresult = kjUndump(undumper, dyn)
              result[i] = dynresult
          return result
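All four hunks of this patch make the same adjustment: the compiled kjbuckets
extension (the .pyd built from the C module later in this archive) is called
with its dumper and value packed into a single tuple, while the pure-Python
kjbuckets0 fallback takes them as two separate arguments.  A minimal sketch of
the dispatch idiom the patch installs (illustrative only, not part of the
distribution; it assumes kjbuckets or kjbuckets0 is importable, as in
sqlsem.py):

    # Illustrative sketch of the calling convention the patch selects between.
    try:
        import kjbuckets
        pyd = 1            # compiled extension: kjUndump takes one packed tuple
    except ImportError:
        import kjbuckets0
        kjbuckets = kjbuckets0
        pyd = 0            # pure-Python fallback: kjUndump takes two arguments

    def undump(undumper, value):
        if pyd:
            return kjbuckets.kjUndump((undumper, value))
        return kjbuckets.kjUndump(undumper, value)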
gadfly-1.0.0/kjbuckets/2.1/0040755000157700012320000000000007512763043014265 5ustar rjonestech
gadfly-1.0.0/kjbuckets/2.1/kjbuckets.pyd0100644000157700012320000012000007465607564016772 0ustar rjonestech
[... binary content of kjbuckets.pyd (compiled Windows Python extension DLL) omitted ...]
[kjbuckets C module source follows; its tar header was garbled]
/* ... and Oleg Broytmann.

   This file defines three Python datatypes (kjSet, kjGraph, and kjDict)
   which share a common representational and procedural infrastructure: a
   hash table with table driven behavior.

   [ want to add .keys(n) -- pick n keys, Null == all
     want to make the deletion algorithm more complicated and faster! ]

   ================================================
   A hint at the table structure:

   By setting WDEBUGPRINT and recompiling the structure of tables can be
   examined using python.  Below we have a Graph constructed and examined
   with OVLFACT of 1 and GSIZE 2.

   >>> G = kjGraph()
   >>> for i in range(15): G[i%5] = i%3
   ...
   >>> G
   Table (size=14, basesize=7, entries=15, free=9, GRAPH)
   0: ROOT(next=3)Group:Bkt[0, 0, 0] Bkt[0, 0, 1]
   1: ROOT(next=13)Group:Bkt[-131071, 1, 1] Bkt[-131071, 1, 0]
   2: ROOT(next=7)Group:Bkt[-393213, 3, 0] Bkt[-393213, 3, 2]
   3: OVFLW(next=0)Group:Bkt[0, 0, 2] Bkt[-1, NULL, NULL]
   4: OVFLW(next=5)Group:Bkt[-262142, 2, 0] Bkt[-1, NULL, NULL]
   5: ROOT(next=4)Group:Bkt[-262142, 2, 2] Bkt[-262142, 2, 1]
   6: ROOT(next=8)Group:Bkt[-524284, 4, 0] Bkt[-524284, 4, 1]
   7: OVFLW(next=2)Group:Bkt[-393213, 3, 1] Bkt[-1, NULL, NULL]
   8: OVFLW(next=6)Group:Bkt[-524284, 4, 2] Bkt[-1, NULL, NULL]
   9: FREE next=10, prev=12
   10: FREE next=11, prev=9
   11: FREE next=12, prev=10
   12: FREE next=9, prev=11
   13: OVFLW(next=1)Group:Bkt[-131071, 1, 2] Bkt[-1, NULL, NULL]
   >>>

   The basic unit for archiving is the bucket, which contains a hash value
   (where -1 represents "No value"), a key object pointer and (for dicts
   and graphs) a map object pointer.  The different behaviors for the
   tables are determined primarily by the different behaviors of the
   bucket structures under the appropriate interpretation.
   Interpretations are indicated by flags from enum BucketFlag.

   The table is an array of bucket groups, with each bucket group
   containing 2 (GSIZE) buckets.  The table has a base size of 7, so all
   hash index loops are rooted between indices 0 and 6.  Thus an item
   with hash 23 will be placed in the hash sequence rooted at 23%7 = 2.
   Hash index loops consist of a root group and possibly one or more
   linked overflow groups arranged in a circular list (embedded in the
   array).  For example the arcs with source 1 are rooted at index 1
   with one overflow group at index 13.

   The code assumes in several places that any used group with
   "undefined entries" is the last group in its hash index loop and all
   undefines are at the higher indices of the group.
   Dedicated overflow groups: In this case 7 (basesize / OVLFACT)
   additional groups have been allocated with indices 7..13 which can
   only be used as overflow groups.  Those groups which are not used
   either as a root or an overflow are kept in a circular free list
   with head at index 9.

   This basic table structure has 3 incarnations:

   kjSet represents "sets of hashable objects."  It has a smaller
   bucket size which archives only one object.

   kjDict represents only relations that are "partial functions from
   hashable objects to objects."

   kjGraph represents arbitrary relations from hashable objects to
   objects.

   Both kjDict's and kjGraph's are indexed "on the left" only.

   The behavior of tables under the differing interpretations is
   determined primarily by the behavior of the function BPtrMatch which
   defines what it means for a Bucket to match a key/map pair under the
   differing interpretations.
*/

/* include a bunch of stuff */
#include "Python.h"
/* #include "rename2.h" */
/* #include "allobjects.h" */
/* #include "modsupport.h" */
/* #include "ceval.h" */
#ifdef STDC_HEADERS
#include <stddef.h>
#else
#include <sys/types.h>
#endif

/* THE FOLLOWING IS HISTORICAL AND NOT NEEDED */
/* define this flag to remove stuff which won't link under 1.2 */
/* #define PYTHON1DOT2 1 */
/* PROBLEM FIXED */

/* flag to enable optional debug printing during execution
   turned on/off by kjbuckets.debug() from python */
/* #define KJBDEBUG 1 */

#ifdef KJBDEBUG
static long DebugLevel = 0;
/* usage: Dprint(("this is a long %ld",i)); */
#define Dprint(x) if (DebugLevel) printf x
#else
#define Dprint(x) {}
#endif

/***************************************************************/
/** local parameters                                          **/

/* if set, this changes printing to show internal structure of table */
/* if undefined, the debug printing will be omitted */
/* #define WDEBUGPRINT 0 */

/* overflow fudge factor, low values mean more fudge
   array size = basesize + basesize/OVLFACT
   extra space is used only for overflows */
#define OVLFACT 1

/* group size for each bucket group, smaller means faster/bigger (roughly) */
#define GSIZE 4

/* if you redefine OVLFACT, better rethink the following macro
   which is designed to force a resize to a size large enough for
   additional inserts.
   !!!AN INFINITE RECURSION WILL RESULT IF THE RESULTING TABLE
   IS NOT LARGE ENOUGH!!! */
#define RESIZEUPSIZE(tp) ( tp->basesize * GSIZE + 1 )

/* resize down when fewer than 1/RESIZEFACTOR buckets are used */
#define RESIZEFACTOR 8

/* don't resize down if size is smaller than this */
#define RESIZETHRESHOLD 16

/* the test for resizing down */
#define RESIZEDOWNTEST(tp) \
  ( (tp->size > RESIZETHRESHOLD) && \
    ( (tp->entries * RESIZEFACTOR) < (tp->size * GSIZE) ) )
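/* Illustrative arithmetic for the settings above: a table with basesize 7
   has size 7 + 7/OVLFACT = 14 groups, holding 14*GSIZE = 56 buckets.
   RESIZEUPSIZE is then 7*4 + 1 = 29, so a failed forced insert resizes the
   table to hold at least 29 members; RESIZEDOWNTEST shrinks only tables of
   more than RESIZETHRESHOLD groups in which entries*8 < size*4, that is,
   tables with fewer than 1/8 of their buckets in use. */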
/* group states */
#ifdef OVERFLOW
#undef OVERFLOW
#endif
enum GState { UNKNOWN, FREE, ROOT, OVERFLOW };

/* bucket behaviors, smaller is less general! */
enum BucketFlag { SETFLAG=0, DICTFLAG=1, GRAPHFLAG=2 };

/* special invalid hash value (from python convention) */
#define NOHASH ( (long) -1 )

/* to force or not to force insertions during lookups */
enum ForceFlag { FORCE=1, NOFORCE=0 };

/* macro for getting hash values (snarfed from mappingobject.c) */
#ifdef CACHE_HASH
#define GETHASH(hashvalue, object) \
  if (!PyString_Check(object) || \
      (hashvalue = ((PyStringObject *) object)->ob_shash) == -1)\
    hashvalue = PyObject_Hash(object)
#else
#define GETHASH(hashvalue, object) hashvalue = PyObject_Hash(object)
#endif

/*********************************************************/
/* bucket methods                                       **/

/* set bucket structure */
typedef struct {
  long hash;
  PyObject * member;
} SetBucket;

/* graph and dict bucket structure */
typedef struct {
  long hash;
  PyObject * member;
  PyObject * map;
} DiBucket;

/* for passing general buckets around, with external flags */
typedef union {
  SetBucket * SBucketp;
  DiBucket * DBucketp;
} Bucketptr;

/* destructuring a bucket (macroized) */
#define BPtrDestructure(/*Bucketptr*/ Bp, /*enum BucketFlag*/ flag,\
                        /*long*/ hp, /*PyObject*/ memp, /*PyObject*/ mapp)\
{\
  switch (flag) {\
  case SETFLAG:\
    hp = Bp.SBucketp->hash;\
    memp = Bp.SBucketp->member;\
    mapp = memp; /* map is copy of memp */\
    break;\
  case DICTFLAG:\
  case GRAPHFLAG:\
    hp = Bp.DBucketp->hash;\
    memp = Bp.DBucketp->member;\
    mapp = Bp.DBucketp->map;\
    break;\
  }\
}

#ifdef WDEBUGPRINT
/* testing only */
static long BPtrDump(Bucketptr Bp, enum BucketFlag flag, FILE *fp)
{
  long h;
  PyObject *mem, *map;
  BPtrDestructure(Bp, flag, h, mem, map);
  fprintf(fp, "Bkt[%ld, ",h);
  if (mem == 0) { fprintf(fp, "NULL"); }
  /*else { if (PyObject_Print(mem, fp, 0) != 0) { return -1; } }*/
  fprintf(fp, "%ld, ",mem);
  if (map == 0) { fprintf(fp, "NULL"); }
  /*else { if (PyObject_Print(map, fp, 0) != 0) { return -1; } }*/
  fprintf(fp, "%ld] ",map);
  return 0;
}
#endif

/* setting a bucket
   Py_INCREFs handled here.
   assumes initial contents are null or garbage. (macroized) */
/* static long */
#define BPtrSet( \
  /* Bucketptr */ Bp, /* enum BucketFlag */ flag,\
  /* long */ h, /* PyObject * */mem1, /* PyObject * */map1)\
{\
  switch(flag) {\
  case SETFLAG:\
    if ((mem1==0)&&(h!=NOHASH)) Dprint(("setting mem to 0, hash =%ld\n",h));\
    /* ignore map */\
    Bp.SBucketp->hash = h;\
    Bp.SBucketp->member = mem1;\
    if (mem1 != 0) { Py_XINCREF (mem1); }\
    break;\
  case DICTFLAG:\
  case GRAPHFLAG:\
    Bp.DBucketp->hash = h;\
    Bp.DBucketp->member = mem1;\
    if (mem1 != 0) { Py_XINCREF (mem1); }\
    Bp.DBucketp->map = map1;\
    if (map1 != 0) { Py_XINCREF (map1); }\
    break;\
  }\
}

/* initialization assuming invalid value -- not used.
   (no decrefs, could macroize) */
/*static long BPtrInit( Bucketptr Bp, enum BucketFlag flag )
{
  PyObject *dummy;
  dummy = 0;
  BPtrSet( Bp, flag, NOHASH, dummy, dummy );
}*/

/* re-initialization assuming valid value
   Py_DECREFs handled here.
   to save values in the bucket for use after reinitialization,
   incref them first and decref after... (macroized) */
/*static void*/
#define BPtrReInit( /*Bucketptr*/ Bp, /*enum BucketFlag*/ flag )\
{\
  long hashBBB;\
  PyObject *MemberBBB = 0, *MapBBB = 0, *dummyBBB = 0;\
  BPtrDestructure( Bp, flag, hashBBB, MemberBBB, MapBBB );\
  if ( MemberBBB != 0 ) { Py_DECREF(MemberBBB); }\
  /* don't decref map for sets!! */\
  if ( (MapBBB != 0) && (flag != SETFLAG) ) { Py_DECREF(MapBBB); }\
  dummyBBB = 0;\
  BPtrSet( Bp, flag, NOHASH, dummyBBB, dummyBBB );\
}
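/* Reference counting summary for the three bucket primitives above:
   BPtrDestructure only reads fields and changes no reference counts;
   BPtrSet Py_XINCREFs whatever it stores; BPtrReInit Py_DECREFs what it
   drops -- except the map of a set bucket, which is merely an alias for
   the member and owns no reference of its own. */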
/* returns 1 on match, 0 no match, -1 error
   newflag is set if new entry, else reset
   dirtyflag is set if this is a forced overwrite, else left alone */
/* static long */
#define BPtrMatch(/*int*/ result,\
                  /*Bucketptr*/ Bp, \
                  /*enum BucketFlag*/ flag,\
                  /*long*/ h, \
                  /*PyObject * */ Mm, \
                  /*PyObject * */ Mp, \
                  /*enum ForceFlag*/ Force,\
                  /*long * */ newflag, \
                  /*long * */ dirtyflag) \
{\
  long hashAAA = 0;\
  PyObject *MemberAAA = 0, *MapAAA = 0, *dummyAAA = 0;\
  newflag = 0; /* default assumption */\
  result = 0; /* default: fail */\
  BPtrDestructure( Bp, flag, hashAAA, MemberAAA, MapAAA );\
  switch (flag) {\
  case SETFLAG:\
    /* ignore maps */\
    if ( ( hashAAA == NOHASH) && (h != NOHASH)) { \
      /* force it? */\
      if (Force == FORCE) {\
        dummyAAA = 0;\
        BPtrSet( Bp, flag, h, Mm, dummyAAA );\
        newflag = 1; /* entry is new */\
        result = 1; /* forced match on empty bucket */\
      }\
    } else {\
      if (hashAAA != NOHASH) {\
        /* null match */\
        if (h == NOHASH)\
          { result = 1; } /* bucket full, hash null == null match */\
        else { /* fully defined match */\
          if ((h == hashAAA) && (PyObject_Compare(Mm, MemberAAA)==0))\
            { result = 1; } /* hash defined, all eq == match */\
        }\
      }\
    }\
    break;\
  case DICTFLAG:\
    /* null match case */\
    if ((h == NOHASH) && (hashAAA != NOHASH)) { result = 1; }\
    else {\
      /* Forced match succeeds if bucket is empty or members match */\
      if ((Force == FORCE) &&\
          ( (hashAAA == NOHASH) || \
            ((h == hashAAA)&&(PyObject_Compare(Mm, MemberAAA)==0)) ) ) {\
        if ((Mm == 0) || (Mp == 0)) { result = -1; } /* error */\
        else {\
          if (hashAAA == NOHASH) { newflag = 1; } /* new if old was empty */\
          else {\
            if (PyObject_Compare(MapAAA,Mp)!=0) { /* overwriting: dirty */\
              dirtyflag = 1;\
            }\
          }\
          BPtrReInit( Bp, flag );\
          BPtrSet( Bp, flag, h, Mm, Mp );\
          result = 1; /* successful forced match */\
        }\
      } else {\
        if ( (h!=NOHASH) && (h==hashAAA) &&\
             (Mm != 0) && (PyObject_Compare(Mm, MemberAAA)==0) &&\
             ( ( Mp == 0 ) || (PyObject_Compare(MapAAA,Mp)==0) ) )\
          { result = 1; } /* successful unforced match */\
      }\
    }\
    break;\
  case GRAPHFLAG:\
    if ( ( h == NOHASH ) && (hashAAA != NOHASH) ) { \
      Dprint(("graph null match\n")); \
      result = 1; /* null match */\
    } else {\
      /* force only on empty buckets */\
      if ( ( hashAAA == NOHASH ) && (Force == FORCE) ) {\
        if ( (h==NOHASH) || (Mm==0) || (Mp==0) ) { \
          Dprint(("graph match error\n")); \
          result = -1; /* error */\
        } else {\
          Dprint(("graph forced match\n")); \
          BPtrReInit( Bp, flag );\
          BPtrSet( Bp, flag, h, Mm, Mp );\
          newflag = 1;\
          result = 1; /* successful forced match */\
        }\
      } else {\
        /* unforced match, can match if Mm is null */\
        if (( hashAAA != NOHASH ) && ( hashAAA == h ) &&\
            (Mm != 0) && ( PyObject_Compare(Mm,MemberAAA)==0 ) && \
            ( (Mp == 0) || ( PyObject_Compare(MapAAA,Mp)==0 ))) {\
          Dprint(("graph unforced match\n")); \
          result = 1; /* successful unforced match */\
        }\
      }\
    }\
    break;\
  default:\
    /* error case */\
    result = -1;\
    break;\
  }\
}
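/* In short, BPtrMatch gives each table type its personality: SETFLAG
   ignores the map entirely; DICTFLAG lets a forced match overwrite the
   map of an existing member (setting dirtyflag); GRAPHFLAG never
   overwrites a full bucket, so one member may pair with many maps held
   in different buckets. */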
/*************************************************************/
/** group methods                                           **/

/* array types for bucket groupings */
typedef SetBucket SBuckets[GSIZE];
typedef DiBucket DBuckets[GSIZE];

/* free group template */
typedef struct {
  long Next;
  long Previous;
} FreeGroup;

/* DiBucket group template */
typedef struct {
  long Next;
  DBuckets Buckets;
} DBGroup;

/* SetBucket group template */
typedef struct {
  long Next;
  SBuckets Buckets;
} SBGroup;

/* SetGroup structure */
typedef struct {
  enum GState State;
  union {
    FreeGroup free;
    SBGroup group;
  } mem;
} SetGroup;

/* DiGroup structure */
typedef struct {
  enum GState State;
  union {
    FreeGroup free;
    DBGroup group;
  } mem;
} DiGroup;

/* union of different group template pointer types */
typedef union {
  FreeGroup *fgp;
  DBGroup *dbp;
  SBGroup *sbp;
} Groupptr;

/* get a bucket from a group (macroized) */
/*static Bucketptr*/
#define GetBucket(/*Bucketptr * */ Bp, \
                  /*Groupptr*/ g, \
                  /*enum BucketFlag*/ flag, \
                  /*int*/ index)\
{\
  if (index>GSIZE) Dprint((" BAD INDEX IN GETBUCKET %ld \n", index));\
  switch(flag){\
  case SETFLAG:\
    Bp.SBucketp = &(g.sbp->Buckets[index]);\
    break;\
  case DICTFLAG:\
  case GRAPHFLAG:\
    Bp.DBucketp = &(g.dbp->Buckets[index]);\
  }\
}

/* testing for empty group -- assumes correct backfilling (macroized) */
/*static int*/
#define GroupEmpty(/*int*/ Eresult, \
                   /*Groupptr*/ Eg, /*enum BucketFlag*/ Eflag)\
{\
  long Eh = 0;\
  PyObject *EMm, *EMp;\
  Bucketptr EBp;\
  GetBucket(EBp, Eg, Eflag, 0);\
  BPtrDestructure(EBp, Eflag, Eh, EMm, EMp);\
  if (Eh == NOHASH) { Eresult = 1; }\
  else { Eresult = 0; }\
}

/* initialize a groupptr to empty, assuming garbage initially (macroized) */
/*static void */
#define Groupinit(/*Groupptr*/ Dg, /*enum BucketFlag*/ Dflag)\
{\
  Bucketptr DBp;\
  PyObject *Ddummy;\
  long Di;\
  Ddummy = 0;\
  for (Di=0; Di<GSIZE; Di++) {\
    GetBucket(DBp, Dg, Dflag, Di);\
    BPtrSet(DBp, Dflag, NOHASH, Ddummy, Ddummy);\
  }\
}

/* array of groups, interpreted according to the table flag */
typedef union {
  SetGroup *Sgroups;
  DiGroup *Dgroups;
} GroupArray;

/* dereference a group array entry: deliver a template pointer and
   pointers to the State and Next fields (macroized) */
#define GArrayRef(/*GroupArray*/ g, /*enum BucketFlag*/ flag,\
                  /*int*/ index, /*Groupptr*/ templateptr,\
                  /*enum GState * */ Stateout, /*long * */ Nextptr)\
{\
  SetGroup *SGptr;\
  DiGroup *DGptr;\
  switch (flag) {\
  case SETFLAG:\
    SGptr = &(g.Sgroups[index]);\
    Stateout = &(SGptr->State);\
    switch (SGptr->State) {\
    case FREE:\
      templateptr.fgp = &(SGptr->mem.free);\
      Nextptr = &(SGptr->mem.free.Next);\
      break;\
    case ROOT:\
    case OVERFLOW:\
    case UNKNOWN:\
      templateptr.sbp = &(SGptr->mem.group);\
      Nextptr = &(SGptr->mem.group.Next);\
    }\
    break;\
  case DICTFLAG:\
  case GRAPHFLAG:\
    DGptr = & (g.Dgroups[index]);\
    Stateout = &(DGptr->State);\
    switch (DGptr->State) {\
    case FREE:\
      templateptr.fgp = &(DGptr->mem.free);\
      Nextptr = &(DGptr->mem.free.Next);\
      break;\
    case ROOT:\
    case OVERFLOW:\
    case UNKNOWN:\
      templateptr.dbp = &(DGptr->mem.group);\
      Nextptr = &(DGptr->mem.group.Next);\
      break;\
    }\
    break;\
  }\
}

/* free group methods */
/* (macroized) */
/* static void */
#define SetFreeGroup(/*GroupArray*/ Fg, \
                     /*enum BucketFlag*/ Fflag,\
                     /*int*/ Fselfindex, \
                     /*int*/ Fnextindex, \
                     /*int*/ Fpreviousindex)\
{\
  Groupptr Fself, Fnext, Fprev;\
  long *Fdummy;\
  enum GState *FselfState = 0, *FnextState = 0, *FprevState = 0;\
  Dprint(("SetFreeGroup(self=%ld, next=%ld, prev=%ld)\n", \
          Fselfindex, Fnextindex, Fpreviousindex));\
  GArrayRef(Fg, Fflag, Fselfindex, Fself, FselfState, Fdummy );\
  GArrayRef(Fg, Fflag, Fnextindex, Fnext, FnextState, Fdummy );\
  GArrayRef(Fg, Fflag, Fpreviousindex, Fprev, FprevState, Fdummy );\
  *FselfState = FREE;\
  Fself.fgp->Previous = Fpreviousindex;\
  Fself.fgp->Next = Fnextindex;\
  Fnext.fgp->Previous = Fselfindex;\
  Fprev.fgp->Next = Fselfindex;\
}

/* get a free group (macroized) */
/*static void*/
#define ExtractFreeGroup(/*GroupArray*/ Gg, \
                         /*enum BucketFlag*/ Gflag, \
                         /*int*/ Gindex )\
{\
  long Gnextindex, Gpreviousindex, *Gdummy;\
  Groupptr Gself, Gnext, Gprev;\
  enum GState *GselfState = 0, *GnextState, *GprevState;\
  Dprint(("ExtractFreeGroup %ld\n",Gindex));\
  GArrayRef(Gg, Gflag, Gindex, Gself, GselfState, Gdummy );\
  Gnextindex = Gself.fgp->Next;\
  Gpreviousindex = Gself.fgp->Previous;\
  GArrayRef(Gg, Gflag, Gnextindex, Gnext, GnextState, Gdummy );\
  GArrayRef(Gg, Gflag, Gpreviousindex, Gprev, GprevState, Gdummy );\
  Gnext.fgp->Previous = Gpreviousindex;\
  Gprev.fgp->Next = Gnextindex;\
  *GselfState = UNKNOWN;\
}

/* for a non-free group, find previous entry in circular list */
/* macroized */
/* static long */
#define Gprevious( /*int*/ Hresult,\
                   /* enum BucketFlag */ Hflag, \
                   /*int*/ Hindex, \
                   /*GroupArray*/ Harray)\
{\
  long Hnext, HHHindex;\
  enum GState *HdummyState;\
  Groupptr Hdummytemplate;\
  long *HNptr = 0;\
  Dprint(("Gprevious %ld\n",Hindex));\
  HHHindex = Hnext = Hindex;\
  do {\
    Hresult = Hnext;\
    GArrayRef(Harray, Hflag, Hnext, Hdummytemplate, HdummyState, HNptr);\
    Hnext = *HNptr;\
    Dprint(("Gprevious at %ld %ld %ld\n", Hnext, HHHindex, Hindex));\
  } while (Hnext != HHHindex);\
  /* return Hresult; */\
}

/* remove a group from its circular list */
/* macroized */
/* static void*/
#define Gremove( /*enum BucketFlag*/ Iflag, \
                 /*int*/ Iindex, \
                 /*GroupArray*/ Iarray)\
{\
  enum GState *IdummyState;\
  Groupptr Idummytemplate;\
  long *INext = 0, *INextOfPrev = 0;\
  long Iprevious;\
  Dprint(("Gremove %ld\n",Iindex));\
  Gprevious(Iprevious, Iflag, Iindex, Iarray);\
  GArrayRef(Iarray, Iflag, Iindex, Idummytemplate, IdummyState, INext);\
  GArrayRef(Iarray, Iflag, Iprevious, Idummytemplate, \
            IdummyState, INextOfPrev);\
  *INextOfPrev = *INext;\
  *INext = Iindex;\
}

/* Swap out overflow at fromindex contents from its circular list to toindex */
/* assumes toindex is currently on a unary list */
/* macroized */
/* static void */
#define Gswapout(/*GroupArray*/ Jarray, \
                 /*int*/ Jfromindex, \
                 /*int*/ Jtoindex,\
                 /*enum BucketFlag*/ Jflag)\
{\
  long *JNext = 0, *JNextOfPrev = 0, *JNextOfOther = 0;\
  enum GState *JState, *JOtherState = 0, *JPrevState;\
  Groupptr Jg, Jgprev, Jgother;\
  long Jprevious;\
  Gprevious(Jprevious, Jflag,Jfromindex,Jarray);\
  Dprint(("Gswapout %ld --> %ld\n",Jfromindex, Jtoindex));\
  GArrayRef(Jarray,Jflag,Jfromindex, Jg, JState, JNext);\
  GArrayRef(Jarray,Jflag,Jprevious, Jgprev, JPrevState, JNextOfPrev);\
  GArrayRef(Jarray,Jflag,Jtoindex, Jgother, JOtherState, JNextOfOther);\
  *JNextOfOther = *JNext;\
  *JOtherState = OVERFLOW;\
  GroupCopy(Jgother, Jg, Jflag);\
  *JNextOfPrev = Jtoindex;\
  Groupinit(Jg, Jflag);\
  /* *JState = ROOT; */\
  *JNext = Jfromindex;\
}

/******************************************************************/
/** table methods                                                **/

/* table structure */
typedef struct {
  enum BucketFlag flag; /* bucket behavior */
  long Dirty;    /* should be set if the table has had a "bucket overwrite"
                    ie, if a deletion or entry overwrite has occurred */
  long Free;     /* head of circular free list */
  long entries;  /* number of entries used */
  long basesize; /* basesize for truncating hash */
  long size;     /* number of groups (basesize+extras) */
  GroupArray groups; /* array of groups of buckets */
} Table;

/* place an entry on the free list, assuming it isn't there already */
/* macroized */
/*static void*/
#define FreeTableIndex(/*Table * */ Ktp, /*int*/ Kindex)\
{\
  register enum BucketFlag Kflag = tp->flag;\
  GroupArray Kgroups = Ktp->groups;\
  long Kfreeindex = Ktp->Free;\
  Groupptr Kthis, Kfree;\
  enum GState *KthisState = 0, *KfreeState = 0;\
  long *KNext = 0, *KfreeNext = 0;\
  Dprint(("FreeTableIndex %ld\n",Kindex));\
  GArrayRef( Kgroups, Kflag, Kindex, Kthis, KthisState, KNext);\
  /* extract the group, only if its in a known state */\
  if (*KthisState != UNKNOWN) {\
    Gremove( Kflag, Kindex, Kgroups );\
  }\
  *KthisState = FREE;\
  if (Kfreeindex == -1) {\
    SetFreeGroup( Kgroups, Kflag, Kindex, Kindex, Kindex );\
  }\
  else {\
    GArrayRef( Kgroups, Kflag, Kfreeindex, Kfree, KfreeState, KfreeNext);\
    SetFreeGroup( Kgroups, Kflag, Kindex, *KfreeNext, Kfreeindex);\
  }\
  Ktp->Free = Kindex;\
}
/* bucket allocation for table */
static long AllocateBuckets(Table *tp, long numMembers)
{
  register enum BucketFlag flag = tp->flag;
  long ExpSize = numMembers/GSIZE + 1;
  long basesize, size, *Next, i;
  enum GState *State = NULL;
  Groupptr g;
  GroupArray groups;
  Dprint(("AllocateBuckets %ld\n",numMembers));
  /* this weird heuristic is chosen arbitrarily (powers of 2 minus 1) */
  for (basesize=1; ; basesize += basesize + 1) {
    if ((basesize <= 0) || (basesize>=ExpSize)) { break; }
  }
  if (basesize<ExpSize) {
    /* arithmetic overflow: requested table is too large */
    PyErr_NoMemory();
    return 0; /* error */
  }
  tp->basesize = basesize;
  tp->size = size = basesize + basesize/OVLFACT;
  tp->entries = 0;
  switch (flag) {
  case SETFLAG:
    groups.Sgroups = (SetGroup *) calloc(sizeof(SetGroup), size);
    break;
  case DICTFLAG:
  case GRAPHFLAG:
    groups.Dgroups = (DiGroup *) calloc(sizeof(DiGroup), size);
    break;
  default:
    PyErr_SetString(PyExc_SystemError,
                    "invalid internal table behavior flag");
    return 0; /* error */
  }
  if (groups.Dgroups == NULL) {
    PyErr_NoMemory();
    return 0; /* error */
  }
  /* initialize all states to unknown */
  for (i=0; i<size; i++) {
    GArrayRef(groups, flag, i, g, State, Next);
    *State = UNKNOWN;
  }
  tp->groups = groups;
  tp->Free = -1;
  /* initialize free groups backwards, to encourage use of
     dedicated free groups */
  for (i=size-1; i>=0; i--) {
    FreeTableIndex(tp, i);
  }
  return 1;
}

#ifdef WDEBUGPRINT
/* printing for testing only */
static long TableDump(Table *tp, FILE *fp)
{
  register enum BucketFlag flag = tp->flag;
  GroupArray groups = tp->groups;
  Groupptr g;
  enum GState *State;
  long size = tp->size;
  long i, *Next;
  fprintf(fp, "Table (size=%ld, basesize=%ld, entries=%ld, free=%ld, ",
          size, tp->basesize, tp->entries, tp->Free);
  switch (flag) {
  case SETFLAG: fprintf(fp, "SET)\n"); break;
  case DICTFLAG: fprintf(fp, "DICT)\n"); break;
  case GRAPHFLAG: fprintf(fp, "GRAPH)\n"); break;
  default: fprintf(fp, "!unknown flag!\n");
  }
  for (i=0; i<size; i++) {
    GArrayRef(groups, flag, i, g, State, Next);
    fprintf(fp, "%ld: ", i);
    switch (*State) {
    case FREE:
      fprintf(fp, "FREE next=%ld, prev=%ld\n",
              g.fgp->Next, g.fgp->Previous);
      break;
    case ROOT:
      fprintf(fp, "ROOT(next=%ld)",*Next);
      if (GroupDump(g,flag,fp)!=0) { return -1; }
      break;
    case OVERFLOW:
      fprintf(fp, "OVFLW(next=%ld)",*Next);
      if (GroupDump(g,flag,fp)!=0) { return -1; }
      break;
    default:
      fprintf(fp, "!invalid GState!\n");
    }
  }
  return 0;
}
#endif

/* empty out all groups in this table */
static void groupsReinit(GroupArray g, enum BucketFlag flag, long size)
{
  enum GState *State = 0;
  Groupptr groupp;
  long i, j, *d;
  Bucketptr Bp;
  Dprint(("groupsReinit %ld \n",size));
  /* reinit all the groups to properly handle object references */
  for (i=0; i<size; i++) {
    GArrayRef(g, flag, i, groupp, State, d);
    if ((*State == ROOT) || (*State == OVERFLOW)) {
      for (j=0; j<GSIZE; j++) {
        GetBucket(Bp, groupp, flag, j);
        BPtrReInit(Bp, flag);
      }
    }
  }
}

/* deallocate the groups of a table */
static void groupsDealloc(GroupArray g, enum BucketFlag flag, long size)
{
  groupsReinit(g, flag, size);
  free((char *) g.Dgroups);
}

/* take the group at index off the free list for use, assuming it is
   currently free (macroized) */
#define UnFreeTableIndex(/*long*/ Lresult, /*Table * */ Ltp, /*int*/ Lindex)\
{\
  register enum BucketFlag Lflag = Ltp->flag;\
  GroupArray Lgroups = Ltp->groups;\
  long Lfreeindex = Ltp->Free;\
  long *LNextp = 0, LNextind;\
  enum GState *LState;\
  Groupptr Lthis;\
  Lresult = Lindex;\
  Dprint(("UnFreeTableIndex %ldn",Lresult));\
  GArrayRef(Lgroups, Lflag, Lresult, Lthis, LState, LNextp);\
  /* debug */\
  if (*LState != FREE) \
    Dprint(("UnFreeTableIndex State=%ld not FREE\n",*LState));\
  LNextind = *LNextp; /* save */\
  if (LNextind == Lresult) {\
    /* free list has one elt, zero after */\
    Ltp->Free = -1;\
  } else {\
    ExtractFreeGroup(Lgroups, Lflag, Lresult);\
    if (Lfreeindex == Lresult) { Ltp->Free = LNextind; }\
  }\
  Groupinit(Lthis,Lflag);\
  /*return Lindex;*/\
}

/* table initializer
   could macroize */
static long initTable(Table *tp, enum BucketFlag flag, long numMembers)
{
  tp->flag = flag;
  tp->Dirty = 0;
  Dprint(("initTable\n"));
  return AllocateBuckets(tp, numMembers);
}
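/* Sizing example (illustrative): initTable(tp, GRAPHFLAG, 15) with GSIZE 4
   computes ExpSize = 15/4 + 1 = 4; the doubling loop (1, 3, 7, ...) stops
   at the first value >= 4, giving basesize 7 and size 7 + 7/OVLFACT = 14
   groups -- seven roots plus seven dedicated overflow groups. */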
/* forward decl for table resizer */
long tableResize( Table *, long );

/* matching within a table.
   inputs:
     tp -- the table
     member1 -- the member to match
     map1 -- the map to match (null for set/dict)
     Force -- whether or not to force an insert on failure
     rootgroupI -- for reentrance, the rootgroup for current loop
     lastgroupI -- for reentrance, the current group
     lastbucketI -- for reentrance, the *previous* bucket to look past.
       (-1 means none for I* args)
     hsh -- the hash value if known (NOHASH means not known)
   outputs: (only valid after a successful search)
     rtgrp -- index of current root group (for later reenter)
     nxtgrp -- index of group where found
     nxtbkt -- index of bucket where found
     Bp -- Bucketptr to bucket where found
     hshout -- hash value
     isnew -- 1 if new entry inserted, 0 otherwise
   return value 1 (found) 0 (not found) -1 (error occurred)
   Behaviors:
     if hsh == NOHASH and Member1 == 0 then rootgroupI should be valid;
       match any full value past reentrant state
     else if hsh, rootgroup, etc. not defined compute them.
     if the rootgroup is currently an overflow swap it out.
     search in circular list headed at rootgroup for match
       (if Force and there is space in existing bucket, force insert)
     if no match found and Force, allocate a new group on this list
       and force insert the member.
*/
/* crazy idea: macroize this monster, and use stub only for
   recursive calls... */
static long tableMatch( Table *tp, PyObject *member1, PyObject *map1,
                        enum ForceFlag Force,
                        long rootgroupI, long lastgroupI, long lastbucketI,
                        long hsh,
                        /*outputs*/
                        long *rtgrp, long *nxtgrp, long *nxtbkt,
                        Bucketptr *Bp, long *hshout, long *isnew)
{
  register enum BucketFlag flag = tp->flag;
  GroupArray groups = tp->groups;
  Groupptr root, thisgroup, avail;
  enum GState *state = 0, *availState = 0;
  long *Next = 0, *rNext = 0, AvailableI, found, *availNext = 0, *dirtyptr;
  unsigned long lhsh;
  /*Dprint(("TableMatch %ld\n",hsh));*/
  /* used to mark the table dirty upon "bucket overwrite" */
  dirtyptr = &(tp->Dirty);
  /* sanity checks (comment out later?) */
  if ( (member1 == 0) && ( (rootgroupI < 0) || (Force == FORCE) ) ) {
    PyErr_SetString(PyExc_SystemError,
                    "bug in kjbuckets implementation (tableMatch)");
    return -1;
  }
  /* compute hash value if absent and needed */
  if ((hsh == NOHASH) && (member1 != 0)) {
    GETHASH(hsh, member1);
    if (hsh == -1) { return -1; } /* unhashable */
    Dprint(("tm: hash = %ld computed\n",hsh));
  }
  /* sanity check */
  /*if (tp->Free != -1) {
    GArrayRef(groups, flag, tp->Free, root, state, rNext);
    if (*state != FREE) {
      PyErr_SetString(PyExc_SystemError, "free index not free in table");
      return -1;
    }
  }*/
  *hshout = hsh; /* return value */
  lhsh = /*(unsigned long)*/ hsh;
  /* find the root group if needed */
  if (rootgroupI < 0) {
    rootgroupI = lastgroupI = lhsh % tp->basesize;
    lastbucketI = -1;
    /* swap out or free root group if needed */
    GArrayRef(groups, flag, rootgroupI, root, state, rNext);
    if (*state != ROOT) {
      /* failure, unless forced insert */
      if (Force == NOFORCE) { return 0; }
      /* lastgroup and lastbucket must be none */
      lastgroupI = lastbucketI = -1;
      /* otherwise must force an insert, need root group... */
      if (*state == OVERFLOW) {
        /* swap out the overflow group */
        Dprint(("root is overflow %ld\n",rootgroupI));
        if (tp->Free == -1) {
          /* nowhere to swap, must resize up */
          Dprint(("tm: resizing for root\n"));
          if (tableResize(tp, RESIZEUPSIZE(tp)) == 0) {
            return -1; /* failure to resize */
          }
          return tableMatch(tp, member1, map1, Force, -1, -1, -1, hsh,
                            rtgrp, nxtgrp, nxtbkt, Bp, hshout, isnew);
        }
        UnFreeTableIndex(AvailableI, tp, tp->Free);
        Gswapout(groups, rootgroupI, AvailableI, flag);
      } else {
        if (*state == FREE) {
          Dprint(("unfreeing rootgroup %ld\n", rootgroupI));
          UnFreeTableIndex(rootgroupI, tp, rootgroupI);
        } else {
          PyErr_SetString(PyExc_SystemError,
                          "bad rootgroup state in tablematch");
          return -1; /* error */
        }
      }
      /* set the next of new root group to self */
      /* paranoia: technically the structure may have changed... (omit?) */
      GArrayRef(groups, flag, rootgroupI, root, state, rNext);
      *state = ROOT;
      *rNext = rootgroupI;
    }
  }
  if (lastgroupI<0) { lastgroupI = rootgroupI; lastbucketI=-1; }
  *rtgrp = rootgroupI;
  /*Dprint(("tm: lg = %ld, rg = %ld, lb = %ld\n",\
    lastgroupI, rootgroupI, lastbucketI));*/
  /* look in circular list until looped or found */
  do {
    Dprint(("tm: looking %ld\n", lastgroupI));
    GArrayRef(groups, flag, lastgroupI, thisgroup, state, Next);
    *nxtgrp = lastgroupI;
    groupmatch(found, thisgroup, flag, hsh, member1, map1,\
               Force, lastbucketI, (*nxtbkt), \
               (*Bp), (*isnew), (*dirtyptr));
    if (*Next == rootgroupI) { break; }
    lastgroupI = *Next;
    lastbucketI = -1;
  } while (found == 0);
  /* success if found */
  if (found != 0) {
    Dprint(("tm: found = %ld\n",found));
    if (found<0) {
      PyErr_SetString(PyExc_SystemError, "groupmatch abnormal return");
      return -1;
    }
    if (*isnew != 0) { tp->entries++; }
    Dprint(("tm: success, rg=%ld, ng=%ld, nb=%ld, ho=%ld, in=%ld", \
            *rtgrp, *nxtgrp, *nxtbkt, *hshout, *isnew));
    return 1;
  }
  /* otherwise force an insert into a new group, if requested */
  if (Force == FORCE) {
    Dprint(("tm: trying to force insert to overflow\n"));
    if (tp->Free == -1) {
      /* no room, no room (mad hatter) */
      Dprint(("tm: resizing for overflow\n"));
      if (tableResize(tp, RESIZEUPSIZE(tp)) == 0) {
        return -1; /* failure to resize */
      }
      return tableMatch(tp, member1, map1, Force, -1, -1, -1, hsh,
                        rtgrp, nxtgrp, nxtbkt, Bp, hshout, isnew);
    }
    UnFreeTableIndex(AvailableI, tp, tp->Free);
    GArrayRef(groups, flag, AvailableI, avail, availState, availNext);
    *availState = OVERFLOW;
    *availNext = rootgroupI;
    *Next = AvailableI;
    groupmatch(found, avail,flag,hsh,member1,map1, Force, -1,
               (*nxtbkt), (*Bp), (*isnew), (*dirtyptr));
    if (found<0) {
      PyErr_SetString(PyExc_SystemError, "groupmatch abnormal return");
      return -1;
    }
    *nxtgrp = AvailableI;
    if (*isnew != 0) { tp->entries++; }
    return 1; /* successful insert */
  }
  return 0; /* not found */
}

/* some simple uses of table matching */

/* find (or set) a matching pair */
static long TableGet1( Table *tp, PyObject *member1, PyObject *map1,
                       long hash, enum ForceFlag Force,
                       PyObject **memout, PyObject **mapout)
{
  long hashout;
  long rt, nxt, nxtb, isnew, found;
  Bucketptr Bp;
  enum BucketFlag flag = tp->flag;
  if (member1 == NULL) {
    PyErr_SetString(PyExc_SystemError, "TableGet1 called with NULL??");
    return -1;
  }
  Dprint(("tg1: calling tablematch\n"));
  found = tableMatch(tp, member1, map1, Force, -1, -1, -1, hash,
                     &rt, &nxt, &nxtb, &Bp, &hashout, &isnew);
  if (found == -1) { return -1; }
  if (found == 0) {
    PyErr_SetObject(PyExc_KeyError, member1);
    return -1;
  }
  BPtrDestructure(Bp, flag, hashout, *memout, *mapout);
  return 0;
}

/* utility function for resizing a table: reinserting a group */
/* could macroize */
long ReInsertGroup( Groupptr g, enum BucketFlag flag, Table *tp)
{
  PyObject *Member = 0, *Map = 0;
  long i, rt, nxt, nxtb, isnew, test;
  long hash = 0, h;
  Bucketptr Bp, Bpdummy;
  for (i=0; i<GSIZE; i++) {
    GetBucket(Bp, g, flag, i);
    BPtrDestructure(Bp, flag, h, Member, Map);
    if (h != NOHASH) {
      test = tableMatch(tp, Member, Map, FORCE, -1, -1, -1, h,
                        &rt, &nxt, &nxtb, &Bpdummy, &hash, &isnew);
      if (test != 1) { return 0; } /* error */
    }
  }
  return 1;
}

/* empty out a table, leaving its structure in place */
static long tableClear(Table *tp)
{
  groupsReinit(tp->groups, tp->flag, tp->size);
  tp->entries = 0;
  return 1;
}
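/* Note on the reentrance protocol of tableMatch above: its rtgrp, nxtgrp
   and nxtbkt outputs feed directly back into its reentrant inputs, so a
   caller (for example the table walkers defined below) can resume a scan
   exactly past the last bucket examined by passing the previous results
   back in together with the same hash value. */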
long tableResize( Table *tp, long expected )
{
  long i, *Next;
  enum GState *State = 0;
  Groupptr g;
  long size = tp->size;
  enum BucketFlag flag = tp->flag;
  GroupArray oldgroups = tp->groups;
  long DirtyVal = tp->Dirty;
  long success = 1; /* assume success */
  Dprint(("tresize: resizing %ld\n",expected));
  /* allocate a new Table */
  if (AllocateBuckets(tp, expected) != 1) { return 0; }
  /* for debug */
  /*if (tp->Free!=-1) {
    GArrayRef(tp->groups, flag, tp->Free, g, State, Next);
    if (*State != FREE) {
      Dprint(("free ptr %ld corrupted in resize/alloc, State=%ld not %ld\n",\
              tp->Free,*State,FREE));
      PyErr_SetString(PyExc_SystemError, "resize fail (1)");
      return 0;
    }
  }*/
  /* now reinsert all former contents */
  for (i=0; i<size; i++) {
    GArrayRef(oldgroups, flag, i, g, State, Next);
    if ((*State == ROOT) || (*State == OVERFLOW)) {
      if (ReInsertGroup(g, flag, tp) == 0) { success = 0; }
      /* for debug */
      /*if (tp->Free!=-1) {
        GArrayRef(tp->groups, flag, tp->Free, g, State, Next);
        if (*State != FREE) {
          Dprint((\
          "free ptr %ld corrupted in resize/reinsert %ld, State=%ld not %ld\n",\
          tp->Free,i,*State,FREE));
          PyErr_SetString(PyExc_SystemError, "resize fail (2)");
          return 0;
        }*/
    }
  }
  /* deallocate the old groups */
  groupsDealloc(oldgroups, flag, size);
  tp->Dirty = DirtyVal; /* use old dirty value... (paranoia) */
  /* for debug */
  /*if (tp->Free!=-1) {
    GArrayRef(tp->groups, flag, tp->Free, g, State, Next);
    if (*State != FREE) {
      Dprint(("free ptr %ld corrupted in resize, State=%ld not %ld\n",tp->Free,\
              *State,FREE));
      PyErr_SetString(PyExc_SystemError, "resize fail (3)");
      return 0;
    }*/
  if (success==0) Dprint(("failing in tableresize\n"));
  return success;
}

/* deleting a member from a group, deletes *all* matching members */
long deleteFromTable(Table *tp, PyObject *member1, PyObject *map1)
{
  PyObject *M = 0, *Mp = 0;
  enum BucketFlag flag = tp->flag;
  GroupArray groups = tp->groups;
  long hash, bhash;
  long test, rtgrp, nxtgrp, nxtbkt, isnew, found, grp, *N = 0,
       brt, bnxtgrp, bnxtbkt, bisnew, bfound, rtg1, rtg2;
  Bucketptr Bp, bBp;
  Groupptr g;
  enum GState *State;
  /* find first match */
  found = tableMatch(tp, member1, map1, NOFORCE, -1, -1, -1, NOHASH,
                     &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew);
  if (found == -1) { return 0; } /* external error */
  if (found == 0) {
    PyErr_SetObject(PyExc_KeyError, member1);
    return 0;
  }
  /* mark the table as dirty */
  tp->Dirty = 1;
  /* delete all such matches */
  while (found) {
    BPtrReInit(Bp, flag);
    tp->entries--;
    found = tableMatch(tp, member1, map1, NOFORCE, rtgrp, nxtgrp, nxtbkt,
                       hash, &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew);
    if (found == -1) { return 0; } /* external error */
  }
  /* back fill nulled entries in circular list (could be faster?) */
  found = tableMatch(tp, 0, 0, NOFORCE, rtgrp, rtgrp, -1, NOHASH,
                     &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew);
  if (found == -1) { return 0; } /* error */
  brt = bnxtgrp = rtgrp;
  bnxtbkt = -1;
  while (found) {
    BPtrDestructure(Bp, flag, hash, M, Mp);
    tp->entries--;
    /* !!! NOTE: since BPtrReInit Py_DECREFs the contents, must
       Py_INCREF contents here to prevent deallocation of the members
       and decref after reinstallation in the table !!!
       (kinda subtle python thing!) !!! */
    Py_XINCREF(M);
    Py_XINCREF(Mp);
    BPtrReInit(Bp,flag);
    bfound = tableMatch(tp, M, Mp, FORCE, brt, bnxtgrp, bnxtbkt, hash,
                        &brt, &bnxtgrp, &bnxtbkt, &bBp, &bhash, &bisnew);
    Py_DECREF(M);
    Py_DECREF(Mp);
    if (bfound != 1) {
      PyErr_SetString(PyExc_SystemError, "?? cannot backfill on delete");
      return 0;
    }
    found = tableMatch(tp, 0, 0, NOFORCE, rtgrp, nxtgrp, nxtbkt, NOHASH,
                       &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew);
    if (found == -1) { return 0; }
  }
  /* now free up any groups on this cycle that are left empty */
  /* this will only delete the rootgroup if there is nothing in the cycle */
  grp = rtgrp;
  do {
    GArrayRef(groups, flag, grp, g, State, N);
    nxtgrp = *N;
    GroupEmpty(test, g,flag);
    if (test) {
      if (grp == rtgrp) {
        rtg1 = rtg2 = rtgrp; /* nasty macro bug fixed here */
        Gprevious(rtg1,flag,rtg2,groups); /* for termination */
      }
      FreeTableIndex(tp,grp);
    }
    grp = nxtgrp;
  } while (grp != rtgrp);
  /* finally, resize if too few entries */
  if (RESIZEDOWNTEST(tp)) {
    tableResize(tp, tp->entries);
  }
  return 1;
}

/***********************************************************/
/** table walker methods                                  **/

/* TableWalkers are used for *strictly local and temporary* walking
   of table structure in two ways:
     - by key
     - by all values in table
   (things like increfs and decrefs aren't done since use is temporary). */
typedef struct {
  Table *tp;
  long valid; /* 1 means okay, 0 means done, -1 means error */
  long root;
  long lastgroup;
  long lastbucket;
  PyObject *key;
  PyObject *map;
  long hash;
} TableWalker;

/* methods for walking by all values */
static long NextAll(TableWalker *twp)
{
  Bucketptr Bp;
  Groupptr g;
  enum BucketFlag flag;
  enum GState *State = 0;
  long *Next, size, found, isnew, dirtyptr;
  PyObject *dummy;
  Table *tp = twp->tp;
  size = tp->size;
  flag = tp->flag;
  if (twp->lastgroup > size) {
    twp->valid = 0;
    return 0; /* failure return */
  }
  if ((twp->lastgroup == -1) || (twp->lastbucket>GSIZE)){
    twp->lastbucket = -1;
    twp->lastgroup++;
  }
  found = 0;
  do {
    GArrayRef(tp->groups, flag, twp->lastgroup, g, State, Next);
    if ((*State==ROOT) || (*State==OVERFLOW)) {
      dummy = 0;
      groupmatch(found, g, flag, NOHASH, dummy, dummy, NOFORCE,\
                 (twp->lastbucket), (twp->lastbucket), \
                 Bp, isnew, dirtyptr);
    }
    if (found==0) {
      twp->lastgroup++;
      twp->lastbucket = -1;
    }
  } while ( (found == 0) && (twp->lastgroup < size) );
  if (found == 0) {
    twp->valid = 0;
    return 0; /* failure return */
  }
  /* success: find the hash, key and map values */
  BPtrDestructure(Bp, flag, (twp->hash), (twp->key), (twp->map));
  twp->valid = 1;
  /*printf("allwalker: item found with hash %ld\n",twp->hash);*/
  return 1; /* successful return */
}

/* could macroize */
static void InitAll(TableWalker *twp, Table *tp)
{
  twp->lastgroup = -1;
  twp->lastbucket = -1;
  twp->tp = tp;
  twp->valid = NextAll(twp);
}

/* methods for walking by key
   NOHASH may be used as an "unknown" hash value */
static long Nextbykey(TableWalker *twp)
{
  Bucketptr Bp;
  PyObject *dummyk;
  long dummyh;
  long isnew;
  Dprint(("Nextbykey\n"));
  twp->valid = tableMatch(twp->tp, twp->key, 0, NOFORCE,
                          twp->root, twp->lastgroup, twp->lastbucket,
                          twp->hash,
                          &(twp->root), &(twp->lastgroup), &(twp->lastbucket),
                          &Bp, &(twp->hash), &isnew);
  if (twp->valid == 1) {
    BPtrDestructure(Bp, twp->tp->flag, dummyh, dummyk, (twp->map));
  }
  return twp->valid;
}

/* could macroize */
static void Initbykey(TableWalker *twp, Table *tp, PyObject *key, long hash)
{
  Dprint(("Initbykey\n"));
  twp->tp = tp;
  twp->root = -1;
  twp->lastgroup = -1;
  twp->lastbucket = -1;
  twp->key = key;
  twp->hash = hash;
  twp->valid = Nextbykey(twp);
}
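/* Typical walker usage, as in the combining routines below:

       TableWalker tw;
       InitAll(&tw, table);
       while (tw.valid == 1) {
           ... use tw.key, tw.map and tw.hash ...
           NextAll(&tw);
       }

   with tw.valid ending at 0 (exhausted) or -1 (error). */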
/*******************************************************************/
/** methods for combining tables                                  **/

/* augmenting one table using another, assuming types are compatible */
static long Taugment(Table *target, Table *source)
{
  long test;
  TableWalker tw;
  PyObject *d1, *d2;
  /* walk through the source */
  (void) InitAll(&tw, source);
  while (tw.valid == 1) {
    Dprint(("taug: TableGet1\n"));
    test = TableGet1(target, tw.key, tw.map, tw.hash, FORCE, &d1, &d2);
    if (test!=0) { return -1; } /* error return */
    (void) NextAll(&tw);
  }
  return tw.valid; /* 0 for success, -1 for error */
}

/* transpose a table (can't be a set!)
   if target is a dictionary result may be nondeterministic
   unless source is 1:1.
   if target is a set result will be set of all targets+dests (nodes) */
static long Ttranspose(Table *target, Table *source)
{
  long test;
  TableWalker tw;
  PyObject *d1, *d2;
  enum BucketFlag tflag = target->flag;
  /* source flag cannot be set */
  if (source->flag == SETFLAG) {
    PyErr_SetString(PyExc_TypeError, "Cannot transpose set");
    return -1; /* error return */
  }
  /* walk through the source */
  (void) InitAll(&tw, source);
  while (tw.valid == 1) {
    if (tflag == SETFLAG) {
      /* add mem and map separately to target */
      test = TableGet1(target, tw.key, 0, tw.hash, FORCE, &d1, &d2);
      if (test!=0) { return -1; } /* error */
      test = TableGet1(target, tw.map, 0, NOHASH, FORCE, &d1, &d2);
      if (test!=0) { return -1; } /* error */
    } else {
      /* add inversion */
      test = TableGet1(target, tw.map, tw.key, NOHASH, FORCE, &d1, &d2);
      if (test!=0) { return -1; } /* error */
    }
    /* advance cursor */
    (void) NextAll(&tw);
  }
  return tw.valid; /* 0 for success, -1 for error */
}

/* Compose a dict/graph with a dict/graph and put the result in another.
   If mask is non-null mask out any members of mask (for tclosure).
   Table types assumed to be sensible.
   target = ( (left o right) - mask )
   long returned is number of inserts or -1 on error.
   if prelim is set only counting will be done, no inserts
   (target may be null). */
static long Tcompose(Table *target, Table *left, Table *right,
                     Table *mask, long prelim)
{
  TableWalker lwalker, rwalker;
  PyObject *d1, *d2;
  long test, count, exclude, rt, nxt, nxtb, isnew;
  Bucketptr Bp;
  long hashout;
  enum BucketFlag lflag = left->flag;
  /* walk through left */
  (void) InitAll(&lwalker, left);
  Dprint(("Tcompose: lwalker initialized\n"));
  count = 0;
  while (lwalker.valid == 1) {
    /* walk through members of right matching lwalker.map */
    /* if left is a set then don't recompute the hash value */
    if (lflag == SETFLAG) {
      (void) Initbykey(&rwalker, right, lwalker.key, lwalker.hash);
    } else {
      (void) Initbykey(&rwalker, right, lwalker.map, NOHASH);
    }
    Dprint(("Tcompose: rwalker initialized\n"));
    while (rwalker.valid == 1) {
      exclude = 0;
      if (mask != 0) {
        Dprint(("Tcompose: computing exclude\n"));
        exclude = tableMatch(mask, lwalker.key, rwalker.map, NOFORCE,
                             -1, -1, -1, lwalker.hash,
                             &rt, &nxt, &nxtb, &Bp, &hashout, &isnew);
      }
      if (exclude==0) {
        if (prelim==0) {
          test = TableGet1(target, lwalker.key, rwalker.map,
                           lwalker.hash, FORCE, &d1, &d2);
          if (test!=0) { return -1; } /* error */
        }
        count++;
      }
      (void) Nextbykey(&rwalker);
    }
    if (rwalker.valid == -1) { return -1; } /* error */
    (void) NextAll(&lwalker);
  }
  if (lwalker.valid == -1) { return -1; } /* error */
  return count;
}
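/* Composition example: if left holds the arc (a,b) and right holds (b,c),
   Tcompose adds (a,c) to target.  A pair already present in mask is not
   added or counted, which is how the transitive closure computation avoids
   re-inserting arcs it has already discovered.  With a nonzero prelim the
   routine inserts nothing and only returns the count. */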
*/ found = tableMatch(right, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return -1; } /* error */ /* maybe either include or exclude the member based on flag value */ if ( ((include==1)&&(found==1)) || ((include==0)&&(found==0)) ) { if (prelim == 0) { test = TableGet1(target, tw.key, tw.map, tw.hash, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ } count++; } /* advance cursor */ (void) NextAll(&tw); } if (tw.valid == -1) { return -1; } /* error */ return count; /* success */ } /* Utility function for comparisons: find the "smallest" pair in left that is not in right return 1 if found, else 0 (-1 on error). */ static long Tmindiff(Table *left, Table *right, PyObject **mem, PyObject **map, long *hash) { long hashout; long gotit, rt, nxt, nxtb, isnew, found, cmp; Bucketptr Bp; TableWalker tw; /* walk through left */ (void) InitAll(&tw, left); gotit = 0; while (tw.valid == 1) { /* is current in right? */ found = tableMatch(right, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return -1; } /* error */ /* if it wasn't in right test it for minimality */ if (found == 0) { if (gotit == 0) { *mem = tw.key; *map = tw.map; *hash = tw.hash; gotit = 1; } else { cmp = *hash - tw.hash; if (cmp == 0) { cmp = PyObject_Compare( tw.key, *mem ); } if ((cmp>0) || ((cmp==0) && (tw.map!=0) && (PyObject_Compare( tw.map, *map )>0))) { *mem = tw.key; *map = tw.map; *hash = tw.hash; } } } (void) NextAll(&tw); } if (tw.valid == -1) { return -1; } /* error */ return gotit; } /* for coercing table types: Dict intersect Graph is Dict, Dict union Graph is Graph, etc. generality should be positive (nonzero) to default to more general negative to default to less general */ static long FlagCoercion(enum BucketFlag flag1, enum BucketFlag flag2, enum BucketFlag *fp, long Generality) { *fp = flag2; if ( ((flag1 > flag2) && (Generality>0) ) || ((flag1 < flag2) && (Generality<0) ) ) { *fp = flag1; } return 1; /* always succeed */ } /*********************************************/ /* python data structures and interfaces... */ /*********************************************/ /* general structure for all table behaviors */ typedef struct { PyObject_VAR_HEAD /* the hash flag */ /* IF THIS IS NOT NOHASH THE TABLE SHOULD BE IMMUTABLE */ long hashvalue; /* the flag in member rep determines behaviors */ Table rep; } TableWrapper; /* predeclarations of type objects */ staticforward PyTypeObject kjSettype; staticforward PyTypeObject kjDicttype; staticforward PyTypeObject kjGraphtype; /* type test macros */ #define is_kjSetobject(op) ((op)->ob_type == &kjSettype) #define is_kjDictobject(op) ((op)->ob_type == &kjDicttype) #define is_kjGraphobject(op) ((op)->ob_type == &kjGraphtype) #define is_kjTable(op) \ ( is_kjSetobject(op) || is_kjDictobject(op) || is_kjGraphobject(op) ) /* for algebraic operations that may be using a tainted argument propagate the taintedness... (requires ending semicolon!) 
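   A minimal sketch of what taintedness means at the Python level, assuming
   a built kjbuckets module and assuming -- as the insertion code suggests --
   that a FORCEd insert colliding with an existing entry under dict
   semantics sets Dirty (output paraphrased):

       >>> from kjbuckets import kjDict
       >>> d = kjDict([(1,2), (1,3)])   # second pair collides with first
       >>> d.Clean() is None            # dirty: Clean() yields None
       1
       >>> d.Wash()                     # force the table clean
       >>> d.Clean() is d               # clean: Clean() returns the table
       1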
*/ #define propagateDirt(in,out) \ if (in->rep.Dirty!=0) out->rep.Dirty = 1 /* internal allocation function for table wrappers */ static PyObject * newWrapper(long expectedsize, enum BucketFlag flag) { /* allocate one wrapper */ TableWrapper *wp; Dprint(("WnewWrapper\n")); wp = PyMem_NEW(TableWrapper, 1); if (wp == NULL) { return PyErr_NoMemory(); /* allocation failure */ } switch (flag) { case SETFLAG: wp->ob_type = &kjSettype; break; case DICTFLAG: wp->ob_type = &kjDicttype; break; case GRAPHFLAG: wp->ob_type = &kjGraphtype; break; default: PyErr_SetString(PyExc_SystemError, "invalid internal table flag"); return NULL; } /* initialize the internal table */ if (initTable(&(wp->rep), flag, expectedsize) == 0) { /* initialization failed, assume an appropriate error is set */ PyMem_Del(wp); return NULL; } Dprint(("WnewWrapper: table initialized\n")); wp->hashvalue = NOHASH; /* INITIALIZE THE REFERENCE COUNT FOR THE NEW OBJECT */ _Py_NewReference(wp); return (PyObject *) wp; } /* *almost* an external python constructor for wrappers */ static PyObject * makeWrapper(PyObject *module, PyObject *args, enum BucketFlag flag) { TableWrapper *result, *initWrapper; PyObject *initlist, *pair, *key, *map, *d1, *d2; long len = 0, members, valid, index, islist, iskjtable, istuple; Table *tp; islist = 0; iskjtable = 0; istuple = 0; initlist = NULL; initWrapper = NULL; Dprint(("makeWrapper\n")); /* no args: allocate a smallest table: */ if (args == NULL) { members = 0; } else { /* some args: check it and determine its length */ valid = PyArg_Parse(args, "i", &members); if (!valid) { PyErr_Clear(); valid = PyArg_Parse(args, "O", &initlist); if (valid) { islist = PyList_Check(initlist); if (islist) { Dprint(("makeWrapper from list\n")); len = PyList_Size(initlist); } else { iskjtable = is_kjTable(initlist); if (iskjtable) { Dprint(("makeWrapper from kj-table\n")); initWrapper = (TableWrapper *) initlist; len = initWrapper->rep.entries; } else { istuple = PyTuple_Check(initlist); if (istuple) { Dprint(("makeWrapper from tuple\n")); len = PyTuple_Size(initlist); } else { valid = 0; } } } } if (!valid) { PyErr_SetString(PyExc_TypeError, "initializer must be integer or list or tuple or kj-Table"); return NULL; } members = len/2; /* try to conserve space when initializing from list */ } } result = (TableWrapper *) newWrapper(members, flag); if (result == NULL) { return NULL; } /* error */ /* use initialization list if there is one */ if (initlist != NULL) { /* if its a Python list or tuple, initialize from it... */ if ( islist || istuple ) { Dprint(("makeWrapper unpacking Python sequence\n")); tp = &(result->rep); for (index = 0; indexrep), &(initWrapper->rep) ); if (valid!=0) { Py_DECREF(result); return NULL; } } } return (PyObject *) result; } /* specialization for sets */ static PyObject * makekjSet(PyObject *module, PyObject *args) { return makeWrapper(module, args, SETFLAG); } /* specialization for graphs */ static PyObject * makekjGraph(PyObject *module, PyObject *args) { return makeWrapper(module, args, GRAPHFLAG); } /* specialization for dicts */ static PyObject * makekjDict(PyObject *module, PyObject *args) { return makeWrapper(module, args, DICTFLAG); } #ifdef KJBDEBUG static PyObject * Wdebug( PyObject *m, PyObject *a) { if (DebugLevel) { DebugLevel = 0; } else { DebugLevel = 1; } Py_INCREF(Py_None); return Py_None; } #endif static void WrapperDeallocate(TableWrapper *wp) { /* must properly decref references... 
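   every key and map stored in the table was INCREFed by BPtrSet on insert,
   so teardown must walk every group and DECREF in turn. The wrappers torn
   down here come into being only through makeWrapper above, which accepts
   an expected-size integer, a list or tuple of initial contents, or another
   kj-table. A minimal constructor sketch (assuming a built kjbuckets
   module; output paraphrased):

       >>> from kjbuckets import kjSet, kjDict, kjGraph
       >>> kjSet(1000)                     # presized and empty
       kjSet([])
       >>> d = kjDict([('a',1), ('b',2)])  # from a list of pairs
       >>> kjGraph(d)                      # from another kj-table
       kjGraph([('a', 1), ('b', 2)])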
*/ groupsDealloc( wp->rep.groups, wp->rep.flag, wp->rep.size ); PyMem_Del(wp); } /* hash value: symmetrical on members, a symmetrical within pairs */ static long Wrapper_hash(TableWrapper *wp) { enum BucketFlag flag = wp->rep.flag; long this, that; long result = 121345; /* silly init value */ TableWalker tw; Dprint(("Whash\n")); if (wp->hashvalue != NOHASH) { /* memoized hash value */ return wp->hashvalue; } result *= (wp->rep.entries+1); (void) InitAll(&tw, &(wp->rep)); while (tw.valid == 1) { this = tw.hash; /* bug/feature: structures that differ only on unhashable maps will have the same hash value. I don't know whether to keep this of "fix" it. Hmmm. */ if ( (flag != SETFLAG) &&(tw.map != 0)) { GETHASH(that,tw.map); if (that == -1) { PyErr_Clear(); } this += (that*23); } result ^= this; (void) NextAll(&tw); } if (tw.valid == -1) { return NOHASH; } /* error */ if (result == -1) { result = 973; } wp->hashvalue = result; return result; } static PyObject * WrapperItems1(TableWrapper *wp, PyObject *args, long dokey, long domap) { PyObject *resultlist, *membertuple; TableWalker tw; long index, entries; Dprint(("WItems1\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ entries = wp->rep.entries; /* make a list for all entries */ resultlist = PyList_New( entries ); if (resultlist == NULL) { return NULL; } /* error */ /* walk through the table */ (void) InitAll(&tw, &(wp->rep)); index = 0; while (tw.valid == 1) { /* sanity check */ if (index >= entries) { Py_DECREF(resultlist); PyErr_SetString(PyExc_SystemError, "loop overflowing in WrapperItems"); return NULL; /* error */ } /* get only the key, if requested */ if ((dokey != 0) && (domap == 0)) { Py_XINCREF(tw.key); PyList_SetItem(resultlist, index, tw.key); } else { /* get only the map, if requested */ if ((domap != 0) && (dokey == 0)) { Py_XINCREF(tw.map); PyList_SetItem(resultlist, index, tw.map); } else { /* otherwise get both */ membertuple = PyTuple_New(2); if (membertuple == NULL) { Py_DECREF(resultlist); return NULL; /* error */ } Py_XINCREF(tw.key); PyTuple_SetItem(membertuple, 0, tw.key); Py_XINCREF(tw.map); PyTuple_SetItem(membertuple, 1, tw.map); PyList_SetItem(resultlist, index, membertuple); } } index++; (void) NextAll(&tw); } if (tw.valid == -1) { Py_DECREF(resultlist); return NULL; /* error */ } return resultlist; } static PyObject * WrapperItems(TableWrapper *wp, PyObject *args) { Dprint(("WItems\n")); if (wp->rep.flag == SETFLAG) { /* for sets do key only */ return WrapperItems1(wp, args, 1, 0); } else { /* for others, get both */ return WrapperItems1(wp, args, 1, 1); } } /* prlong function with debug option */ static long WrapperPrint(TableWrapper *wp, FILE *fp, long flags) { PyObject * items; #ifdef WDEBUGPRINT if (WDEBUGPRINT) { return TableDump((wp->rep), fp); } #endif switch (wp->rep.flag) { case SETFLAG: fprintf(fp, "kjSet("); break; case DICTFLAG: fprintf(fp, "kjDict("); break; case GRAPHFLAG: fprintf(fp, "kjGraph("); break; default: fprintf(fp, "??unknown table type??\n"); } items = WrapperItems(wp, NULL); if (items == NULL) { fprintf(fp, "??couldn't allocate items??\n"); return -1; } if (PyObject_Print(items, fp, 0) != 0) { return -1; } Py_DECREF(items); fprintf(fp, ")"); return 0; } static PyObject* WrapperRepr(TableWrapper *wp) { PyObject *items, *result, *itemstring; char buf[256]; switch (wp->rep.flag) { case SETFLAG: sprintf(buf, "kjSet("); break; case DICTFLAG: sprintf(buf, "kjDict("); break; case GRAPHFLAG: sprintf(buf, "kjGraph("); break; default: PyErr_SetString(PyExc_SystemError, "Bad flag in 
table"); return NULL; } result = PyString_FromString(buf); items = WrapperItems(wp, NULL); if (items == NULL) { return NULL; } itemstring = PyObject_Repr(items); Py_DECREF(items); PyString_ConcatAndDel(&result, itemstring); PyString_ConcatAndDel(&result, PyString_FromString(")")); return result; } /* nonzero testing */ static long Wrapper_nonzero(TableWrapper *wp) { Dprint(("Wnonzero\n")); return (wp->rep.entries != 0); } /* comparison: if w1 and w2 are of same type then w1rep); rtable = &(right->rep); lentries = ltable->entries; rentries = rtable->entries; if (lentries 0) { return 1; } /* mems are identical, try maps */ if ( (lmap != 0) && (rmap != 0) ) { /* if we get this far the following shouldn't return 0, ever. */ return PyObject_Compare(lmap,rmap); } /* this should be an error, but it can't be done?? */ return 0; } static PyObject * Whas_key(TableWrapper *wp, PyObject *args) { long test, rt, nxt, nxtb, isnew; long hashout; Bucketptr Bp; PyObject *key; Dprint(("Whas_key\n")); if ((args == NULL) || !PyArg_Parse(args, "O", &key)) { PyErr_SetString(PyExc_TypeError, "table method has_key requires an argument"); return NULL; } test = tableMatch(&(wp->rep), key, 0, NOFORCE, -1, -1, -1, NOHASH, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (test == -1) { return NULL; } /* error */ return PyInt_FromLong((long) test); } /* Get the neighbors of a node in a graph. */ static PyObject *Gneighbors(TableWrapper *wp, PyObject *args) { PyObject *key, *resultlist; Table *tp; TableWalker tw; long count, index; Dprint(("Gneighbors\n")); if ((args == NULL) || !PyArg_Parse(args, "O", &key)) { PyErr_SetString(PyExc_TypeError, "table method neighbors requires an argument"); return NULL; } tp = &(wp->rep); if ( tp->flag == SETFLAG ) { PyErr_SetString(PyExc_TypeError, "neighbors not defined for table of this type"); return NULL; } /* find out how many neighbors there are */ count = 0; (void) Initbykey(&tw, tp, key, NOHASH); Dprint(("Gneighbors: counting neighbors\n")); while (tw.valid == 1) { count++; (void) Nextbykey(&tw); } if (tw.valid == -1) { return NULL; } /* error */ /* make a list large enough */ Dprint(("Gneighbors: making resultlist\n")); resultlist = PyList_New( count ); if (resultlist == NULL) { return NULL; } /* failure to allocate */ /* record neighbors in list */ (void) Initbykey(&tw, tp, key, NOHASH); index = 0; Dprint(("Gneighbors: storing results\n")); while (tw.valid == 1) { if (index >= count) { Py_DECREF(resultlist); PyErr_SetString(PyExc_SystemError, "loop overflow in neighbors calculation"); return NULL; } Py_XINCREF(tw.map); PyList_SetItem(resultlist, index, tw.map); index++; (void) Nextbykey(&tw); } if (tw.valid == -1) { Py_DECREF(resultlist); return NULL; } return resultlist; } /* utility function for extracting keys or values if domaps is set this will get maps uniquely *only if all maps are hashable!* */ static PyObject *Wparts(TableWrapper *wp, PyObject *args, long domaps) { TableWalker tw; Table *tp, *Settp; TableWrapper *tempSet; PyObject *mem, *map, *items; long test; Dprint(("Wparts\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ tp = &(wp->rep); if (tp->flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "keys/values not defined for sets"); return NULL; } /* initialize a temp set to hold the keys */ /* try to save a little space here, may actually waste space sometimes */ tempSet = (TableWrapper *) newWrapper(tp->entries/4, SETFLAG); if (tempSet == NULL) { return NULL; } Settp = &(tempSet->rep); /* walk the table and record the keys */ (void) InitAll(&tw, tp); 
test = 0; while ((tw.valid == 1) && (test != -1)) { if (domaps) { test = TableGet1(Settp, tw.map, 0, NOHASH, FORCE, &mem, &map); } else { test = TableGet1(Settp, tw.key, 0, tw.hash, FORCE, &mem, &map); } (void) NextAll(&tw); } if ((test == -1) || (tw.valid == -1)) { Py_DECREF(tempSet); return NULL; } items = WrapperItems(tempSet, NULL); Py_DECREF(tempSet); return items; } static PyObject *Wkeys(TableWrapper *wp, PyObject *args) { Dprint(("Wkeys\n")); return Wparts(wp, args, 0); } static PyObject *Wvalues(TableWrapper *wp, PyObject *args) { Dprint(("Wvalues\n")); /* return Wparts(wp, args, 1); -- wrong! */ return WrapperItems1(wp, args, 0, 1); } /* choose an arbitrary key from the table or raise an indexerror if none */ static PyObject *Wchoose_key(TableWrapper *wp, PyObject *args) { TableWalker tw; Dprint(("Wchoose_key\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ (void) InitAll(&tw, &(wp->rep)); if (tw.valid == 1) { Py_XINCREF(tw.key); return tw.key; } if (tw.valid == 0) { PyErr_SetString(PyExc_IndexError, "table is empty"); return NULL; } /* external error otherwise (tw.valid == -1) */ return NULL; } static PyObject *WSubset(TableWrapper *subset, PyObject *args) { TableWrapper *superset; long hashout; long rt, nxt, nxtb, isnew, found; Bucketptr Bp; TableWalker tw; Table *supertable; Dprint(("WSubset\n")); /* verify argument */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "Subset test requires argument"); return NULL; } if (!PyArg_Parse(args, "O", &superset)) { return NULL; } if ( !is_kjTable(superset)) { PyErr_SetString(PyExc_TypeError, "Subset defined only between kj-tables"); return NULL; } /* walk through subset, test for membership of all members */ (void) InitAll(&tw, &(subset->rep)); supertable = &(superset->rep); while (tw.valid == 1) { found = tableMatch(supertable, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return NULL; } /* error */ if (found == 0) { /* subset test fails */ return PyInt_FromLong((long) 0); } (void) NextAll(&tw); } if (tw.valid == -1) { return NULL; } /* error */ /* otherwise, success */ return PyInt_FromLong((long) 1); } /* transitive closure of a graph */ /* algorithm could be made faster, KISS for now. 
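   A minimal sketch of the fixpoint this computes (assuming a built
   kjbuckets module; output paraphrased):

       >>> from kjbuckets import kjGraph
       >>> g = kjGraph([(1,2), (2,3)])
       >>> g.tclosure()
       kjGraph([(1, 2), (1, 3), (2, 3)])

   Each pass composes the source with the closure so far, masks out arcs
   already present, and stops once no new arcs appear.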
*/ static PyObject *Wtransclose(TableWrapper *wp, PyObject *args) { Table *source, *target, Delta; TableWrapper *closure; enum BucketFlag flag; long count, test, abort; Dprint(("Wtransclose\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ source = &(wp->rep); flag = source->flag; if (flag != GRAPHFLAG) { PyErr_SetString(PyExc_TypeError, "transitive closure not defined for this table type"); return NULL; } Dprint(("tc: allocating closure\n")); closure = (TableWrapper *) newWrapper(source->entries, flag); if (closure == NULL) { return NULL; } propagateDirt(wp, closure); target = &(closure->rep); /* closure of source contains source */ Dprint(("tc: augmenting closure\n")); test = Taugment( target, source ); if (test != 0) { Py_DECREF(closure); return NULL; } /* initialize temp table Delta for transitive arcs */ test = initTable(&Delta, flag, 0); /* add all transitive arcs */ abort = 0; do { /* Delta = (source o target) - target */ Dprint(("tc: calling tcompose\n")); count = Tcompose(&Delta, source, target, target, 0); Dprint(("tc: delta computed, count = %ld\n",count)); if (count<0) { abort = 1; } if ((abort == 0) && (count>0)) { /* target = target U Delta */ Dprint(("tc: augmenting target\n")); test = Taugment( target, &Delta ); Dprint(("tc: done augmenting target\n")); if (test!=0) { abort = 1; } tableClear( &Delta ); } Dprint(("tc: loop body done, count=%ld, abort=%ld\n",count,abort)); /* loop terminates when (source o target) subset target */ } while ((count>0) && (abort==0)); Dprint(("tc: deallocating Delta\n")); groupsDealloc(Delta.groups, flag, Delta.size); if (abort != 0) { Py_DECREF(closure); return NULL; } return (PyObject *) closure; } static void Wset_hash_error(void) { PyErr_SetString(PyExc_TypeError, "table has been hashed, it is now immutable"); } static PyObject * Wdelete_arc(TableWrapper *wp, PyObject *args) { PyObject *key, *map; Dprint(("Wdelete_arc\n")); if ((args == NULL) || !PyArg_Parse(args, "(OO)", &key, &map)) { PyErr_SetString(PyExc_TypeError, "delete_arc requires two arguments"); return NULL; } if (wp->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "delete_arc not defined on sets"); return NULL; } if (wp->hashvalue != NOHASH) { Wset_hash_error(); return NULL; } if (deleteFromTable(&(wp->rep), key, map) == 0) { return NULL; } Py_INCREF(Py_None); return Py_None; } /* simple membership test */ static PyObject * Wmember1(TableWrapper *wp, PyObject *args, long insert) { PyObject *key, *map; Table *tp; enum BucketFlag flag; long hashout; long rt, nxt, nxtb, isnew, found; Bucketptr Bp; Dprint(("Wmember1\n")); tp = &(wp->rep); flag = tp->flag; /* determine key and map */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "membership test requires argument(s)"); return NULL; } if ((insert!=0) & (wp->hashvalue!=NOHASH)) { Wset_hash_error(); return NULL; } if (flag == SETFLAG) { if (!PyArg_Parse(args, "O", &key)) { return NULL; } map = 0; } else { if (!PyArg_Parse(args, "(OO)", &key, &map)) { return NULL; } } if (insert == 0) { found = tableMatch(tp, key, map, NOFORCE, -1, -1, -1, NOHASH, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); return PyInt_FromLong((long) found); } else { found = TableGet1(tp, key, map, NOHASH, FORCE, &key, &map); if (found == -1) { return NULL; } Py_INCREF(Py_None); return Py_None; } } static PyObject * Wmember(TableWrapper *wp, PyObject *args) { Dprint(("Wmember\n")); return Wmember1(wp, args, 0); } static PyObject * Waddmember(TableWrapper *wp, PyObject *args) { Dprint(("Waddmember\n")); return Wmember1(wp, args, 1); } /* 
generate identity graph from a set */ static PyObject * Gidentity(TableWrapper *SourceSet, PyObject *args) { TableWrapper *resultGraph; Table *Graphtp; TableWalker tw; long test; PyObject *d1, *d2; Dprint(("Gidentity\n")); if (!PyArg_Parse(args, "")) { return NULL; } if (SourceSet->rep.flag != SETFLAG) { PyErr_SetString(PyExc_TypeError, "graph identity not defined for table of this type"); return NULL; } /* make a new DICTIONARY for result, may waste space for graphs */ resultGraph = (TableWrapper *) newWrapper(SourceSet->rep.entries/3, DICTFLAG); if (resultGraph == NULL) { return NULL; } Graphtp = &(resultGraph->rep); /* walk through the set */ (void) InitAll(&tw, &(SourceSet->rep)); test = 0; while ((tw.valid == 1) && (test != -1)) { test = TableGet1(Graphtp, tw.key, tw.key, tw.hash, FORCE, &d1, &d2); (void) NextAll(&tw); } if ((test == -1) || (tw.valid == -1)) { Py_DECREF(resultGraph); return NULL; } return (PyObject *) resultGraph; } static PyObject * Greachable(TableWrapper *graph, PyObject *args) { PyObject *key, *d1, *d2; TableWrapper *resultSet, *tempSet, *deltaSet; Table *resulttp, *temptp, *deltatp, *graphtp; TableWalker deltaW, graphW; long test, fail; Dprint(("Greachable\n")); if (graph->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "reachable not defined for this table type"); return NULL; } if ((args == NULL) || (!PyArg_Parse(args, "O", &key))) { PyErr_SetString(PyExc_TypeError, "reachable requires key argument"); return NULL; } /* make result and temporary sets for computation */ resultSet = (TableWrapper *) newWrapper(0, SETFLAG); tempSet = (TableWrapper *) newWrapper(0, SETFLAG); deltaSet = (TableWrapper *) newWrapper(0, SETFLAG); if ((deltaSet == NULL) || (resultSet == NULL) || (tempSet == NULL)) { Py_DECREF(deltaSet); Py_DECREF(resultSet); Py_DECREF(tempSet); return NULL; } propagateDirt(graph, resultSet); /* get table pointers */ resulttp = &(resultSet->rep); temptp = &(tempSet->rep); deltatp = &(deltaSet->rep); graphtp = &(graph->rep); /* initialize deltaSet to contain only the key */ test = TableGet1(deltatp, key, 0, NOHASH, FORCE, &d1, &d2); fail = 0; if (test == -1) { fail = 1; } /* repeat the following loop until delta becomes empty */ while ((deltatp->entries > 0) && (fail == 0)) { /* put all neighbors to delta members in temp */ (void) InitAll(&deltaW, deltatp); while ((deltaW.valid == 1) && (fail == 0)) { /* use this entry in delta to traverse neighbors in graph */ (void) Initbykey(&graphW, graphtp, deltaW.key, deltaW.hash); while ((graphW.valid == 1) && (fail == 0)) { test = TableGet1(temptp, graphW.map, 0, NOHASH, FORCE, &d1, &d2); if (test == -1) { fail = 1; } (void) Nextbykey(&graphW); } if (graphW.valid == -1) { fail = 1; } /* external error */ (void) NextAll(&deltaW); } if (deltaW.valid == -1) { fail = 1; } /* external error */ /* clear delta and reinit to temp-result */ if (fail == 0) { tableClear(deltatp); test = Tintdiff(deltatp, temptp, resulttp, 0, 0); if (test<0) { fail = 1; } } /* now add delta to result and clear temp */ if (fail == 0) { tableClear( temptp ); test = Taugment( resulttp, deltatp ); if (test != 0) { fail = 1; } } } /* endwhile delta has entries... 
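   In Python terms the loop yields the nodes reachable from key along one or
   more arcs; the key itself appears only if it lies on a cycle. A minimal
   sketch (assuming a built kjbuckets module; output paraphrased):

       >>> from kjbuckets import kjGraph
       >>> kjGraph([(1,2), (2,3)]).reachable(1)
       kjSet([2, 3])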
*/ /* get rid of temporaries */ Py_DECREF(tempSet); Py_DECREF(deltaSet); if (fail != 0) { Py_DECREF(resultSet); return NULL; } return (PyObject *) resultSet; } /* Clean filter: returns argument if the table is clean, otherwise NULL */ static PyObject * WClean(TableWrapper *wp, PyObject *args) { Dprint(("WClean\n")); if (!PyArg_Parse(args, "")) { return NULL; } if (wp->rep.Dirty) { Py_INCREF(Py_None); return Py_None; } else { Py_INCREF(wp); return (PyObject *) wp; } } /* force a table to be dirty */ static PyObject * WSoil(TableWrapper *wp, PyObject *args) { Dprint(("WSoil\n")); if (!PyArg_Parse(args, "")) { return NULL; } wp->rep.Dirty = 1; Py_INCREF(Py_None); return Py_None; } /* force a table to be clean */ static PyObject * WWash(TableWrapper *wp, PyObject *args) { Dprint(("WWash\n")); if (!PyArg_Parse(args, "")) { return NULL; } wp->rep.Dirty = 0; Py_INCREF(Py_None); return Py_None; } /* remap remaps a dictionary using a table which represents key rename pairs. Can be used to duplicate and/or project mappings. If the result is "dirty" (ie, if name/value collisions) Py_None is returned. */ static PyObject * Dremap(TableWrapper *wp, PyObject *args) { TableWrapper *remapper, *result; long count; Dprint(("Dremap\n")); if (!is_kjDictobject(wp)) { PyErr_SetString(PyExc_TypeError, "remap only defined for kjDicts"); return NULL; } if (args == NULL) { PyErr_SetString(PyExc_TypeError, "remap requires equality table argument"); return NULL; } if (!PyArg_Parse(args, "O", &remapper)) { return NULL; } if ( !is_kjTable(remapper)) { PyErr_SetString(PyExc_TypeError, "remap defined only between kj-tables"); return NULL; } /* don't assume anything about size of result */ result = (TableWrapper *) newWrapper(0, DICTFLAG); if (result == NULL) { return NULL; } /* allocation error */ propagateDirt(wp, result); propagateDirt(remapper, result); /* return NONE if result is dirty (save some work) */ if (result->rep.Dirty != 0) { Py_DECREF(result); Py_INCREF(Py_None); return Py_None; } count = Tcompose( &(result->rep), &(remapper->rep), &(wp->rep), 0, 0); if (count<0) { Py_DECREF(result); return NULL; /* error */ } /* return NONE if result is dirty after composition */ if (result->rep.Dirty != 0) { Py_DECREF(result); Py_INCREF(Py_None); return Py_None; } return (PyObject *) result; } /* forward declarations needed below */ static PyObject * kjDict_subscript(TableWrapper *Set, PyObject *key); static long kjDict_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing); /* for dumping a dictionary to a tuple */ /* D.dump(tup) produces D[tup[0]] if tup of len 1 or (D[tup[0]], D[tup[1]],...) if tup of len > 1 or keyerror if keys aren't present. 
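   For example (a minimal sketch assuming a built kjbuckets module; output
   paraphrased):

       >>> from kjbuckets import kjDict
       >>> D = kjDict([('a',1), ('b',2)])
       >>> D.dump(('a',))
       1
       >>> D.dump(('a','b'))
       (1, 2)
       >>> D.dump(('z',))       # absent key raises KeyError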
*/ static PyObject * kjDictDump(TableWrapper *wp, PyObject *args) { PyObject *result, *input, *key, *map; long valid, index, length; Dprint(("kjDictDump\n")); if (!is_kjDictobject(wp) && !is_kjGraphobject(wp)) { PyErr_SetString(PyExc_TypeError, "dump only defined for kjDicts"); return NULL; } if (args == NULL) { PyErr_SetString(PyExc_TypeError, "dictionary dump requires tuple argument"); return NULL; } valid = PyArg_Parse(args, "O", &input); if (valid && (PyTuple_Check(input))) { length = PyTuple_Size(input); if (length < 1) { PyErr_SetString(PyExc_TypeError, "dictionary dump requires nonempty tuple arg"); return NULL; } if (length == 1) { /* return D[input[0]] */ key = PyTuple_GetItem(input, 0); return kjDict_subscript(wp, key); /* incref done by function */ } else { /* return ( D[input[0]], D[input[1]], ..., D[input[n]] ) */ result = PyTuple_New(length); if (result == NULL) { return NULL; } /* failure to allocate */ for (index = 0; index1 and thing of same len, or error */ static PyObject * kjUndumpToDict(PyObject *self, PyObject *args) { TableWrapper *result; PyObject *tup, *thing, *key, *map; long valid, index, length; Dprint(("kjUndump\n")); if (args == NULL) { PyErr_SetString(PyExc_TypeError, "kjUndump called with no args"); return NULL; } valid = PyArg_Parse(args, "(OO)", &tup, &thing); if (valid) { valid = PyTuple_Check(tup); } if (valid) { length = PyTuple_Size(tup); if (length<1) { PyErr_SetString(PyExc_ValueError, "kjUndump: tuple must be non-empty"); return NULL; } /* try to save a little space */ result = (TableWrapper *) newWrapper(length/2, DICTFLAG); if (result == NULL) { return NULL; } /* allocation failure */ if (length == 1) { /* return D[tup[0]] = thing */ key = PyTuple_GetItem(tup, 0); valid = kjDict_ass_subscript((PyObject *) result, key, thing); if (valid == -1) { Py_DECREF(result); return NULL; } return (PyObject *) result; } else { /* return for i in len(tup): D[tup[i]] = thing[i] */ if (PyTuple_Check(thing)) { if (PyTuple_Size(thing) != length) { PyErr_SetString(PyExc_TypeError,"kjUndump -- tuple lengths don't match"); return NULL; } for (index = 0; indexrep.flag; /* make no assumption about size of result */ result = (TableWrapper *) newWrapper(0, flag); if (result == NULL) { return NULL; } /* allocation failure */ /* heuristic: walk through restrictor if much smaller than self otherwise walk through self */ tp = &(wp->rep); resulttp = &(result->rep); comparetp = &(compare->rep); if (tp->entries > 4 * comparetp->entries) { /* walk through the restrictor */ (void) InitAll(&compareWalker, comparetp); test = compareWalker.valid; while ((compareWalker.valid == 1) && (test!=-1)) { /* walk through matches for key in tp */ /* (if many matches for same key, may not be efficient) */ (void) Initbykey(&wpWalker, tp, compareWalker.key, compareWalker.hash); while ((wpWalker.valid == 1) && (test != -1)) { /* put member from wpWalker in result */ test = TableGet1(resulttp, wpWalker.key, wpWalker.map, wpWalker.hash, FORCE, &d1, &d2); if (test!=-1) { (void) Nextbykey(&wpWalker); } if (wpWalker.valid == -1) { test = -1; } } if (test!=-1) { (void) NextAll(&compareWalker); } if (compareWalker.valid == -1) { test = -1; } } } else { /* walk through tp */ (void) InitAll(&wpWalker, tp); test = wpWalker.valid; while ((wpWalker.valid == 1) && (test!=-1)) { /* see if there is a match in compare */ (void) Initbykey(&compareWalker, comparetp, wpWalker.key, wpWalker.hash); /* if there, insert elt in result */ if (compareWalker.valid == 1) { test = TableGet1(resulttp, wpWalker.key, 
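/* restrict keeps exactly the entries of self whose key also occurs in the
   restrictor; a minimal sketch (assuming a built kjbuckets module; output
   paraphrased):
       >>> from kjbuckets import kjDict, kjSet
       >>> kjDict([('a',1), ('b',2)]).restrict(kjSet(['a']))
       kjDict([('a', 1)])
*/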
wpWalker.map, wpWalker.hash, FORCE, &d1, &d2); } if (compareWalker.valid == -1) { test = -1; } if (test != -1) { (void) NextAll(&wpWalker); } if (wpWalker.valid == -1) { test = -1; } } } /* test for error cases */ if (test == -1) { Py_DECREF(result); return NULL; } /* otherwise just return result */ return (PyObject *) result; } /* special function for retrieving from dict-dumped indices "same as" def x.dget(dict, dumper): try: d = dict.dump(dumper) if d == Py_None: d = (Py_None,) return x.neighbors(d) except PyExc_KeyError: return Py_None x is kjDict or kjGraph dict is kjDict or kjGraph dumper is tuple dump of Py_None is mapped to (Py_None,) to avoid ambiguity elsewhere (may retrieve "too many neighbors" for key of Py_None or (Py_None,) defined benieth following utility function as static PyObject * kjWdget(TableWrapper *wp, PyObject *args) */ /* same as above but if testonly is set, then instead of x.neighbors(d) return 1 if neighbors set is nonempty, else, 0 */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjWdget1(TableWrapper *wp, PyObject *args, long testonly) { PyObject *d, *dumper, *result, *err_type /*, *err_value */; TableWrapper *dict; /* get and verify args */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "dget requires 2 arguments"); return NULL; } if (!PyArg_Parse(args, "(OO)", &dict, &dumper)) { PyErr_SetString(PyExc_TypeError, "dget requires dict, dumper"); return NULL; } if (!((is_kjDictobject(dict)) || (is_kjGraphobject(dict)))) { PyErr_SetString(PyExc_TypeError, "first arg of dget must be kjDict or kjGraph"); return NULL; } if (!PyTuple_Check(dumper)) { PyErr_SetString(PyExc_TypeError, "second arg of dget must be tuple"); return NULL; } /* initialize d */ d = kjDictDump(dict, dumper); if (d == NULL) { /* unable to dump */ /* check that error was a keyerror ??? */ /* err_get(&err_type, &err_value); */ err_type = PyErr_Occurred(); if (err_type != PyExc_KeyError) { /* some other error... abort */ /* PyErr_SetObject(err_type, err_value); */ return NULL; } PyErr_Clear(); /* in case of PyExc_KeyError, just return Py_None */ Py_INCREF(Py_None); return Py_None; } /* if dump was successful, return neighbors */ /* ??? should return d also ??? 
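   A minimal sketch tying dump and dget together, assuming a built kjbuckets
   module; the names row and index below are hypothetical and the output is
   paraphrased:

       >>> from kjbuckets import kjDict, kjGraph
       >>> row = kjDict([('name','sam'), ('age',5)])
       >>> index = kjGraph()
       >>> index[row.dump(('name',))] = row   # i.e. index['sam'] = row
       >>> index.dget(row, ('name',))         # same as index.neighbors('sam')
       [kjDict([('age', 5), ('name', 'sam')])]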
*/ if (testonly == 0) { result = Gneighbors(wp, d); } else { result = Whas_key(wp, d); } Py_DECREF(d); return result; } /* #endif */ /* variant of dget, that just tests for presence in index "same as" def x.dtest(dict, dumper): try: d = dict.dump(dumper) if d == Py_None: d = (Py_None,) return x.has_key(d) except PyExc_KeyError: return Py_None */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjWdtest(TableWrapper *wp, PyObject *args) { return kjWdget1(wp, args, 1); /* test only */ } /* #endif #ifndef PYTHON1DOT2 */ static PyObject * kjWdget(TableWrapper *wp, PyObject *args) { return kjWdget1(wp, args, 0); /* don't test only */ } /* #endif */ /* miscellaneous methods for these types */ static struct PyMethodDef Wrapper_methods[] = { {"member", (PyCFunction)Wmember}, {"add", (PyCFunction)Waddmember}, {"delete_arc", (PyCFunction)Wdelete_arc}, {"has_key", (PyCFunction)Whas_key}, {"choose_key", (PyCFunction)Wchoose_key}, {"Clean", (PyCFunction)WClean}, {"neighbors", (PyCFunction)Gneighbors}, {"dump", (PyCFunction)kjDictDump}, /* #ifndef PYTHON1DOT2 */ {"dget", (PyCFunction)kjWdget}, {"dtest", (PyCFunction)kjWdtest}, /* #endif */ {"reachable", (PyCFunction)Greachable}, {"subset", (PyCFunction)WSubset}, {"items", (PyCFunction)WrapperItems}, {"keys", (PyCFunction)Wkeys}, {"values", (PyCFunction)Wvalues}, {"ident", (PyCFunction)Gidentity}, {"remap", (PyCFunction)Dremap}, {"restrict", (PyCFunction)kjWRestrict}, {"tclosure", (PyCFunction)Wtransclose}, {"Soil", (PyCFunction)WSoil}, {"Wash", (PyCFunction)WWash}, {NULL, NULL} /* sentinel */ }; /* getattr snarfed from mappingobject.c */ static PyObject * Wrapper_getattr(PyObject *mp, char *name) { return Py_FindMethod(Wrapper_methods, (PyObject *)mp, name); } /* methods for special behaviors as number and mapping */ /* undefined operations */ static PyObject * undefbin(PyObject *v, PyObject *w) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } static PyObject * undefter(PyObject *v, PyObject *w, PyObject *z) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } static PyObject * undefun(PyObject *v) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } /* transpose of non 1:1 dict will have nondeterministic results */ static PyObject *Wtranspose(TableWrapper *source) { TableWrapper *result; long size, test; Dprint(("Wtranspose\n")); if (source->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "Cannot transpose set"); return NULL; } /* conservative estimate of size (may save space, maybe not) */ size = source->rep.entries; size = size/2; result = (TableWrapper *) newWrapper(size, source->rep.flag); if (result == NULL) { return NULL; } /* error */ propagateDirt(source, result); test = Ttranspose( &(result->rep), &(source->rep) ); if (test!=0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } static PyObject *Wunion(TableWrapper *left, TableWrapper *right) { enum BucketFlag flag; TableWrapper *result; long size, test; Dprint(("Wunion\n")); /* Py_None unioned with anything returns Py_None (universal set) */ if (((PyObject *) left == Py_None) || ((PyObject *) right == Py_None)) { Py_INCREF(Py_None); return Py_None; } /* arbitrary size heuristic */ if (left->rep.entries > right->rep.entries) { size = left->rep.entries; } else { size = right->rep.entries; } size = size/2; /* conservative to save space (maybe) */ /* determine coercion if possible, default=more general */ test = FlagCoercion(left->rep.flag, right->rep.flag, &flag, 
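/* Generality=1 coerces toward the more general operand using the numeric
   order of enum BucketFlag (SETFLAG=0, DICTFLAG=1, GRAPHFLAG=2); under that
   rule the union of a kjDict and a kjGraph comes out a kjGraph, e.g.
   kjDict([(1,2)]) + kjGraph([(1,3)]) yields kjGraph([(1, 2), (1, 3)])
   (a sketch assuming a built kjbuckets module; output paraphrased). */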
1); if (test != 1) { PyErr_SetString(PyExc_TypeError, "incompatible types for table union"); return NULL; } /* allocate a wrapper and augment it with both inputs */ result = (TableWrapper *) newWrapper(size, flag); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); test = Taugment( &(result->rep), &(left->rep) ); if (test == 0) { test = Taugment( &(result->rep), &(right->rep) ); } if (test!=0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } /* utility function for intersection and difference */ static PyObject * Wintdiff(TableWrapper *left, TableWrapper *right, long include, enum BucketFlag flagout) { TableWrapper *result; long count; /* determine the size needed */ Dprint(("Wintdiff\n")); count = Tintdiff(NULL, &(left->rep), &(right->rep), include, 1); if (count < 0) { return NULL; } /* error */ /* be conservative, for fun */ count = count / 2; /* allocate a wrapper of this size and initialize it */ result = (TableWrapper *) newWrapper(count, flagout); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); count = Tintdiff(&(result->rep), &(left->rep), &(right->rep), include, 0); if (count < 0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } /* intersection */ static PyObject * Wintersect(TableWrapper *left, TableWrapper *right) { long test; enum BucketFlag flag, lflag, rflag; Dprint(("Wintersect\n")); /* Py_None intersected with anything returns copy of anything... */ if ((PyObject *)left == Py_None) { return Wunion(right, right); } if ((PyObject *)right == Py_None) { return Wunion(left, left); } /* determine flag: default to less general */ rflag = right->rep.flag; lflag = left->rep.flag; /* coerce to more general, unless one arg is a set, in which case coerce to set */ if ( (rflag != lflag) && ((rflag == SETFLAG)||(lflag == SETFLAG)) ) { PyErr_SetString(PyExc_TypeError, "mixed intersection not allowed with kjSet"); return NULL; } test = FlagCoercion(left->rep.flag, right->rep.flag, &flag, -1); if (test!=1) { PyErr_SetString(PyExc_TypeError, "unable to coerce for intersection"); return NULL; } /* iterate over the smaller argument */ if ((left->rep.entries) < (right->rep.entries)) { return Wintdiff(left, right, 1, flag); } else { return Wintdiff(right, left, 1, flag); } } /* difference */ static PyObject * Wdifference(TableWrapper *left, TableWrapper *right) { enum BucketFlag lflag, rflag; /* left cannot be Py_None */ Dprint(("Wdifference\n")); if ((PyObject *)left == Py_None) { PyErr_SetString(PyExc_TypeError, "cannot difference from Py_None"); return NULL; } /* if right is Py_None return empty */ if ((PyObject *)right == Py_None) { return (PyObject *) newWrapper(0, left->rep.flag); } rflag = right->rep.flag; lflag = left->rep.flag; /* diff default coerce to whatever left is, unless one arg is a set, in which case raise an error */ if ( (rflag != lflag) && ((rflag == SETFLAG)||(lflag == SETFLAG)) ) { PyErr_SetString(PyExc_TypeError, "mixed difference not allowed with kjSet"); return NULL; } return Wintdiff(left, right, 0, lflag); } /* composition of two tables */ static PyObject * Wcompose(TableWrapper *left, TableWrapper *right) { enum BucketFlag flag; TableWrapper *result; long test, count; Table *Ltable, *Rtable; Dprint(("Wcompose\n")); /* neither arg may be Py_None */ if (((PyObject *)left == Py_None) || ((PyObject *)right == Py_None)) { PyErr_SetString(PyExc_TypeError, "cannot compose Py_None"); return NULL; } Ltable = 
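/* relational composition: (x,z) lands in the result whenever (x,y) is in
   the left argument and (y,z) in the right; a minimal sketch (assuming a
   built kjbuckets module; output paraphrased):
       >>> from kjbuckets import kjGraph
       >>> kjGraph([(1,2), (2,2)]) * kjGraph([(2,3)])
       kjGraph([(1, 3), (2, 3)])
*/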
&(left->rep); Rtable = &(right->rep); /* find coercion, prefer more general */ test = FlagCoercion(Ltable->flag, Rtable->flag, &flag, 1); if (test!=1) { PyErr_SetString(PyExc_TypeError, "incompatible types for composition"); return NULL; } /* DON'T determine required table size, (not easily done correctly) */ count = 0; /* commented count = Tcompose(0, Ltable, Rtable, 0, 1); if (count<0) { return NULL; } count = count/2; */ /* allocate result */ result = (TableWrapper *) newWrapper(count, flag); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); count = Tcompose(&(result->rep), Ltable, Rtable, 0, 0); if (count < 0) { Py_DECREF(result); return NULL; /* error */ } return (PyObject *) result; } /* coercion: just check that pw is either Py_None, kjSet, kjGraph or kjDict all other logic is at the function level (Py_None == universal set) */ static long Wrapper_coerce(PyObject **pv, PyObject **pw) { PyObject *w; w = *pw; Dprint(("Wcoerce\n")); if ( (w == Py_None) || is_kjTable(w) ) { /* both w and *pv are "returned", hence must be increfed */ Py_INCREF(w); Py_INCREF(*pv); return 0; /* okay */ } return 1; /* Nope! */ } /* the number methods structure for all kjSets, kjDicts, kjGraphs */ static PyNumberMethods kjSet_as_number = { (binaryfunc)Wunion, /*nb_add*/ (binaryfunc)Wdifference, /*nb_subtract*/ (binaryfunc)Wcompose, /*nb_multiply*/ (binaryfunc)undefbin, /*nb_divide*/ (binaryfunc)undefbin, /*nb_remainder*/ (binaryfunc)undefbin, /*nb_divmod*/ (ternaryfunc)undefter, /*nb_power*/ (unaryfunc)undefun, /*nb_negative*/ (unaryfunc)undefun, /*nb_positive*/ (unaryfunc)undefun, /*nb_absolute*/ (inquiry)Wrapper_nonzero, /*nb_nonzero*/ (unaryfunc)Wtranspose, /*nb_invert*/ (binaryfunc)undefbin, /*nb_lshift*/ (binaryfunc)undefbin, /*nb_rshift*/ (binaryfunc)Wintersect, /*nb_and*/ (binaryfunc)undefbin, /*nb_xor*/ (binaryfunc)Wunion, /*nb_or*/ (coercion)Wrapper_coerce, /*nb_coerce*/ (unaryfunc)undefun, /*nb_int*/ (unaryfunc)undefun, /*nb_long*/ (unaryfunc)undefun, /*nb_float*/ (unaryfunc)undefun, /*nb_oct*/ (unaryfunc)undefun, /*nb_hex*/ }; static PyObject * kjSet_subscript(TableWrapper *Set, PyObject *key) { PyObject *mem, *map; long test; Dprint(("kjSet_subscript\n")); test = TableGet1(&(Set->rep), key, 0, NOHASH, NOFORCE, &mem, &map); if (test == -1) { return NULL; } return PyInt_FromLong((long) 1); } static long kjSet_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing) { PyObject *mem, *map; TableWrapper *S; Dprint(("kjSet_ass_subscript\n")); S = (TableWrapper *) Set; if (S->hashvalue != NOHASH) { Wset_hash_error(); return -1; } if (thing == NULL) { /* request to delete */ if (deleteFromTable(&(S->rep), key, 0) == 0) { return -1; } return 0; } else { /* should check for standard value of *thing = long 1 ? 
*/ return TableGet1(&(S->rep), key, 0, NOHASH, FORCE, &mem, &map); } } static PyObject * kjDict_subscript(TableWrapper *Set, PyObject *key) { PyObject *mem, *map; long test; Dprint(("kjDict_subscript\n")); test = TableGet1(&(Set->rep), key, 0, NOHASH, NOFORCE, &mem, &map); if (test == -1) { return NULL; } Py_XINCREF(map); return map; } static long kjDict_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing) { PyObject *mem, *map; TableWrapper *S; Dprint(("kjDict_ass_subscript\n")); S = (TableWrapper *) Set; if (S->hashvalue != NOHASH) { Wset_hash_error(); return -1; } if (thing == NULL) { /* request to delete */ if (deleteFromTable(&(S->rep), key, 0) == 0) { return -1; } return 0; } else { return TableGet1(&(S->rep), key, thing, NOHASH, FORCE, &mem, &map); } } static long Wrapper_length(TableWrapper *W) { Dprint(("Wrapper_length\n")); return W->rep.entries; } /* mapping methods for jkSets */ static PyMappingMethods kjSet_as_mapping = { (inquiry)Wrapper_length, /*mp_length*/ (binaryfunc)kjSet_subscript, /*mp_subscript*/ (objobjargproc)kjSet_ass_subscript, /*mp_ass_subscript*/ }; /* mapping methods for kjDicts AND kjGraphs */ static PyMappingMethods kjDict_as_mapping = { (inquiry)Wrapper_length, /*mp_length*/ (binaryfunc)kjDict_subscript, /*mp_subscript*/ (objobjargproc)kjDict_ass_subscript, /*mp_ass_subscript*/ }; /* THE TYPE OBJECT FOR SETS */ static PyTypeObject kjSettype = { //PyObject_HEAD_INIT(&PyType_Type) (=PA=) PyObject_HEAD_INIT(0) 0, (char *) "kjSet", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)NULL, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)NULL, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)NULL, /*tp_as_sequence*/ (PyMappingMethods *)&kjSet_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ NULL, /*tp_call*/ }; /* THE TYPE OBJECT FOR DICTS */ static PyTypeObject kjDicttype = { //PyObject_HEAD_INIT(&PyType_Type) (=PA=) PyObject_HEAD_INIT(0) 0, (char *) "kjDict", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)0, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)0, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)0, /*tp_as_sequence*/ (PyMappingMethods *)&kjDict_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ 0, /*tp_call*/ }; /* THE TYPE OBJECT FOR GRAPHSS */ static PyTypeObject kjGraphtype = { //PyObject_HEAD_INIT(&PyType_Type) (=PA=) PyObject_HEAD_INIT(0) 0, (char *) "kjGraph", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)0, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)0, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)0, /*tp_as_sequence*/ (PyMappingMethods *)&kjDict_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ 0, /*tp_call*/ }; /* special method for adding to a "dumped index" C implementation of frequently 
used python code (by me) same as: def kjKeyPut(dict, dumper, index, psuedokey, nullbag): try: d = dict.dump(dumper) if d == Py_None: d = (Py_None,) pair = (psuedokey, dict) index[d] = pair return d except PyExc_KeyError: nullbag[psuedokey] = dict return Py_None but faster. Returns Py_None only on failure to index. Maps dump of Py_None to (Py_None,) to avoid ambiguity (may cause too many hits for retrieval on (Py_None,).) dict is kjDict or kjGraph dumper is tuple index is kjDict or kjGraph psuedokey is any hashable object (probably integer) nullbag is kjDict or kjGraph */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjKeyPut(PyObject *self, PyObject *args) { long valid; TableWrapper *dict, *index, *nullbag; PyObject *dumper, *psuedokey, *d, *pair, *err_type /*, *err_value */; /* get and verify args */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "KeyPut requires 5 arguments"); return NULL; } if (!PyArg_Parse(args, "(OOOOO)", &dict, &dumper, &index, &psuedokey, &nullbag)) { PyErr_SetString(PyExc_TypeError, "KeyPut requires dict, dumper, index, psuedokey, nullbag"); return NULL; } if (!((is_kjDictobject(dict)) || (is_kjGraphobject(dict)))) { PyErr_SetString(PyExc_TypeError, "first arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!((is_kjDictobject(index)) || (is_kjGraphobject(index)))) { PyErr_SetString(PyExc_TypeError, "third arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!((is_kjDictobject(nullbag)) || (is_kjGraphobject(nullbag)))) { PyErr_SetString(PyExc_TypeError, "fifth arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!PyTuple_Check(dumper)) { PyErr_SetString(PyExc_TypeError, "second arg of KeyPut must be tuple"); return NULL; } /* initialize d */ d = kjDictDump(dict, dumper); if (d == NULL) { /* unable to dump */ /* check that error was a keyerror ??? */ /* err_get(&err_type, &err_value); */ err_type = PyErr_Occurred(); if (err_type != PyExc_KeyError) { /* some other error... abort */ /* PyErr_SetObject(err_type, err_value); */ return NULL; } /* in case of PyExc_KeyError, augment the Nullbag, return Py_None */ PyErr_Clear(); valid = kjDict_ass_subscript((PyObject *) nullbag, psuedokey, (PyObject *) dict); if (valid == -1) { return NULL; } Py_INCREF(Py_None); return Py_None; } /* if dump succeeded... */ /* initialize pair, Py_INCREF components */ pair = PyTuple_New(2); if (pair == NULL) { return NULL; } PyTuple_SetItem(pair, 0, psuedokey); Py_INCREF(psuedokey); PyTuple_SetItem(pair, 1, (PyObject *) dict); Py_INCREF(dict); /* remap Py_None to (Py_None,) if needed */ if (d == Py_None) { /* preserve extra reference to Py_None... */ d = PyTuple_New(1); PyTuple_SetItem(d, 0, Py_None); } /* set index[d] = pair, creates an extra ref to pair */ valid = kjDict_ass_subscript((PyObject *) index, d, pair); if (valid == -1) { Py_DECREF(pair); return NULL; } Py_DECREF(pair); /* dispose of extra ref to pair */ return d; } /* #endif */ /* THE "METHODS" FOR THIS MODULE */ /* These are the basic external interfaces for python to access this module. 
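   A minimal sketch of that interface (assuming a built kjbuckets module;
   output paraphrased):

       >>> from kjbuckets import kjSet, kjDict, kjGraph, kjUndump
       >>> kjUndump(('a','b'), (1,2))
       kjDict([('a', 1), ('b', 2)])
       >>> kjUndump(('a',), 1)
       kjDict([('a', 1)])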
*/
static struct PyMethodDef kjbuckets_methods[] = {
   {"kjSet", (PyCFunction)makekjSet},
   {"kjDict", (PyCFunction)makekjDict},
   {"kjGraph", (PyCFunction)makekjGraph},
   {"kjUndump", (PyCFunction)kjUndumpToDict},
   /* #ifndef PYTHON1DOT2 */
   {"kjKeyPut", (PyCFunction)kjKeyPut},
   /* #endif */
#ifdef KJBDEBUG
   {"debug", (PyCFunction)Wdebug},
#endif
   {NULL, NULL} /* sentinel */
};

void initkjbuckets(void) {
  kjSettype.ob_type = &PyType_Type; //(=PA=)
  kjDicttype.ob_type = &PyType_Type; //(=PA=)
  kjGraphtype.ob_type = &PyType_Type; //(=PA=)
  Py_InitModule("kjbuckets", kjbuckets_methods);
}
/* end of kjbuckets module */
gadfly-1.0.0/kjbuckets/2.2/0040755000157700012320000000000007512763043014266 5ustar rjonestech
gadfly-1.0.0/kjbuckets/2.2/kjbuckets.pyd0100644000157700012320000013000007465607564016774 0ustar rjonestech[binary contents of kjbuckets.pyd, a prebuilt Windows extension module, omitted]
gadfly-1.0.0/kjbuckets/kjbucketsmodule.c … 0ustar rjonestech/* kjbuckets, by Aaron Watters and Oleg Broytmann.

   This file defines three Python datatypes (kjSet, kjGraph, and kjDict)
   which share a common representational and procedural infrastructure: a
   hash table with table driven behavior.

   [ want to add .keys(n) -- pick n keys, Null == all
     want to make the deletion algorithm more complicated and faster! ]
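   For orientation before the representation details below, a minimal sketch
   of the three types from Python (assuming a built kjbuckets module; output
   paraphrased):

       >>> from kjbuckets import kjSet, kjDict, kjGraph
       >>> S = kjSet([1,2]); S.has_key(2)
       1
       >>> D = kjDict([('a',1)]); D['a']
       1
       >>> G = kjGraph([(1,2), (1,3)]); G.neighbors(1)
       [2, 3]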
Hash index loops consist of a root group and possibly one or more
linked overflow groups arranged in a circular list (embedded in the
array).  For example the arcs with source 1 are rooted at index 1,
with one overflow group at index 13.

The code assumes in several places that any used group with "undefined
entries" is the last group in its hash index loop and all undefines
are at the higher indices of the group.

Dedicated overflow groups: In this case 7 (basesize / OVLFACT)
additional groups have been allocated with indices 7..13 which can
only be used as overflow groups.  Those groups which are not used
either as a root or an overflow are kept in a circular free list with
head at index 9.

This basic table structure has 3 incarnations:

  kjSet represents "sets of hashable objects."  It has a smaller
  Bucket size which archives only one object.

  kjDict represents only relations that are "partial functions from
  hashable objects to objects."

  kjGraph represents arbitrary relations from hashable objects to
  objects.

Both kjDict's and kjGraph's are indexed "on the left" only.  The
behavior of tables under the differing interpretations is determined
primarily by the behavior of the function BPtrMatch which defines what
it means for a Bucket to match a key/map pair under the differing
interpretations.
*/

/* include a bunch of stuff */
#include "Python.h"
/* #include "rename2.h" */
/* #include "allobjects.h" */
/* #include "modsupport.h" */
/* #include "ceval.h" */
#ifdef STDC_HEADERS
#include <stddef.h>
#else
#include <sys/types.h>  /* for size_t */
#endif

/* THE FOLLOWING IS HISTORICAL AND NOT NEEDED */
/* define this flag to remove stuff which won't link under 1.2 */
/* #define PYTHON1DOT2 1 */ /* PROBLEM FIXED */

/* flag to enable optional debug printing during execution
   turned on/off by kjbuckets.debug() from python */
/* #define KJBDEBUG 1 */

#ifdef KJBDEBUG
static long DebugLevel = 0;
/* usage: Dprint(("this is a long %ld",i)); */
#define Dprint(x) if (DebugLevel) printf x
#else
#define Dprint(x) {}
#endif

/***************************************************************/
/** local parameters                                          **/

/* if set, this changes printing to show internal structure of table */
/* if undefined, the debug printing will be omitted */
/* #define WDEBUGPRINT 0 */

/* overflow fudge factor, low values mean more fudge
   array size = basesize + basesize/OVLFACT
   extra space is used only for overflows */
#define OVLFACT 1

/* group size for each bucket group, smaller means faster/bigger (roughly) */
#define GSIZE 4

/* if you redefine OVLFACT, better rethink the following macro
   which is designed to force a resize to a size large enough for
   additional inserts.
   !!!AN INFINITE RECURSION WILL RESULT IF THE RESULTING TABLE
      IS NOT LARGE ENOUGH!!! */
#define RESIZEUPSIZE(tp) ( tp->basesize * GSIZE + 1 )

/* resize down when fewer than 1/RESIZEFACTOR buckets are used */
#define RESIZEFACTOR 8

/* don't resize down if size is smaller than this */
#define RESIZETHRESHOLD 16

/* the test for resizing down */
#define RESIZEDOWNTEST(tp) \
   ( (tp->size > RESIZETHRESHOLD) && \
     ( (tp->entries * RESIZEFACTOR) < (tp->size * GSIZE) ) )

/* group states */
#ifdef OVERFLOW
#undef OVERFLOW
#endif
enum GState { UNKNOWN, FREE, ROOT, OVERFLOW };

/* bucket behaviors, smaller is less general!
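   (FlagCoercion further down leans on this numeric ordering when
   table types are mixed: e.g. Dict intersect Graph coerces to Dict,
   Dict union Graph to Graph)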
*/ enum BucketFlag { SETFLAG=0, DICTFLAG=1, GRAPHFLAG=2 }; /* special invalid hash value (from python convention) */ #define NOHASH ( (long) -1 ) /* to force or not to force insertions during lookups */ enum ForceFlag { FORCE=1, NOFORCE=0 }; /* macro for getting hash values (snarfed from mappingobject.c) */ #ifdef CACHE_HASH #define GETHASH(hashvalue, object) \ if (!PyString_Check(object) || \ (hashvalue = ((PyStringObject *) object)->ob_shash) == -1)\ hashvalue = PyObject_Hash(object) #else #define GETHASH(hashvalue, object) hashvalue = PyObject_Hash(object) #endif /*********************************************************/ /* bucket methods **/ /* set bucket structure */ typedef struct { long hash; PyObject * member; } SetBucket; /* graph and dict bucket structure */ typedef struct { long hash; PyObject * member; PyObject * map; } DiBucket; /* for passing general buckets around, with external flags */ typedef union { SetBucket * SBucketp; DiBucket * DBucketp; } Bucketptr; /* destructuring a bucket (macroized) */ #define BPtrDestructure(/*Bucketptr*/ Bp, /*enum BucketFlag*/ flag,\ /*long*/ hp, /*PyObject*/ memp, /*PyObject*/ mapp)\ {\ switch (flag) {\ case SETFLAG:\ hp = Bp.SBucketp->hash;\ memp = Bp.SBucketp->member;\ mapp = memp; /* map is copy of memp */\ break;\ case DICTFLAG:\ case GRAPHFLAG:\ hp = Bp.DBucketp->hash;\ memp = Bp.DBucketp->member;\ mapp = Bp.DBucketp->map;\ break;\ }\ } #ifdef WDEBUGPRINT /* testing only */ static long BPtrDump(Bucketptr Bp, enum BucketFlag flag, FILE *fp) { long h; PyObject *mem, *map; BPtrDestructure(Bp, flag, h, mem, map); fprintf(fp, "Bkt[%ld, ",h); if (mem == 0) { fprintf(fp, "NULL"); } /*else { if (PyObject_Print(mem, fp, 0) != 0) { return -1; } }*/ fprintf(fp, "%ld, ",mem); if (map == 0) { fprintf(fp, "NULL"); } /*else { if (PyObject_Print(map, fp, 0) != 0) { return -1; } }*/ fprintf(fp, "%ld] ",map); return 0; } #endif /* setting a bucket Py_INCREFs handled here. assumes initial contents are null or garbage. (macroized) */ /* static long */ #define BPtrSet( \ /* Bucketptr */ Bp, /* enum BucketFlag */ flag,\ /* long */ h, /* PyObject * */mem1, /* PyObject * */map1)\ {\ switch(flag) {\ case SETFLAG:\ if ((mem1==0)&&(h!=NOHASH)) Dprint(("setting mem to 0, hash =%ld\n",h));\ /* ignore map */\ Bp.SBucketp->hash = h;\ Bp.SBucketp->member = mem1;\ if (mem1 != 0) { Py_XINCREF (mem1); }\ break;\ case DICTFLAG:\ case GRAPHFLAG:\ Bp.DBucketp->hash = h;\ Bp.DBucketp->member = mem1;\ if (mem1 != 0) { Py_XINCREF (mem1); }\ Bp.DBucketp->map = map1;\ if (map1 != 0) { Py_XINCREF (map1); }\ break;\ }\ } /* initialization assuming invalid value -- not used. (no decrefs, could macroize) */ /*static long BPtrInit( Bucketptr Bp, enum BucketFlag flag ) { PyObject *dummy; dummy = 0; BPtrSet( Bp, flag, NOHASH, dummy, dummy ); }*/ /* re-initialization assuming valid value Py_DECREFs handled here. to save values in the bucket for use after reinitialization, incref them first and decref after... (macroized) */ /*static void*/ #define BPtrReInit( /*Bucketptr*/ Bp, /*enum BucketFlag*/ flag )\ {\ long hashBBB;\ PyObject *MemberBBB = 0, *MapBBB = 0, *dummyBBB = 0;\ BPtrDestructure( Bp, flag, hashBBB, MemberBBB, MapBBB );\ if ( MemberBBB != 0 ) { Py_DECREF(MemberBBB); }\ /* don't decref map for sets!! 
*/\ if ( (MapBBB != 0) && (flag != SETFLAG) ) { Py_DECREF(MapBBB); }\ dummyBBB = 0;\ BPtrSet( Bp, flag, NOHASH, dummyBBB, dummyBBB );\ } /* returns 1 on match, 0 no match, -1 error newflag is set if new entry, else reset dirtyflag is set if this is a forced overwrite, else left alone */ /* static long */ #define BPtrMatch(/*int*/ result,\ /*Bucketptr*/ Bp, \ /*enum BucketFlag*/ flag,\ /*long*/ h, \ /*PyObject * */ Mm, \ /*PyObject * */ Mp, \ /*enum ForceFlag*/ Force,\ /*long * */ newflag, \ /*long * */ dirtyflag) \ {\ long hashAAA = 0;\ PyObject *MemberAAA = 0, *MapAAA = 0, *dummyAAA = 0;\ newflag = 0; /* default assumption */\ result = 0; /* default: fail */\ BPtrDestructure( Bp, flag, hashAAA, MemberAAA, MapAAA );\ switch (flag) {\ case SETFLAG:\ /* ignore maps */\ if ( ( hashAAA == NOHASH) && (h != NOHASH)) { \ /* force it? */\ if (Force == FORCE) {\ dummyAAA = 0;\ BPtrSet( Bp, flag, h, Mm, dummyAAA );\ newflag = 1; /* entry is new */\ result = 1; /* forced match on empty bucket */\ }\ } else {\ if (hashAAA != NOHASH) {\ /* null match */\ if (h == NOHASH)\ { result = 1; } /* bucket full, hash null == null match */\ else { /* fully defined match */\ if ((h == hashAAA) && (PyObject_Compare(Mm, MemberAAA)==0))\ { result = 1; } /* hash defined, all eq == match */\ }\ }\ }\ break;\ case DICTFLAG:\ /* null match case */\ if ((h == NOHASH) && (hashAAA != NOHASH)) { result = 1; }\ else {\ /* Forced match succeeds if bucket is empty or members match */\ if ((Force == FORCE) &&\ ( (hashAAA == NOHASH) || \ ((h == hashAAA)&&(PyObject_Compare(Mm, MemberAAA)==0)) ) ) {\ if ((Mm == 0) || (Mp == 0)) { result = -1; } /* error */\ else {\ if (hashAAA == NOHASH) { newflag = 1; } /* new if old was empty */\ else {\ if (PyObject_Compare(MapAAA,Mp)!=0) { /* overwriting: dirty */\ dirtyflag = 1;\ }\ }\ BPtrReInit( Bp, flag );\ BPtrSet( Bp, flag, h, Mm, Mp );\ result = 1; /* successful forced match */\ }\ } else {\ if ( (h!=NOHASH) && (h==hashAAA) &&\ (Mm != 0) && (PyObject_Compare(Mm, MemberAAA)==0) &&\ ( ( Mp == 0 ) || (PyObject_Compare(MapAAA,Mp)==0) ) )\ { result = 1; } /* successful unforced match */\ }\ }\ break;\ case GRAPHFLAG:\ if ( ( h == NOHASH ) && (hashAAA != NOHASH) ) { \ Dprint(("graph null match\n")); \ result = 1; /* null match */\ } else {\ /* force only on empty buckets */\ if ( ( hashAAA == NOHASH ) && (Force == FORCE) ) {\ if ( (h==NOHASH) || (Mm==0) || (Mp==0) ) { \ Dprint(("graph match error\n")); \ result = -1; /* error */\ } else {\ Dprint(("graph forced match\n")); \ BPtrReInit( Bp, flag );\ BPtrSet( Bp, flag, h, Mm, Mp );\ newflag = 1;\ result = 1; /* successful forced match */\ }\ } else {\ /* unforced match, can match if Mm is null */\ if (( hashAAA != NOHASH ) && ( hashAAA == h ) &&\ (Mm != 0) && ( PyObject_Compare(Mm,MemberAAA)==0 ) && \ ( (Mp == 0) || ( PyObject_Compare(MapAAA,Mp)==0 ))) {\ Dprint(("graph unforced match\n")); \ result = 1; /* successful unforced match */\ }\ }\ }\ break;\ default:\ /* error case */\ result = -1;\ break;\ }\ } /*************************************************************/ /** group methods **/ /* array types for bucket groupings */ typedef SetBucket SBuckets[GSIZE]; typedef DiBucket DBuckets[GSIZE]; /* free group template */ typedef struct { long Next; long Previous; } FreeGroup; /* DiBucket group template */ typedef struct { long Next; DBuckets Buckets; } DBGroup; /* SetBucket group template */ typedef struct { long Next; SBuckets Buckets; } SBGroup; /* SetGroup structure */ typedef struct { enum GState State; union { FreeGroup free; SBGroup 
group; } mem; } SetGroup; /* DiGroup structure */ typedef struct { enum GState State; union { FreeGroup free; DBGroup group; } mem; } DiGroup; /* union of different group template pointer types */ typedef union { FreeGroup *fgp; DBGroup *dbp; SBGroup *sbp; } Groupptr; /* get a bucket from a group (macroized) */ /*static Bucketptr*/ #define GetBucket(/*Bucketptr * */ Bp, \ /*Groupptr*/ g, \ /*enum BucketFlag*/ flag, \ /*int*/ index)\ {\ if (index>GSIZE) Dprint((" BAD INDEX IN GETBUCKET %ld \n", index));\ switch(flag){\ case SETFLAG:\ Bp.SBucketp = &(g.sbp->Buckets[index]);\ break;\ case DICTFLAG:\ case GRAPHFLAG:\ Bp.DBucketp = &(g.dbp->Buckets[index]);\ }\ } /* testing for empty group -- assumes correct backfilling (macroized) */ /*static int*/ #define GroupEmpty(/*int*/ Eresult, \ /*Groupptr*/ Eg, /*enum BucketFlag*/ Eflag)\ {\ long Eh = 0;\ PyObject *EMm, *EMp;\ Bucketptr EBp;\ GetBucket(EBp, Eg, Eflag, 0);\ BPtrDestructure(EBp, Eflag, Eh, EMm, EMp);\ if (Eh == NOHASH) { Eresult = 1; }\ else { Eresult = 0; }\ } /* initialize a groupptr to empty, assuming garbage initially (macroized) */ /*static void */ #define Groupinit(/*Groupptr*/ Dg, /*enum BucketFlag*/ Dflag)\ {\ Bucketptr DBp;\ PyObject *Ddummy;\ long Di;\ Ddummy = 0;\ for (Di=0; DiState);\ switch (SGptr->State) {\ case FREE:\ templateptr.fgp = &(SGptr->mem.free);\ Nextptr = &(SGptr->mem.free.Next);\ break;\ case ROOT:\ case OVERFLOW:\ case UNKNOWN:\ templateptr.sbp = &(SGptr->mem.group);\ Nextptr = &(SGptr->mem.group.Next);\ }\ break;\ case DICTFLAG:\ case GRAPHFLAG:\ DGptr = & (g.Dgroups[index]);\ Stateout = &(DGptr->State);\ switch (DGptr->State) {\ case FREE:\ templateptr.fgp = &(DGptr->mem.free);\ Nextptr = &(DGptr->mem.free.Next);\ break;\ case ROOT:\ case OVERFLOW:\ case UNKNOWN:\ templateptr.dbp = &(DGptr->mem.group);\ Nextptr = &(DGptr->mem.group.Next);\ break;\ }\ break;\ }\ } /* free group methods */ /* (macroized) */ /* static void */ #define SetFreeGroup(/*GroupArray*/ Fg, \ /*enum BucketFlag*/ Fflag,\ /*int*/ Fselfindex, \ /*int*/ Fnextindex, \ /*int*/ Fpreviousindex)\ {\ Groupptr Fself, Fnext, Fprev;\ long *Fdummy;\ enum GState *FselfState = 0, *FnextState = 0, *FprevState = 0;\ Dprint(("SetFreeGroup(self=%ld, next=%ld, prev=%ld)\n", \ Fselfindex, Fnextindex, Fpreviousindex));\ GArrayRef(Fg, Fflag, Fselfindex, Fself, FselfState, Fdummy );\ GArrayRef(Fg, Fflag, Fnextindex, Fnext, FnextState, Fdummy );\ GArrayRef(Fg, Fflag, Fpreviousindex, Fprev, FprevState, Fdummy );\ *FselfState = FREE;\ Fself.fgp->Previous = Fpreviousindex;\ Fself.fgp->Next = Fnextindex;\ Fnext.fgp->Previous = Fselfindex;\ Fprev.fgp->Next = Fselfindex;\ } /* get a free group (macroized) */ /*static void*/ #define ExtractFreeGroup(/*GroupArray*/ Gg, \ /*enum BucketFlag*/ Gflag, \ /*int*/ Gindex )\ {\ long Gnextindex, Gpreviousindex, *Gdummy;\ Groupptr Gself, Gnext, Gprev;\ enum GState *GselfState = 0, *GnextState, *GprevState;\ Dprint(("ExtractFreeGroup %ld\n",Gindex));\ GArrayRef(Gg, Gflag, Gindex, Gself, GselfState, Gdummy );\ Gnextindex = Gself.fgp->Next;\ Gpreviousindex = Gself.fgp->Previous;\ GArrayRef(Gg, Gflag, Gnextindex, Gnext, GnextState, Gdummy );\ GArrayRef(Gg, Gflag, Gpreviousindex, Gprev, GprevState, Gdummy );\ Gnext.fgp->Previous = Gpreviousindex;\ Gprev.fgp->Next = Gnextindex;\ *GselfState = UNKNOWN;\ } /* for a non-free group, find previous entry in circular list */ /* macroized */ /* static long */ #define Gprevious( /*int*/ Hresult,\ /* enum BucketFlag */ Hflag, \ /*int*/ Hindex, \ /*GroupArray*/ Harray)\ {\ long Hnext, HHHindex;\ 
enum GState *HdummyState;\ Groupptr Hdummytemplate;\ long *HNptr = 0;\ Dprint(("Gprevious %ld\n",Hindex));\ HHHindex = Hnext = Hindex;\ do {\ Hresult = Hnext;\ GArrayRef(Harray, Hflag, Hnext, Hdummytemplate, HdummyState, HNptr);\ Hnext = *HNptr;\ Dprint(("Gprevious at %ld %ld %ld\n", Hnext, HHHindex, Hindex));\ } while (Hnext != HHHindex);\ /* return Hresult; */\ } /* remove a group from its circular list */ /* macroized */ /* static void*/ #define Gremove( /*enum BucketFlag*/ Iflag, \ /*int*/ Iindex, \ /*GroupArray*/ Iarray)\ {\ enum GState *IdummyState;\ Groupptr Idummytemplate;\ long *INext = 0, *INextOfPrev = 0;\ long Iprevious;\ Dprint(("Gremove %ld\n",Iindex));\ Gprevious(Iprevious, Iflag, Iindex, Iarray);\ GArrayRef(Iarray, Iflag, Iindex, Idummytemplate, IdummyState, INext);\ GArrayRef(Iarray, Iflag, Iprevious, Idummytemplate, \ IdummyState, INextOfPrev);\ *INextOfPrev = *INext;\ *INext = Iindex;\ } /* Swap out overflow at fromindex contents from its circular list to toindex */ /* assumes toindex is currently on a unary list */ /* macroized */ /* static void */ #define Gswapout(/*GroupArray*/ Jarray, \ /*int*/ Jfromindex, \ /*int*/ Jtoindex,\ /*enum BucketFlag*/ Jflag)\ {\ long *JNext = 0, *JNextOfPrev = 0, *JNextOfOther = 0;\ enum GState *JState, *JOtherState = 0, *JPrevState;\ Groupptr Jg, Jgprev, Jgother;\ long Jprevious;\ Gprevious(Jprevious, Jflag,Jfromindex,Jarray);\ Dprint(("Gswapout %ld --> %ld\n",Jfromindex, Jtoindex));\ GArrayRef(Jarray,Jflag,Jfromindex, Jg, JState, JNext);\ GArrayRef(Jarray,Jflag,Jprevious, Jgprev, JPrevState, JNextOfPrev);\ GArrayRef(Jarray,Jflag,Jtoindex, Jgother, JOtherState, JNextOfOther);\ *JNextOfOther = *JNext;\ *JOtherState = OVERFLOW;\ GroupCopy(Jgother, Jg, Jflag);\ *JNextOfPrev = Jtoindex;\ Groupinit(Jg, Jflag);\ /* *JState = ROOT; */\ *JNext = Jfromindex;\ } /******************************************************************/ /** table methods **/ /* table structure */ typedef struct { enum BucketFlag flag; /* bucket behavior */ long Dirty; /* should be set if the table has had a "bucket overwrite" ie, if a deletion or entry overwrite has occurred */ long Free; /* head of circular free list */ long entries; /* number of entries used */ long basesize; /* basesize for truncating hash */ long size; /* number of groups (basesize+extras) */ GroupArray groups; /* array of groups of buckets */ } Table; /* place an entry on the free list, assuming it isn't there already */ /* macroized */ /*static void*/ #define FreeTableIndex(/*Table * */ Ktp, /*int*/ Kindex)\ {\ register enum BucketFlag Kflag = tp->flag;\ GroupArray Kgroups = Ktp->groups;\ long Kfreeindex = Ktp->Free;\ Groupptr Kthis, Kfree;\ enum GState *KthisState = 0, *KfreeState = 0;\ long *KNext = 0, *KfreeNext = 0;\ Dprint(("FreeTableIndex %ld\n",Kindex));\ GArrayRef( Kgroups, Kflag, Kindex, Kthis, KthisState, KNext);\ /* extract the group, only if its in a known state */\ if (*KthisState != UNKNOWN) {\ Gremove( Kflag, Kindex, Kgroups );\ }\ *KthisState = FREE;\ if (Kfreeindex == -1) {\ SetFreeGroup( Kgroups, Kflag, Kindex, Kindex, Kindex );\ }\ else {\ GArrayRef( Kgroups, Kflag, Kfreeindex, Kfree, KfreeState, KfreeNext);\ SetFreeGroup( Kgroups, Kflag, Kindex, *KfreeNext, Kfreeindex);\ }\ Ktp->Free = Kindex;\ } /* bucket allocation for table */ static long AllocateBuckets(Table *tp, long numMembers) { register enum BucketFlag flag = tp->flag; long ExpSize = numMembers/GSIZE + 1; long basesize, size, *Next, i; enum GState *State = NULL; Groupptr g; GroupArray groups; Dprint(("AllocateBuckets 
%ld\n",numMembers)); /* this weird heuristic is chosen arbitrarily (powers of 2 minus 1) */ for (basesize=1; ; basesize += basesize + 1) { if ((basesize <= 0) || (basesize>=ExpSize)) { break; } } if (basesizebasesize = basesize; tp->size = size = basesize + basesize/OVLFACT; tp->entries = 0; switch (flag) { case SETFLAG: groups.Sgroups = (SetGroup *) calloc(sizeof(SetGroup), size); break; case DICTFLAG: case GRAPHFLAG: groups.Dgroups = (DiGroup *) calloc(sizeof(DiGroup), size); break; default: PyErr_SetString(PyExc_SystemError, "invalid internal table behavior flag"); return 0; /* error */ } if (groups.Dgroups == NULL) { PyErr_NoMemory(); return 0; /* error */ } /* initialize all states to unknown */ for (i=0; igroups = groups; tp->Free = -1; /* initialize free groups backwards, to encourage use of dedicated free groups */ for (i=size-1; i>=0; i--) { FreeTableIndex(tp, i); } return 1; } #ifdef WDEBUGPRINT /* printing for testing only */ static long TableDump(Table *tp, FILE *fp) { register enum BucketFlag flag = tp->flag; GroupArray groups = tp->groups; Groupptr g; enum GState *State; long size = tp->size; long i, *Next; fprintf(fp, "Table (size=%ld, basesize=%ld, entries=%ld, free=%ld, ", size, tp->basesize, tp->entries, tp->Free); switch (flag) { case SETFLAG: fprintf(fp, "SET)\n"); break; case DICTFLAG: fprintf(fp, "DICT)\n"); break; case GRAPHFLAG: fprintf(fp, "GRAPH)\n"); break; default: fprintf(fp, "!unknown flag!\n"); } for (i=0; iNext, g.fgp->Previous); break; case ROOT: fprintf(fp, "ROOT(next=%ld)",*Next); if (GroupDump(g,flag,fp)!=0) { return -1; } break; case OVERFLOW: fprintf(fp, "OVFLW(next=%ld)",*Next); if (GroupDump(g,flag,fp)!=0) { return -1; } break; default: fprintf(fp, "!invalid GState!\n"); } } return 0; } #endif /* empty out all groups in this table */ static void groupsReinit(GroupArray g, enum BucketFlag flag, long size) { enum GState *State = 0; Groupptr groupp; long i, j, *d; Bucketptr Bp; Dprint(("groupsReinit %ld \n",size)); /* reinit all the groups to properly handle object references */ for (i=0; iflag;\ GroupArray Lgroups = Ltp->groups;\ long Lfreeindex = Ltp->Free;\ long *LNextp = 0, LNextind;\ enum GState *LState;\ Groupptr Lthis;\ Lresult = Lindex;\ Dprint(("UnFreeTableIndex %ldn",Lresult));\ GArrayRef(Lgroups, Lflag, Lresult, Lthis, LState, LNextp);\ /* debug */\ if (*LState != FREE) \ Dprint(("UnFreeTableIndex State=%ld not FREE\n",*LState));\ LNextind = *LNextp; /* save */\ if (LNextind == Lresult) {\ /* free list has one elt, zero after */\ Ltp->Free = -1;\ } else {\ ExtractFreeGroup(Lgroups, Lflag, Lresult);\ if (Lfreeindex == Lresult) { Ltp->Free = LNextind; }\ }\ Groupinit(Lthis,Lflag);\ /*return Lindex;*/\ } /* table initializer could macroize */ static long initTable(Table *tp, enum BucketFlag flag, long numMembers) { tp->flag = flag; tp->Dirty = 0; Dprint(("initTable\n")); return AllocateBuckets(tp, numMembers); } /* forward decl for table resizer */ long tableResize( Table *, long ); /* matching within a table. inputs: tp -- the table member1 -- the member to match map1 -- the map to match (null for set/dict) Force -- whether or not to force an insert on failure rootgroupI -- for reentrance, the rootgroup for current loop lastgroupI -- for reentrance, the current group lastbucketI -- for reentrance, the *previous* bucket to look past. 
(-1 means none for I* args) hsh -- the hash value if known (NOHASH means not known) outputs: (only valid after a successful search) rtgrp -- index of current root group (for later reenter) nxtgrp -- index of group where found nxtbkt -- index of bucket where found Bp -- Bucketptr to bucket where found hshout -- hash value isnew -- 1 if new entry inserted, 0 otherwise return value 1 (found) 0 (not found) -1 (error occurred) Behaviors: if hsh == NOHASH and Member1 == 0 then rootgroupI should be valid; match any full value past reentrant state else if hsh, rootgroup, etc. not defined compute them. if the rootgroup is currently an overflow swap it out. search in circular list headed at rootgroup for match (if Force and there is space in existing bucket, force insert) if no match found and Force, allocate a new group on this list and force insert the member. */ /* crazy idea: macroize this monster, and use stub only for recursive calls... */ static long tableMatch( Table *tp, PyObject *member1, PyObject *map1, enum ForceFlag Force, long rootgroupI, long lastgroupI, long lastbucketI, long hsh, /*outputs*/ long *rtgrp, long *nxtgrp, long *nxtbkt, Bucketptr *Bp, long *hshout, long *isnew) { register enum BucketFlag flag = tp->flag; GroupArray groups = tp->groups; Groupptr root, thisgroup, avail; enum GState *state = 0, *availState = 0; long *Next = 0, *rNext = 0, AvailableI, found, *availNext = 0, *dirtyptr; unsigned long lhsh; /*Dprint(("TableMatch %ld\n",hsh));*/ /* used to mark the table dirty upon "bucket overwrite" */ dirtyptr = &(tp->Dirty); /* sanity checks (comment out later?) */ if ( (member1 == 0) && ( (rootgroupI < 0) || (Force == FORCE) ) ) { PyErr_SetString(PyExc_SystemError, "bug in kjbuckets implementation (tableMatch)"); return -1; } /* compute hash value if absent and needed */ if ((hsh == NOHASH) && (member1 != 0)) { GETHASH(hsh, member1); if (hsh == -1) { return -1; } /* unhashable */ Dprint(("tm: hash = %ld computed\n",hsh)); } /* sanity check */ /*if (tp->Free != -1) { GArrayRef(groups, flag, tp->Free, root, state, rNext); if (*state != FREE) { PyErr_SetString(PyExc_SystemError, "free index not free in table"); return -1; } }*/ *hshout = hsh; /* return value */ lhsh = /*(unsigned long)*/ hsh; /* find the root group if needed */ if (rootgroupI < 0) { rootgroupI = lastgroupI = lhsh % tp->basesize; lastbucketI = -1; /* swap out or free root group if needed */ GArrayRef(groups, flag, rootgroupI, root, state, rNext); if (*state != ROOT) { /* failure, unless forced insert */ if (Force == NOFORCE) { return 0; } /* lastgroup and lastbucket must be none */ lastgroupI = lastbucketI = -1; /* otherwise must force an insert, need root group... */ if (*state == OVERFLOW) { /* swap out the overflow group */ Dprint(("root is overflow %ld\n",rootgroupI)); if (tp->Free == -1) { /* nowhere to swap, must resize up */ Dprint(("tm: resizing for root\n")); if (tableResize(tp, RESIZEUPSIZE(tp)) == 0) { return -1; /* failure to resize */ } return tableMatch(tp, member1, map1, Force, -1, -1, -1, hsh, rtgrp, nxtgrp, nxtbkt, Bp, hshout, isnew); } UnFreeTableIndex(AvailableI, tp, tp->Free); Gswapout(groups, rootgroupI, AvailableI, flag); } else { if (*state == FREE) { Dprint(("unfreeing rootgroup %ld\n", rootgroupI)); UnFreeTableIndex(rootgroupI, tp, rootgroupI); } else { PyErr_SetString(PyExc_SystemError, "bad rootgroup state in tablematch"); return -1; /* error */ } } /* set the next of new root group to self */ /* paranioa: technically the structure may have changed... (omit?) 
*/ GArrayRef(groups, flag, rootgroupI, root, state, rNext); *state = ROOT; *rNext = rootgroupI; } } if (lastgroupI<0) { lastgroupI = rootgroupI; lastbucketI=-1; } *rtgrp = rootgroupI; /*Dprint(("tm: lg = %ld, rg = %ld, lb = %ld\n",\ lastgroupI, rootgroupI, lastbucketI));*/ /* look in circular list until looped or found */ do { Dprint(("tm: looking %ld\n", lastgroupI)); GArrayRef(groups, flag, lastgroupI, thisgroup, state, Next); *nxtgrp = lastgroupI; groupmatch(found, thisgroup, flag, hsh, member1, map1,\ Force, lastbucketI, (*nxtbkt), \ (*Bp), (*isnew), (*dirtyptr)); if (*Next == rootgroupI) { break; } lastgroupI = *Next; lastbucketI = -1; } while (found == 0); /* success if found */ if (found != 0) { Dprint(("tm: found = %ld\n",found)); if (found<0) { PyErr_SetString(PyExc_SystemError, "groupmatch abnormal return"); return -1; } if (*isnew != 0) { tp->entries++; } Dprint(("tm: success, rg=%ld, ng=%ld, nb=%ld, ho=%ld, in=%ld", \ *rtgrp, *nxtgrp, *nxtbkt, *hshout, *isnew)); return 1; } /* otherwise force an insert into a new group, if requested */ if (Force == FORCE) { Dprint(("tm: trying to force insert to overflow\n")); if (tp->Free == -1) { /* no room, no room (mad hatter) */ Dprint(("tm: resizing for overflow\n")); if (tableResize(tp, RESIZEUPSIZE(tp)) == 0) { return -1; /* failure to resize */ } return tableMatch(tp, member1, map1, Force, -1, -1, -1, hsh, rtgrp, nxtgrp, nxtbkt, Bp, hshout, isnew); } UnFreeTableIndex(AvailableI, tp, tp->Free); GArrayRef(groups, flag, AvailableI, avail, availState, availNext); *availState = OVERFLOW; *availNext = rootgroupI; *Next = AvailableI; groupmatch(found, avail,flag,hsh,member1,map1, Force, -1, (*nxtbkt), (*Bp), (*isnew), (*dirtyptr)); if (found<0) { PyErr_SetString(PyExc_SystemError, "groupmatch abnormal return"); return -1; } *nxtgrp = AvailableI; if (*isnew != 0) { tp->entries++; } return 1; /* successful insert */ } return 0; /* not found */ } /* some simple uses of table matching */ /* find (or set) a matching pair */ static long TableGet1( Table *tp, PyObject *member1, PyObject *map1, long hash, enum ForceFlag Force, PyObject **memout, PyObject **mapout) { long hashout; long rt, nxt, nxtb, isnew, found; Bucketptr Bp; enum BucketFlag flag = tp->flag; if (member1 == NULL) { PyErr_SetString(PyExc_SystemError, "TableGet1 called with NULL??"); return -1; } Dprint(("tg1: calling tablematch\n")); found = tableMatch(tp, member1, map1, Force, -1, -1, -1, hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return -1; } if (found == 0) { PyErr_SetObject(PyExc_KeyError, member1); return -1; } BPtrDestructure(Bp, flag, hashout, *memout, *mapout); return 0; } /* utility function for resizing a table: reinserting a group */ /* could macroize */ long ReInsertGroup( Groupptr g, enum BucketFlag flag, Table *tp) { PyObject *Member = 0, *Map = 0; long i, rt, nxt, nxtb, isnew, test; long hash = 0, h; Bucketptr Bp, Bpdummy; for (i=0; igroups, tp->flag, tp->size ); tp->entries = 0; return 1; } long tableResize( Table *tp, long expected ) { long i, *Next; enum GState *State = 0; Groupptr g; long size = tp->size; enum BucketFlag flag = tp->flag; GroupArray oldgroups = tp->groups; long DirtyVal = tp->Dirty; long success = 1; /* assume success */ Dprint(("tresize: resizing %ld\n",expected)); /* allocate a new Table */ if (AllocateBuckets(tp, expected) != 1) { return 0; } /* for debug */ /*if (tp->Free!=-1) { GArrayRef(tp->groups, flag, tp->Free, g, State, Next); if (*State != FREE) { Dprint(("free ptr %ld corrupted in resize/alloc, State=%ld not 
%ld\n",\ tp->Free,*State,FREE)); PyErr_SetString(PyExc_SystemError, "resize fail (1)"); return 0; } }*/ /* now reinsert all former contents */ for (i=0; iFree!=-1) { GArrayRef(tp->groups, flag, tp->Free, g, State, Next); if (*State != FREE) { Dprint((\ "free ptr %ld corrupted in resize/reinsert %ld, State=%ld not %ld\n",\ tp->Free,i,*State,FREE)); PyErr_SetString(PyExc_SystemError, "resize fail (2)"); return 0; }*/ } } /* deallocate the old groups */ groupsDealloc(oldgroups, flag, size); tp->Dirty = DirtyVal; /* use old dirty value... (paranoia) */ /* for debug */ /*if (tp->Free!=-1) { GArrayRef(tp->groups, flag, tp->Free, g, State, Next); if (*State != FREE) { Dprint(("free ptr %ld corrupted in resize, State=%ld not %ld\n",tp->Free,\ *State,FREE)); PyErr_SetString(PyExc_SystemError, "resize fail (3)"); return 0; }*/ if (success==0) Dprint(("failing in tableresize\n")); return success; } /* deleting a member from a group, deletes *all* matching members */ long deleteFromTable(Table *tp, PyObject *member1, PyObject *map1) { PyObject *M = 0, *Mp = 0; enum BucketFlag flag = tp->flag; GroupArray groups = tp->groups; long hash, bhash; long test, rtgrp, nxtgrp, nxtbkt, isnew, found, grp, *N = 0, brt, bnxtgrp, bnxtbkt, bisnew, bfound, rtg1, rtg2; Bucketptr Bp, bBp; Groupptr g; enum GState *State; /* find first match */ found = tableMatch(tp, member1, map1, NOFORCE, -1, -1, -1, NOHASH, &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew); if (found == -1) { return 0; } /* external error */ if (found == 0) { PyErr_SetObject(PyExc_KeyError, member1); return 0; } /* mark the table as dirty */ tp->Dirty = 1; /* delete all such matches */ while (found) { BPtrReInit(Bp, flag); tp->entries--; found = tableMatch(tp, member1, map1, NOFORCE, rtgrp, nxtgrp, nxtbkt, hash, &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew); if (found == -1) { return 0; } /* external error */ } /* back fill nulled entries in circular list (could be faster?) */ found = tableMatch(tp, 0, 0, NOFORCE, rtgrp, rtgrp, -1, NOHASH, &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew); if (found == -1) { return 0; } /* error */ brt = bnxtgrp = rtgrp; bnxtbkt = -1; while (found) { BPtrDestructure(Bp, flag, hash, M, Mp); tp->entries--; /* !!! NOTE: since BPtrReInit Py_DECREFs the contents, must Py_INCREF contents here to prevent deallocation of the members and decref after reinstallation in the table !!! (kinda subtle python thing!) !!! */ Py_XINCREF(M); Py_XINCREF(Mp); BPtrReInit(Bp,flag); bfound = tableMatch(tp, M, Mp, FORCE, brt, bnxtgrp, bnxtbkt, hash, &brt, &bnxtgrp, &bnxtbkt, &bBp, &bhash, &bisnew); Py_DECREF(M); Py_DECREF(Mp); if (found != 1) { PyErr_SetString(PyExc_SystemError, "?? 
cannot backfill on delete"); return 0; } found = tableMatch(tp, 0, 0, NOFORCE, rtgrp, nxtgrp, nxtbkt, NOHASH, &rtgrp, &nxtgrp, &nxtbkt, &Bp, &hash, &isnew); if (found == -1) { return 0; } } /* now free up any groups on this cycle that are left empty */ /* this will only delete the rootgroup if there is nothing in the cycle */ grp = rtgrp; do { GArrayRef(groups, flag, grp, g, State, N); nxtgrp = *N; GroupEmpty(test, g,flag); if (test) { if (grp == rtgrp) { rtg1 = rtg2 = rtgrp; /* nasty macro bug fixed here */ Gprevious(rtg1,flag,rtg2,groups); /* for termination */ } FreeTableIndex(tp,grp); } grp = nxtgrp; } while (grp != rtgrp); /* finally, resize if too few entries */ if (RESIZEDOWNTEST(tp)) { tableResize(tp, tp->entries); } return 1; } /***********************************************************/ /** table walker methods **/ /* TableWalkers are used for *strictly local and temporary* walking of table structure in two ways: - by key - by all values in table (things like increfs and decrefs aren't done since use is temporary). */ typedef struct { Table *tp; long valid; /* 1 means okay, 0 means done, -1 means error */ long root; long lastgroup; long lastbucket; PyObject *key; PyObject *map; long hash; } TableWalker; /* methods for walking by all values */ static long NextAll(TableWalker *twp) { Bucketptr Bp; Groupptr g; enum BucketFlag flag; enum GState *State = 0; long *Next, size, found, isnew, dirtyptr; PyObject *dummy; Table *tp = twp->tp; size = tp->size; flag = tp->flag; if (twp->lastgroup > size) { twp->valid = 0; return 0; /* failure return */ } if ((twp->lastgroup == -1) || (twp->lastbucket>GSIZE)){ twp->lastbucket = -1; twp->lastgroup++; } found = 0; do { GArrayRef(tp->groups, flag, twp->lastgroup, g, State, Next); if ((*State==ROOT) || (*State==OVERFLOW)) { dummy = 0; groupmatch(found, g, flag, NOHASH, dummy, dummy, NOFORCE,\ (twp->lastbucket), (twp->lastbucket), \ Bp, isnew, dirtyptr); } if (found==0) { twp->lastgroup++; twp->lastbucket = -1; } } while ( (found == 0) && (twp->lastgroup < size) ); if (found == 0) { twp->valid = 0; return 0; /* failure return */ } /* success: find the hash, key and map values */ BPtrDestructure(Bp, flag, (twp->hash), (twp->key), (twp->map)); twp->valid = 1; /*printf("allwalker: item found with hash %ld\n",twp->hash);*/ return 1; /* successful return */ } /* could macroize */ static void InitAll(TableWalker *twp, Table *tp) { twp->lastgroup = -1; twp->lastbucket = -1; twp->tp = tp; twp->valid = NextAll(twp); } /* methods for walking my key NOHASH may be used as an "unknown" hash value */ static long Nextbykey(TableWalker *twp) { Bucketptr Bp; PyObject *dummyk; long dummyh; long isnew; Dprint(("Nextbykey\n")); twp->valid = tableMatch(twp->tp, twp->key, 0, NOFORCE, twp->root, twp->lastgroup, twp->lastbucket, twp->hash, &(twp->root), &(twp->lastgroup), &(twp->lastbucket), &Bp, &(twp->hash), &isnew); if (twp->valid == 1) { BPtrDestructure(Bp, twp->tp->flag, dummyh, dummyk, (twp->map)); } return twp->valid; } /* could macroize */ static void Initbykey(TableWalker *twp, Table *tp, PyObject *key, long hash) { Dprint(("Initbykey\n")); twp->tp = tp; twp->root = -1; twp->lastgroup = -1; twp->lastbucket = -1; twp->key = key; twp->hash = hash; twp->valid = Nextbykey(twp); } /*******************************************************************/ /** methods for combining tables **/ /* augmenting one table using another, assuming types are compatible */ static long Taugment(Table *target, Table *source) { long test; TableWalker tw; PyObject *d1, *d2; /* walk 
through the source */ (void) InitAll(&tw, source); while (tw.valid == 1) { Dprint(("taug: TableGet1\n")); test = TableGet1(target, tw.key, tw.map, tw.hash, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error return */ (void) NextAll(&tw); } return tw.valid; /* 0 for success, -1 for error */ } /* transpose a table (can't be a set!) if target is a dictionary result may be nondeterministic unless source is 1:1. if target is a set result will be set of all targets+dests (nodes) */ static long Ttranspose(Table *target, Table *source) { long test; TableWalker tw; PyObject *d1, *d2; enum BucketFlag tflag = target->flag; /* source flag cannot be set */ if (source->flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "Cannot transpose set"); return -1; /* error return */ } /* walk through the source */ (void) InitAll(&tw, source); while (tw.valid == 1) { if (tflag == SETFLAG) { /* add mem and map separately to target */ test = TableGet1(target, tw.key, 0, tw.hash, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ test = TableGet1(target, tw.map, 0, NOHASH, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ } else { /* add inversion */ test = TableGet1(target, tw.map, tw.key, NOHASH, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ } /* advance cursor */ (void) NextAll(&tw); } return tw.valid; /* 0 for success, -1 for error */ } /* Compose a dict/graph with a dict/graph and put the result in another. If mask is non-null mask out any members of mask (for tclosure). Table types assumed to be sensible. target = ( (left o right) - mask ) long returned is number of inserts or -1 on error. if prelim is set only counting will be done, no inserts (target may be null). */ static long Tcompose(Table *target, Table *left, Table *right, Table *mask, long prelim) { TableWalker lwalker, rwalker; PyObject *d1, *d2; long test, count, exclude, rt, nxt, nxtb, isnew; Bucketptr Bp; long hashout; enum BucketFlag lflag = left->flag; /* walk through left */ (void) InitAll(&lwalker, left); Dprint(("Tcompose: lwalker initialized\n")); count = 0; while (lwalker.valid == 1) { /* walk through members of right matching lwalker.map */ /* if left is a set then don't recompute the hash value */ if (lflag == SETFLAG) { (void) Initbykey(&rwalker, right, lwalker.key, lwalker.hash); } else { (void) Initbykey(&rwalker, right, lwalker.map, NOHASH); } Dprint(("Tcompose: rwalker initialized\n")); while (rwalker.valid == 1) { exclude = 0; if (mask != 0) { Dprint(("Tcompose: computing exclude\n")); exclude = tableMatch(mask, lwalker.key, rwalker.map, NOFORCE, -1, -1, -1, lwalker.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); } if (exclude==0) { if (prelim==0) { test = TableGet1(target, lwalker.key, rwalker.map, lwalker.hash, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ } count++; } (void) Nextbykey(&rwalker); } if (rwalker.valid == -1) { return -1; } /* error */ (void) NextAll(&lwalker); } if (lwalker.valid == -1) { return -1; } /* error */ return count; } /* Add the intersection or difference of two tables to another table. On error returns -1, else returns count of inserts. Invoke with a nonzero prelim value to get just count of inserts without inserting, in this case target may be null. */ static long Tintdiff(Table *target, Table *left, Table *right, long include, long prelim) { long hashout; long test, rt, nxt, nxtb, isnew, found, count; Bucketptr Bp; TableWalker tw; PyObject *d1, *d2; /* walk through left */ (void) InitAll(&tw, left); count = 0; while (tw.valid == 1) { /* is current in right? 
*/ found = tableMatch(right, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return -1; } /* error */ /* maybe either include or exclude the member based on flag value */ if ( ((include==1)&&(found==1)) || ((include==0)&&(found==0)) ) { if (prelim == 0) { test = TableGet1(target, tw.key, tw.map, tw.hash, FORCE, &d1, &d2); if (test!=0) { return -1; } /* error */ } count++; } /* advance cursor */ (void) NextAll(&tw); } if (tw.valid == -1) { return -1; } /* error */ return count; /* success */ } /* Utility function for comparisons: find the "smallest" pair in left that is not in right return 1 if found, else 0 (-1 on error). */ static long Tmindiff(Table *left, Table *right, PyObject **mem, PyObject **map, long *hash) { long hashout; long gotit, rt, nxt, nxtb, isnew, found, cmp; Bucketptr Bp; TableWalker tw; /* walk through left */ (void) InitAll(&tw, left); gotit = 0; while (tw.valid == 1) { /* is current in right? */ found = tableMatch(right, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return -1; } /* error */ /* if it wasn't in right test it for minimality */ if (found == 0) { if (gotit == 0) { *mem = tw.key; *map = tw.map; *hash = tw.hash; gotit = 1; } else { cmp = *hash - tw.hash; if (cmp == 0) { cmp = PyObject_Compare( tw.key, *mem ); } if ((cmp>0) || ((cmp==0) && (tw.map!=0) && (PyObject_Compare( tw.map, *map )>0))) { *mem = tw.key; *map = tw.map; *hash = tw.hash; } } } (void) NextAll(&tw); } if (tw.valid == -1) { return -1; } /* error */ return gotit; } /* for coercing table types: Dict intersect Graph is Dict, Dict union Graph is Graph, etc. generality should be positive (nonzero) to default to more general negative to default to less general */ static long FlagCoercion(enum BucketFlag flag1, enum BucketFlag flag2, enum BucketFlag *fp, long Generality) { *fp = flag2; if ( ((flag1 > flag2) && (Generality>0) ) || ((flag1 < flag2) && (Generality<0) ) ) { *fp = flag1; } return 1; /* always succeed */ } /*********************************************/ /* python data structures and interfaces... */ /*********************************************/ /* general structure for all table behaviors */ typedef struct { PyObject_VAR_HEAD /* the hash flag */ /* IF THIS IS NOT NOHASH THE TABLE SHOULD BE IMMUTABLE */ long hashvalue; /* the flag in member rep determines behaviors */ Table rep; } TableWrapper; /* predeclarations of type objects */ staticforward PyTypeObject kjSettype; staticforward PyTypeObject kjDicttype; staticforward PyTypeObject kjGraphtype; /* type test macros */ #define is_kjSetobject(op) ((op)->ob_type == &kjSettype) #define is_kjDictobject(op) ((op)->ob_type == &kjDicttype) #define is_kjGraphobject(op) ((op)->ob_type == &kjGraphtype) #define is_kjTable(op) \ ( is_kjSetobject(op) || is_kjDictobject(op) || is_kjGraphobject(op) ) /* for algebraic operations that may be using a tainted argument propagate the taintedness... (requires ending semicolon!) 
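   for example: propagateDirt(wp, result); -- as used by Wtransclose,
   Greachable and Dremap below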
*/ #define propagateDirt(in,out) \ if (in->rep.Dirty!=0) out->rep.Dirty = 1 /* internal allocation function for table wrappers */ static PyObject * newWrapper(long expectedsize, enum BucketFlag flag) { /* allocate one wrapper */ TableWrapper *wp; Dprint(("WnewWrapper\n")); wp = PyMem_NEW(TableWrapper, 1); if (wp == NULL) { return PyErr_NoMemory(); /* allocation failure */ } switch (flag) { case SETFLAG: wp->ob_type = &kjSettype; break; case DICTFLAG: wp->ob_type = &kjDicttype; break; case GRAPHFLAG: wp->ob_type = &kjGraphtype; break; default: PyErr_SetString(PyExc_SystemError, "invalid internal table flag"); return NULL; } /* initialize the internal table */ if (initTable(&(wp->rep), flag, expectedsize) == 0) { /* initialization failed, assume an appropriate error is set */ PyMem_Del(wp); return NULL; } Dprint(("WnewWrapper: table initialized\n")); wp->hashvalue = NOHASH; /* INITIALIZE THE REFERENCE COUNT FOR THE NEW OBJECT */ _Py_NewReference(wp); return (PyObject *) wp; } /* *almost* an external python constructor for wrappers */ static PyObject * makeWrapper(PyObject *module, PyObject *args, enum BucketFlag flag) { TableWrapper *result, *initWrapper; PyObject *initlist, *pair, *key, *map, *d1, *d2; long len = 0, members, valid, index, islist, iskjtable, istuple; Table *tp; islist = 0; iskjtable = 0; istuple = 0; initlist = NULL; initWrapper = NULL; Dprint(("makeWrapper\n")); /* no args: allocate a smallest table: */ if (args == NULL) { members = 0; } else { /* some args: check it and determine its length */ valid = PyArg_Parse(args, "i", &members); if (!valid) { PyErr_Clear(); valid = PyArg_Parse(args, "O", &initlist); if (valid) { islist = PyList_Check(initlist); if (islist) { Dprint(("makeWrapper from list\n")); len = PyList_Size(initlist); } else { iskjtable = is_kjTable(initlist); if (iskjtable) { Dprint(("makeWrapper from kj-table\n")); initWrapper = (TableWrapper *) initlist; len = initWrapper->rep.entries; } else { istuple = PyTuple_Check(initlist); if (istuple) { Dprint(("makeWrapper from tuple\n")); len = PyTuple_Size(initlist); } else { valid = 0; } } } } if (!valid) { PyErr_SetString(PyExc_TypeError, "initializer must be integer or list or tuple or kj-Table"); return NULL; } members = len/2; /* try to conserve space when initializing from list */ } } result = (TableWrapper *) newWrapper(members, flag); if (result == NULL) { return NULL; } /* error */ /* use initialization list if there is one */ if (initlist != NULL) { /* if its a Python list or tuple, initialize from it... */ if ( islist || istuple ) { Dprint(("makeWrapper unpacking Python sequence\n")); tp = &(result->rep); for (index = 0; indexrep), &(initWrapper->rep) ); if (valid!=0) { Py_DECREF(result); return NULL; } } } return (PyObject *) result; } /* specialization for sets */ static PyObject * makekjSet(PyObject *module, PyObject *args) { return makeWrapper(module, args, SETFLAG); } /* specialization for graphs */ static PyObject * makekjGraph(PyObject *module, PyObject *args) { return makeWrapper(module, args, GRAPHFLAG); } /* specialization for dicts */ static PyObject * makekjDict(PyObject *module, PyObject *args) { return makeWrapper(module, args, DICTFLAG); } #ifdef KJBDEBUG static PyObject * Wdebug( PyObject *m, PyObject *a) { if (DebugLevel) { DebugLevel = 0; } else { DebugLevel = 1; } Py_INCREF(Py_None); return Py_None; } #endif static void WrapperDeallocate(TableWrapper *wp) { /* must properly decref references... 
*/ groupsDealloc( wp->rep.groups, wp->rep.flag, wp->rep.size ); PyMem_Del(wp); } /* hash value: symmetrical on members, a symmetrical within pairs */ static long Wrapper_hash(TableWrapper *wp) { enum BucketFlag flag = wp->rep.flag; long this, that; long result = 121345; /* silly init value */ TableWalker tw; Dprint(("Whash\n")); if (wp->hashvalue != NOHASH) { /* memoized hash value */ return wp->hashvalue; } result *= (wp->rep.entries+1); (void) InitAll(&tw, &(wp->rep)); while (tw.valid == 1) { this = tw.hash; /* bug/feature: structures that differ only on unhashable maps will have the same hash value. I don't know whether to keep this of "fix" it. Hmmm. */ if ( (flag != SETFLAG) &&(tw.map != 0)) { GETHASH(that,tw.map); if (that == -1) { PyErr_Clear(); } this += (that*23); } result ^= this; (void) NextAll(&tw); } if (tw.valid == -1) { return NOHASH; } /* error */ if (result == -1) { result = 973; } wp->hashvalue = result; return result; } static PyObject * WrapperItems1(TableWrapper *wp, PyObject *args, long dokey, long domap) { PyObject *resultlist, *membertuple; TableWalker tw; long index, entries; Dprint(("WItems1\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ entries = wp->rep.entries; /* make a list for all entries */ resultlist = PyList_New( entries ); if (resultlist == NULL) { return NULL; } /* error */ /* walk through the table */ (void) InitAll(&tw, &(wp->rep)); index = 0; while (tw.valid == 1) { /* sanity check */ if (index >= entries) { Py_DECREF(resultlist); PyErr_SetString(PyExc_SystemError, "loop overflowing in WrapperItems"); return NULL; /* error */ } /* get only the key, if requested */ if ((dokey != 0) && (domap == 0)) { Py_XINCREF(tw.key); PyList_SetItem(resultlist, index, tw.key); } else { /* get only the map, if requested */ if ((domap != 0) && (dokey == 0)) { Py_XINCREF(tw.map); PyList_SetItem(resultlist, index, tw.map); } else { /* otherwise get both */ membertuple = PyTuple_New(2); if (membertuple == NULL) { Py_DECREF(resultlist); return NULL; /* error */ } Py_XINCREF(tw.key); PyTuple_SetItem(membertuple, 0, tw.key); Py_XINCREF(tw.map); PyTuple_SetItem(membertuple, 1, tw.map); PyList_SetItem(resultlist, index, membertuple); } } index++; (void) NextAll(&tw); } if (tw.valid == -1) { Py_DECREF(resultlist); return NULL; /* error */ } return resultlist; } static PyObject * WrapperItems(TableWrapper *wp, PyObject *args) { Dprint(("WItems\n")); if (wp->rep.flag == SETFLAG) { /* for sets do key only */ return WrapperItems1(wp, args, 1, 0); } else { /* for others, get both */ return WrapperItems1(wp, args, 1, 1); } } /* prlong function with debug option */ static long WrapperPrint(TableWrapper *wp, FILE *fp, long flags) { PyObject * items; #ifdef WDEBUGPRINT if (WDEBUGPRINT) { return TableDump((wp->rep), fp); } #endif switch (wp->rep.flag) { case SETFLAG: fprintf(fp, "kjSet("); break; case DICTFLAG: fprintf(fp, "kjDict("); break; case GRAPHFLAG: fprintf(fp, "kjGraph("); break; default: fprintf(fp, "??unknown table type??\n"); } items = WrapperItems(wp, NULL); if (items == NULL) { fprintf(fp, "??couldn't allocate items??\n"); return -1; } if (PyObject_Print(items, fp, 0) != 0) { return -1; } Py_DECREF(items); fprintf(fp, ")"); return 0; } static PyObject* WrapperRepr(TableWrapper *wp) { PyObject *items, *result, *itemstring; char buf[256]; switch (wp->rep.flag) { case SETFLAG: sprintf(buf, "kjSet("); break; case DICTFLAG: sprintf(buf, "kjDict("); break; case GRAPHFLAG: sprintf(buf, "kjGraph("); break; default: PyErr_SetString(PyExc_SystemError, "Bad flag in 
table"); return NULL; } result = PyString_FromString(buf); items = WrapperItems(wp, NULL); if (items == NULL) { return NULL; } itemstring = PyObject_Repr(items); Py_DECREF(items); PyString_ConcatAndDel(&result, itemstring); PyString_ConcatAndDel(&result, PyString_FromString(")")); return result; } /* nonzero testing */ static long Wrapper_nonzero(TableWrapper *wp) { Dprint(("Wnonzero\n")); return (wp->rep.entries != 0); } /* comparison: if w1 and w2 are of same type then w1rep); rtable = &(right->rep); lentries = ltable->entries; rentries = rtable->entries; if (lentries 0) { return 1; } /* mems are identical, try maps */ if ( (lmap != 0) && (rmap != 0) ) { /* if we get this far the following shouldn't return 0, ever. */ return PyObject_Compare(lmap,rmap); } /* this should be an error, but it can't be done?? */ return 0; } static PyObject * Whas_key(TableWrapper *wp, PyObject *args) { long test, rt, nxt, nxtb, isnew; long hashout; Bucketptr Bp; PyObject *key; Dprint(("Whas_key\n")); if ((args == NULL) || !PyArg_Parse(args, "O", &key)) { PyErr_SetString(PyExc_TypeError, "table method has_key requires an argument"); return NULL; } test = tableMatch(&(wp->rep), key, 0, NOFORCE, -1, -1, -1, NOHASH, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (test == -1) { return NULL; } /* error */ return PyInt_FromLong((long) test); } /* Get the neighbors of a node in a graph. */ static PyObject *Gneighbors(TableWrapper *wp, PyObject *args) { PyObject *key, *resultlist; Table *tp; TableWalker tw; long count, index; Dprint(("Gneighbors\n")); if ((args == NULL) || !PyArg_Parse(args, "O", &key)) { PyErr_SetString(PyExc_TypeError, "table method neighbors requires an argument"); return NULL; } tp = &(wp->rep); if ( tp->flag == SETFLAG ) { PyErr_SetString(PyExc_TypeError, "neighbors not defined for table of this type"); return NULL; } /* find out how many neighbors there are */ count = 0; (void) Initbykey(&tw, tp, key, NOHASH); Dprint(("Gneighbors: counting neighbors\n")); while (tw.valid == 1) { count++; (void) Nextbykey(&tw); } if (tw.valid == -1) { return NULL; } /* error */ /* make a list large enough */ Dprint(("Gneighbors: making resultlist\n")); resultlist = PyList_New( count ); if (resultlist == NULL) { return NULL; } /* failure to allocate */ /* record neighbors in list */ (void) Initbykey(&tw, tp, key, NOHASH); index = 0; Dprint(("Gneighbors: storing results\n")); while (tw.valid == 1) { if (index >= count) { Py_DECREF(resultlist); PyErr_SetString(PyExc_SystemError, "loop overflow in neighbors calculation"); return NULL; } Py_XINCREF(tw.map); PyList_SetItem(resultlist, index, tw.map); index++; (void) Nextbykey(&tw); } if (tw.valid == -1) { Py_DECREF(resultlist); return NULL; } return resultlist; } /* utility function for extracting keys or values if domaps is set this will get maps uniquely *only if all maps are hashable!* */ static PyObject *Wparts(TableWrapper *wp, PyObject *args, long domaps) { TableWalker tw; Table *tp, *Settp; TableWrapper *tempSet; PyObject *mem, *map, *items; long test; Dprint(("Wparts\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ tp = &(wp->rep); if (tp->flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "keys/values not defined for sets"); return NULL; } /* initialize a temp set to hold the keys */ /* try to save a little space here, may actually waste space sometimes */ tempSet = (TableWrapper *) newWrapper(tp->entries/4, SETFLAG); if (tempSet == NULL) { return NULL; } Settp = &(tempSet->rep); /* walk the table and record the keys */ (void) InitAll(&tw, tp); 
test = 0; while ((tw.valid == 1) && (test != -1)) { if (domaps) { test = TableGet1(Settp, tw.map, 0, NOHASH, FORCE, &mem, &map); } else { test = TableGet1(Settp, tw.key, 0, tw.hash, FORCE, &mem, &map); } (void) NextAll(&tw); } if ((test == -1) || (tw.valid == -1)) { Py_DECREF(tempSet); return NULL; } items = WrapperItems(tempSet, NULL); Py_DECREF(tempSet); return items; } static PyObject *Wkeys(TableWrapper *wp, PyObject *args) { Dprint(("Wkeys\n")); return Wparts(wp, args, 0); } static PyObject *Wvalues(TableWrapper *wp, PyObject *args) { Dprint(("Wvalues\n")); /* return Wparts(wp, args, 1); -- wrong! */ return WrapperItems1(wp, args, 0, 1); } /* choose an arbitrary key from the table or raise an indexerror if none */ static PyObject *Wchoose_key(TableWrapper *wp, PyObject *args) { TableWalker tw; Dprint(("Wchoose_key\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ (void) InitAll(&tw, &(wp->rep)); if (tw.valid == 1) { Py_XINCREF(tw.key); return tw.key; } if (tw.valid == 0) { PyErr_SetString(PyExc_IndexError, "table is empty"); return NULL; } /* external error otherwise (tw.valid == -1) */ return NULL; } static PyObject *WSubset(TableWrapper *subset, PyObject *args) { TableWrapper *superset; long hashout; long rt, nxt, nxtb, isnew, found; Bucketptr Bp; TableWalker tw; Table *supertable; Dprint(("WSubset\n")); /* verify argument */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "Subset test requires argument"); return NULL; } if (!PyArg_Parse(args, "O", &superset)) { return NULL; } if ( !is_kjTable(superset)) { PyErr_SetString(PyExc_TypeError, "Subset defined only between kj-tables"); return NULL; } /* walk through subset, test for membership of all members */ (void) InitAll(&tw, &(subset->rep)); supertable = &(superset->rep); while (tw.valid == 1) { found = tableMatch(supertable, tw.key, tw.map, NOFORCE, -1, -1, -1, tw.hash, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); if (found == -1) { return NULL; } /* error */ if (found == 0) { /* subset test fails */ return PyInt_FromLong((long) 0); } (void) NextAll(&tw); } if (tw.valid == -1) { return NULL; } /* error */ /* otherwise, success */ return PyInt_FromLong((long) 1); } /* transitive closure of a graph */ /* algorithm could be made faster, KISS for now. 
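   In effect: target starts as a copy of source, then repeatedly
   Delta = (source o target) - target is folded into target until
   Delta comes up empty -- a simple fixpoint iteration.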
*/ static PyObject *Wtransclose(TableWrapper *wp, PyObject *args) { Table *source, *target, Delta; TableWrapper *closure; enum BucketFlag flag; long count, test, abort; Dprint(("Wtransclose\n")); if (!PyArg_Parse(args, "")) { return NULL; } /* error */ source = &(wp->rep); flag = source->flag; if (flag != GRAPHFLAG) { PyErr_SetString(PyExc_TypeError, "transitive closure not defined for this table type"); return NULL; } Dprint(("tc: allocating closure\n")); closure = (TableWrapper *) newWrapper(source->entries, flag); if (closure == NULL) { return NULL; } propagateDirt(wp, closure); target = &(closure->rep); /* closure of source contains source */ Dprint(("tc: augmenting closure\n")); test = Taugment( target, source ); if (test != 0) { Py_DECREF(closure); return NULL; } /* initialize temp table Delta for transitive arcs */ test = initTable(&Delta, flag, 0); /* add all transitive arcs */ abort = 0; do { /* Delta = (source o target) - target */ Dprint(("tc: calling tcompose\n")); count = Tcompose(&Delta, source, target, target, 0); Dprint(("tc: delta computed, count = %ld\n",count)); if (count<0) { abort = 1; } if ((abort == 0) && (count>0)) { /* target = target U Delta */ Dprint(("tc: augmenting target\n")); test = Taugment( target, &Delta ); Dprint(("tc: done augmenting target\n")); if (test!=0) { abort = 1; } tableClear( &Delta ); } Dprint(("tc: loop body done, count=%ld, abort=%ld\n",count,abort)); /* loop terminates when (source o target) subset target */ } while ((count>0) && (abort==0)); Dprint(("tc: deallocating Delta\n")); groupsDealloc(Delta.groups, flag, Delta.size); if (abort != 0) { Py_DECREF(closure); return NULL; } return (PyObject *) closure; } static void Wset_hash_error(void) { PyErr_SetString(PyExc_TypeError, "table has been hashed, it is now immutable"); } static PyObject * Wdelete_arc(TableWrapper *wp, PyObject *args) { PyObject *key, *map; Dprint(("Wdelete_arc\n")); if ((args == NULL) || !PyArg_Parse(args, "(OO)", &key, &map)) { PyErr_SetString(PyExc_TypeError, "delete_arc requires two arguments"); return NULL; } if (wp->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "delete_arc not defined on sets"); return NULL; } if (wp->hashvalue != NOHASH) { Wset_hash_error(); return NULL; } if (deleteFromTable(&(wp->rep), key, map) == 0) { return NULL; } Py_INCREF(Py_None); return Py_None; } /* simple membership test */ static PyObject * Wmember1(TableWrapper *wp, PyObject *args, long insert) { PyObject *key, *map; Table *tp; enum BucketFlag flag; long hashout; long rt, nxt, nxtb, isnew, found; Bucketptr Bp; Dprint(("Wmember1\n")); tp = &(wp->rep); flag = tp->flag; /* determine key and map */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "membership test requires argument(s)"); return NULL; } if ((insert!=0) & (wp->hashvalue!=NOHASH)) { Wset_hash_error(); return NULL; } if (flag == SETFLAG) { if (!PyArg_Parse(args, "O", &key)) { return NULL; } map = 0; } else { if (!PyArg_Parse(args, "(OO)", &key, &map)) { return NULL; } } if (insert == 0) { found = tableMatch(tp, key, map, NOFORCE, -1, -1, -1, NOHASH, &rt, &nxt, &nxtb, &Bp, &hashout, &isnew); return PyInt_FromLong((long) found); } else { found = TableGet1(tp, key, map, NOHASH, FORCE, &key, &map); if (found == -1) { return NULL; } Py_INCREF(Py_None); return Py_None; } } static PyObject * Wmember(TableWrapper *wp, PyObject *args) { Dprint(("Wmember\n")); return Wmember1(wp, args, 0); } static PyObject * Waddmember(TableWrapper *wp, PyObject *args) { Dprint(("Waddmember\n")); return Wmember1(wp, args, 1); } /* 
generate identity graph from a set */ static PyObject * Gidentity(TableWrapper *SourceSet, PyObject *args) { TableWrapper *resultGraph; Table *Graphtp; TableWalker tw; long test; PyObject *d1, *d2; Dprint(("Gidentity\n")); if (!PyArg_Parse(args, "")) { return NULL; } if (SourceSet->rep.flag != SETFLAG) { PyErr_SetString(PyExc_TypeError, "graph identity not defined for table of this type"); return NULL; } /* make a new DICTIONARY for result, may waste space for graphs */ resultGraph = (TableWrapper *) newWrapper(SourceSet->rep.entries/3, DICTFLAG); if (resultGraph == NULL) { return NULL; } Graphtp = &(resultGraph->rep); /* walk through the set */ (void) InitAll(&tw, &(SourceSet->rep)); test = 0; while ((tw.valid == 1) && (test != -1)) { test = TableGet1(Graphtp, tw.key, tw.key, tw.hash, FORCE, &d1, &d2); (void) NextAll(&tw); } if ((test == -1) || (tw.valid == -1)) { Py_DECREF(resultGraph); return NULL; } return (PyObject *) resultGraph; } static PyObject * Greachable(TableWrapper *graph, PyObject *args) { PyObject *key, *d1, *d2; TableWrapper *resultSet, *tempSet, *deltaSet; Table *resulttp, *temptp, *deltatp, *graphtp; TableWalker deltaW, graphW; long test, fail; Dprint(("Greachable\n")); if (graph->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "reachable not defined for this table type"); return NULL; } if ((args == NULL) || (!PyArg_Parse(args, "O", &key))) { PyErr_SetString(PyExc_TypeError, "reachable requires key argument"); return NULL; } /* make result and temporary sets for computation */ resultSet = (TableWrapper *) newWrapper(0, SETFLAG); tempSet = (TableWrapper *) newWrapper(0, SETFLAG); deltaSet = (TableWrapper *) newWrapper(0, SETFLAG); if ((deltaSet == NULL) || (resultSet == NULL) || (tempSet == NULL)) { Py_DECREF(deltaSet); Py_DECREF(resultSet); Py_DECREF(tempSet); return NULL; } propagateDirt(graph, resultSet); /* get table pointers */ resulttp = &(resultSet->rep); temptp = &(tempSet->rep); deltatp = &(deltaSet->rep); graphtp = &(graph->rep); /* initialize deltaSet to contain only the key */ test = TableGet1(deltatp, key, 0, NOHASH, FORCE, &d1, &d2); fail = 0; if (test == -1) { fail = 1; } /* repeat the following loop until delta becomes empty */ while ((deltatp->entries > 0) && (fail == 0)) { /* put all neighbors to delta members in temp */ (void) InitAll(&deltaW, deltatp); while ((deltaW.valid == 1) && (fail == 0)) { /* use this entry in delta to traverse neighbors in graph */ (void) Initbykey(&graphW, graphtp, deltaW.key, deltaW.hash); while ((graphW.valid == 1) && (fail == 0)) { test = TableGet1(temptp, graphW.map, 0, NOHASH, FORCE, &d1, &d2); if (test == -1) { fail = 1; } (void) Nextbykey(&graphW); } if (graphW.valid == -1) { fail = 1; } /* external error */ (void) NextAll(&deltaW); } if (deltaW.valid == -1) { fail = 1; } /* external error */ /* clear delta and reinit to temp-result */ if (fail == 0) { tableClear(deltatp); test = Tintdiff(deltatp, temptp, resulttp, 0, 0); if (test<0) { fail = 1; } } /* now add delta to result and clear temp */ if (fail == 0) { tableClear( temptp ); test = Taugment( resulttp, deltatp ); if (test != 0) { fail = 1; } } } /* endwhile delta has entries... 
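   (a breadth-first sweep: deltaSet holds the current frontier, tempSet
   collects the frontier's neighbors, resultSet accumulates everything
   reached so far)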
*/ /* get rid of temporaries */ Py_DECREF(tempSet); Py_DECREF(deltaSet); if (fail != 0) { Py_DECREF(resultSet); return NULL; } return (PyObject *) resultSet; } /* Clean filter: returns argument if the table is clean, otherwise NULL */ static PyObject * WClean(TableWrapper *wp, PyObject *args) { Dprint(("WClean\n")); if (!PyArg_Parse(args, "")) { return NULL; } if (wp->rep.Dirty) { Py_INCREF(Py_None); return Py_None; } else { Py_INCREF(wp); return (PyObject *) wp; } } /* force a table to be dirty */ static PyObject * WSoil(TableWrapper *wp, PyObject *args) { Dprint(("WSoil\n")); if (!PyArg_Parse(args, "")) { return NULL; } wp->rep.Dirty = 1; Py_INCREF(Py_None); return Py_None; } /* force a table to be clean */ static PyObject * WWash(TableWrapper *wp, PyObject *args) { Dprint(("WWash\n")); if (!PyArg_Parse(args, "")) { return NULL; } wp->rep.Dirty = 0; Py_INCREF(Py_None); return Py_None; } /* remap remaps a dictionary using a table which represents key rename pairs. Can be used to duplicate and/or project mappings. If the result is "dirty" (ie, if name/value collisions) Py_None is returned. */ static PyObject * Dremap(TableWrapper *wp, PyObject *args) { TableWrapper *remapper, *result; long count; Dprint(("Dremap\n")); if (!is_kjDictobject(wp)) { PyErr_SetString(PyExc_TypeError, "remap only defined for kjDicts"); return NULL; } if (args == NULL) { PyErr_SetString(PyExc_TypeError, "remap requires equality table argument"); return NULL; } if (!PyArg_Parse(args, "O", &remapper)) { return NULL; } if ( !is_kjTable(remapper)) { PyErr_SetString(PyExc_TypeError, "remap defined only between kj-tables"); return NULL; } /* don't assume anything about size of result */ result = (TableWrapper *) newWrapper(0, DICTFLAG); if (result == NULL) { return NULL; } /* allocation error */ propagateDirt(wp, result); propagateDirt(remapper, result); /* return NONE if result is dirty (save some work) */ if (result->rep.Dirty != 0) { Py_DECREF(result); Py_INCREF(Py_None); return Py_None; } count = Tcompose( &(result->rep), &(remapper->rep), &(wp->rep), 0, 0); if (count<0) { Py_DECREF(result); return NULL; /* error */ } /* return NONE if result is dirty after composition */ if (result->rep.Dirty != 0) { Py_DECREF(result); Py_INCREF(Py_None); return Py_None; } return (PyObject *) result; } /* forward declarations needed below */ static PyObject * kjDict_subscript(TableWrapper *Set, PyObject *key); static long kjDict_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing); /* for dumping a dictionary to a tuple */ /* D.dump(tup) produces D[tup[0]] if tup of len 1 or (D[tup[0]], D[tup[1]],...) if tup of len > 1 or keyerror if keys aren't present. 
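   For example (a sketch; it assumes the kjDict constructor accepts a
   list of key/value pairs):

       D = kjDict([("a", 1), ("b", 2)])
       D.dump(("a",))        # -> 1
       D.dump(("a", "b"))    # -> (1, 2)
       D.dump(("z",))        # -> KeyError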
*/ static PyObject * kjDictDump(TableWrapper *wp, PyObject *args) { PyObject *result, *input, *key, *map; long valid, index, length; Dprint(("kjDictDump\n")); if (!is_kjDictobject(wp) && !is_kjGraphobject(wp)) { PyErr_SetString(PyExc_TypeError, "dump only defined for kjDicts"); return NULL; } if (args == NULL) { PyErr_SetString(PyExc_TypeError, "dictionary dump requires tuple argument"); return NULL; } valid = PyArg_Parse(args, "O", &input); if (valid && (PyTuple_Check(input))) { length = PyTuple_Size(input); if (length < 1) { PyErr_SetString(PyExc_TypeError, "dictionary dump requires nonempty tuple arg"); return NULL; } if (length == 1) { /* return D[input[0]] */ key = PyTuple_GetItem(input, 0); return kjDict_subscript(wp, key); /* incref done by function */ } else { /* return ( D[input[0]], D[input[1]], ..., D[input[n]] ) */ result = PyTuple_New(length); if (result == NULL) { return NULL; } /* failure to allocate */ for (index = 0; index<length; index++) { key = PyTuple_GetItem(input, index); map = kjDict_subscript(wp, key); /* incref done by function */ if (map == NULL) { Py_DECREF(result); return NULL; } PyTuple_SetItem(result, index, map); } return result; } } PyErr_SetString(PyExc_TypeError, "dictionary dump requires tuple argument"); return NULL; } /* for undumping a dictionary from a tuple */ /* D = kjUndump(tup, thing) produces D with D[tup[0]]=thing if tup of len 1 or D[tup[i]]=thing[i] if tup of len >1 and thing of same len, or error */ static PyObject * kjUndumpToDict(PyObject *self, PyObject *args) { TableWrapper *result; PyObject *tup, *thing, *key, *map; long valid, index, length; Dprint(("kjUndump\n")); if (args == NULL) { PyErr_SetString(PyExc_TypeError, "kjUndump called with no args"); return NULL; } valid = PyArg_Parse(args, "(OO)", &tup, &thing); if (valid) { valid = PyTuple_Check(tup); } if (valid) { length = PyTuple_Size(tup); if (length<1) { PyErr_SetString(PyExc_ValueError, "kjUndump: tuple must be non-empty"); return NULL; } /* try to save a little space */ result = (TableWrapper *) newWrapper(length/2, DICTFLAG); if (result == NULL) { return NULL; } /* allocation failure */ if (length == 1) { /* return D[tup[0]] = thing */ key = PyTuple_GetItem(tup, 0); valid = kjDict_ass_subscript((PyObject *) result, key, thing); if (valid == -1) { Py_DECREF(result); return NULL; } return (PyObject *) result; } else { /* return for i in len(tup): D[tup[i]] = thing[i] */ if (PyTuple_Check(thing)) { if (PyTuple_Size(thing) != length) { PyErr_SetString(PyExc_TypeError,"kjUndump -- tuple lengths don't match"); return NULL; } for (index = 0; index<length; index++) { key = PyTuple_GetItem(tup, index); map = PyTuple_GetItem(thing, index); valid = kjDict_ass_subscript((PyObject *) result, key, map); if (valid == -1) { Py_DECREF(result); return NULL; } } return (PyObject *) result; } else { PyErr_SetString(PyExc_TypeError, "kjUndump -- thing must be a tuple of matching length"); Py_DECREF(result); return NULL; } } } PyErr_SetString(PyExc_TypeError, "kjUndump requires (tuple, thing)"); return NULL; } /* restrict a table to those entries whose keys also occur in a compare table */ static PyObject * kjWRestrict(TableWrapper *wp, PyObject *args) { TableWrapper *compare, *result; Table *tp, *resulttp, *comparetp; TableWalker wpWalker, compareWalker; enum BucketFlag flag; long test; PyObject *d1, *d2; Dprint(("kjWRestrict\n")); if ((args == NULL) || !PyArg_Parse(args, "O", &compare)) { PyErr_SetString(PyExc_TypeError, "restrict requires table argument"); return NULL; } if (!is_kjTable(compare)) { PyErr_SetString(PyExc_TypeError, "restrict defined only between kj-tables"); return NULL; } flag = wp->rep.flag; /* make no assumption about size of result */ result = (TableWrapper *) newWrapper(0, flag); if (result == NULL) { return NULL; } /* allocation failure */ /* heuristic: walk through restrictor if much smaller than self otherwise walk through self */ tp = &(wp->rep); resulttp = &(result->rep); comparetp = &(compare->rep); if (tp->entries > 4 * comparetp->entries) { /* walk through the restrictor */ (void) InitAll(&compareWalker, comparetp); test = compareWalker.valid; while ((compareWalker.valid == 1) && (test!=-1)) { /* walk through matches for key in tp */ /* (if many matches for same key, may not be efficient) */ (void) Initbykey(&wpWalker, tp, compareWalker.key, compareWalker.hash); while ((wpWalker.valid == 1) && (test != -1)) { /* put member from wpWalker in result */ test = TableGet1(resulttp, wpWalker.key, wpWalker.map, wpWalker.hash, FORCE, &d1, &d2); if (test!=-1) { (void) Nextbykey(&wpWalker); } if (wpWalker.valid == -1) { test = -1; } } if (test!=-1) { (void) NextAll(&compareWalker); } if (compareWalker.valid == -1) { test = -1; } } } else { /* walk through tp */ (void) InitAll(&wpWalker, tp); test = wpWalker.valid; while ((wpWalker.valid == 1) && (test!=-1)) { /* see if there is a match in compare */ (void) Initbykey(&compareWalker, comparetp, wpWalker.key, wpWalker.hash); /* if there, insert elt in result */ if (compareWalker.valid == 1) { test = TableGet1(resulttp, wpWalker.key,
wpWalker.map, wpWalker.hash, FORCE, &d1, &d2); } if (compareWalker.valid == -1) { test = -1; } if (test != -1) { (void) NextAll(&wpWalker); } if (wpWalker.valid == -1) { test = -1; } } } /* test for error cases */ if (test == -1) { Py_DECREF(result); return NULL; } /* otherwise just return result */ return (PyObject *) result; } /* special function for retrieving from dict-dumped indices "same as" def x.dget(dict, dumper): try: d = dict.dump(dumper) if d == Py_None: d = (Py_None,) return x.neighbors(d) except PyExc_KeyError: return Py_None x is kjDict or kjGraph dict is kjDict or kjGraph dumper is tuple dump of Py_None is mapped to (Py_None,) to avoid ambiguity elsewhere (may retrieve "too many neighbors" for key of Py_None or (Py_None,) defined benieth following utility function as static PyObject * kjWdget(TableWrapper *wp, PyObject *args) */ /* same as above but if testonly is set, then instead of x.neighbors(d) return 1 if neighbors set is nonempty, else, 0 */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjWdget1(TableWrapper *wp, PyObject *args, long testonly) { PyObject *d, *dumper, *result, *err_type /*, *err_value */; TableWrapper *dict; /* get and verify args */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "dget requires 2 arguments"); return NULL; } if (!PyArg_Parse(args, "(OO)", &dict, &dumper)) { PyErr_SetString(PyExc_TypeError, "dget requires dict, dumper"); return NULL; } if (!((is_kjDictobject(dict)) || (is_kjGraphobject(dict)))) { PyErr_SetString(PyExc_TypeError, "first arg of dget must be kjDict or kjGraph"); return NULL; } if (!PyTuple_Check(dumper)) { PyErr_SetString(PyExc_TypeError, "second arg of dget must be tuple"); return NULL; } /* initialize d */ d = kjDictDump(dict, dumper); if (d == NULL) { /* unable to dump */ /* check that error was a keyerror ??? */ /* err_get(&err_type, &err_value); */ err_type = PyErr_Occurred(); if (err_type != PyExc_KeyError) { /* some other error... abort */ /* PyErr_SetObject(err_type, err_value); */ return NULL; } PyErr_Clear(); /* in case of PyExc_KeyError, just return Py_None */ Py_INCREF(Py_None); return Py_None; } /* if dump was successful, return neighbors */ /* ??? should return d also ??? 
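   Either way the dumped tuple d is the probe key.  Typical use against
   an index built with kjKeyPut (defined below), as a sketch:

       hits = index.dget(row, ("name", "city"))    # matching entries, or None
       seen = index.dtest(row, ("name", "city"))   # 1/0, or None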
*/ if (testonly == 0) { result = Gneighbors(wp, d); } else { result = Whas_key(wp, d); } Py_DECREF(d); return result; } /* #endif */ /* variant of dget, that just tests for presence in index "same as" def x.dtest(dict, dumper): try: d = dict.dump(dumper) if d == Py_None: d = (Py_None,) return x.has_key(d) except PyExc_KeyError: return Py_None */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjWdtest(TableWrapper *wp, PyObject *args) { return kjWdget1(wp, args, 1); /* test only */ } /* #endif #ifndef PYTHON1DOT2 */ static PyObject * kjWdget(TableWrapper *wp, PyObject *args) { return kjWdget1(wp, args, 0); /* don't test only */ } /* #endif */ /* miscellaneous methods for these types */ static struct PyMethodDef Wrapper_methods[] = { {"member", (PyCFunction)Wmember}, {"add", (PyCFunction)Waddmember}, {"delete_arc", (PyCFunction)Wdelete_arc}, {"has_key", (PyCFunction)Whas_key}, {"choose_key", (PyCFunction)Wchoose_key}, {"Clean", (PyCFunction)WClean}, {"neighbors", (PyCFunction)Gneighbors}, {"dump", (PyCFunction)kjDictDump}, /* #ifndef PYTHON1DOT2 */ {"dget", (PyCFunction)kjWdget}, {"dtest", (PyCFunction)kjWdtest}, /* #endif */ {"reachable", (PyCFunction)Greachable}, {"subset", (PyCFunction)WSubset}, {"items", (PyCFunction)WrapperItems}, {"keys", (PyCFunction)Wkeys}, {"values", (PyCFunction)Wvalues}, {"ident", (PyCFunction)Gidentity}, {"remap", (PyCFunction)Dremap}, {"restrict", (PyCFunction)kjWRestrict}, {"tclosure", (PyCFunction)Wtransclose}, {"Soil", (PyCFunction)WSoil}, {"Wash", (PyCFunction)WWash}, {NULL, NULL} /* sentinel */ }; /* getattr snarfed from mappingobject.c */ static PyObject * Wrapper_getattr(PyObject *mp, char *name) { return Py_FindMethod(Wrapper_methods, (PyObject *)mp, name); } /* methods for special behaviors as number and mapping */ /* undefined operations */ static PyObject * undefbin(PyObject *v, PyObject *w) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } static PyObject * undefter(PyObject *v, PyObject *w, PyObject *z) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } static PyObject * undefun(PyObject *v) { PyErr_SetString(PyExc_TypeError, "op not valid for table of this type"); return NULL; } /* transpose of non 1:1 dict will have nondeterministic results */ static PyObject *Wtranspose(TableWrapper *source) { TableWrapper *result; long size, test; Dprint(("Wtranspose\n")); if (source->rep.flag == SETFLAG) { PyErr_SetString(PyExc_TypeError, "Cannot transpose set"); return NULL; } /* conservative estimate of size (may save space, maybe not) */ size = source->rep.entries; size = size/2; result = (TableWrapper *) newWrapper(size, source->rep.flag); if (result == NULL) { return NULL; } /* error */ propagateDirt(source, result); test = Ttranspose( &(result->rep), &(source->rep) ); if (test!=0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } static PyObject *Wunion(TableWrapper *left, TableWrapper *right) { enum BucketFlag flag; TableWrapper *result; long size, test; Dprint(("Wunion\n")); /* Py_None unioned with anything returns Py_None (universal set) */ if (((PyObject *) left == Py_None) || ((PyObject *) right == Py_None)) { Py_INCREF(Py_None); return Py_None; } /* arbitrary size heuristic */ if (left->rep.entries > right->rep.entries) { size = left->rep.entries; } else { size = right->rep.entries; } size = size/2; /* conservative to save space (maybe) */ /* determine coercion if possible, default=more general */ test = FlagCoercion(left->rep.flag, right->rep.flag, &flag, 
1); if (test != 1) { PyErr_SetString(PyExc_TypeError, "incompatible types for table union"); return NULL; } /* allocate a wrapper and augment it with both inputs */ result = (TableWrapper *) newWrapper(size, flag); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); test = Taugment( &(result->rep), &(left->rep) ); if (test == 0) { test = Taugment( &(result->rep), &(right->rep) ); } if (test!=0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } /* utility function for intersection and difference */ static PyObject * Wintdiff(TableWrapper *left, TableWrapper *right, long include, enum BucketFlag flagout) { TableWrapper *result; long count; /* determine the size needed */ Dprint(("Wintdiff\n")); count = Tintdiff(NULL, &(left->rep), &(right->rep), include, 1); if (count < 0) { return NULL; } /* error */ /* be conservative, for fun */ count = count / 2; /* allocate a wrapper of this size and initialize it */ result = (TableWrapper *) newWrapper(count, flagout); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); count = Tintdiff(&(result->rep), &(left->rep), &(right->rep), include, 0); if (count < 0) { Py_DECREF(result); return NULL; } return (PyObject *) result; } /* intersection */ static PyObject * Wintersect(TableWrapper *left, TableWrapper *right) { long test; enum BucketFlag flag, lflag, rflag; Dprint(("Wintersect\n")); /* Py_None intersected with anything returns copy of anything... */ if ((PyObject *)left == Py_None) { return Wunion(right, right); } if ((PyObject *)right == Py_None) { return Wunion(left, left); } /* determine flag: default to less general */ rflag = right->rep.flag; lflag = left->rep.flag; /* coerce to more general, unless one arg is a set, in which case coerce to set */ if ( (rflag != lflag) && ((rflag == SETFLAG)||(lflag == SETFLAG)) ) { PyErr_SetString(PyExc_TypeError, "mixed intersection not allowed with kjSet"); return NULL; } test = FlagCoercion(left->rep.flag, right->rep.flag, &flag, -1); if (test!=1) { PyErr_SetString(PyExc_TypeError, "unable to coerce for intersection"); return NULL; } /* iterate over the smaller argument */ if ((left->rep.entries) < (right->rep.entries)) { return Wintdiff(left, right, 1, flag); } else { return Wintdiff(right, left, 1, flag); } } /* difference */ static PyObject * Wdifference(TableWrapper *left, TableWrapper *right) { enum BucketFlag lflag, rflag; /* left cannot be Py_None */ Dprint(("Wdifference\n")); if ((PyObject *)left == Py_None) { PyErr_SetString(PyExc_TypeError, "cannot difference from Py_None"); return NULL; } /* if right is Py_None return empty */ if ((PyObject *)right == Py_None) { return (PyObject *) newWrapper(0, left->rep.flag); } rflag = right->rep.flag; lflag = left->rep.flag; /* diff default coerce to whatever left is, unless one arg is a set, in which case raise an error */ if ( (rflag != lflag) && ((rflag == SETFLAG)||(lflag == SETFLAG)) ) { PyErr_SetString(PyExc_TypeError, "mixed difference not allowed with kjSet"); return NULL; } return Wintdiff(left, right, 0, lflag); } /* composition of two tables */ static PyObject * Wcompose(TableWrapper *left, TableWrapper *right) { enum BucketFlag flag; TableWrapper *result; long test, count; Table *Ltable, *Rtable; Dprint(("Wcompose\n")); /* neither arg may be Py_None */ if (((PyObject *)left == Py_None) || ((PyObject *)right == Py_None)) { PyErr_SetString(PyExc_TypeError, "cannot compose Py_None"); return NULL; } Ltable = 
&(left->rep); Rtable = &(right->rep); /* find coercion, prefer more general */ test = FlagCoercion(Ltable->flag, Rtable->flag, &flag, 1); if (test!=1) { PyErr_SetString(PyExc_TypeError, "incompatible types for composition"); return NULL; } /* DON'T determine required table size, (not easily done correctly) */ count = 0; /* commented count = Tcompose(0, Ltable, Rtable, 0, 1); if (count<0) { return NULL; } count = count/2; */ /* allocate result */ result = (TableWrapper *) newWrapper(count, flag); if (result == NULL) { return NULL; } /* error */ propagateDirt( left, result ); propagateDirt( right, result ); count = Tcompose(&(result->rep), Ltable, Rtable, 0, 0); if (count < 0) { Py_DECREF(result); return NULL; /* error */ } return (PyObject *) result; } /* coercion: just check that pw is either Py_None, kjSet, kjGraph or kjDict all other logic is at the function level (Py_None == universal set) */ static long Wrapper_coerce(PyObject **pv, PyObject **pw) { PyObject *w; w = *pw; Dprint(("Wcoerce\n")); if ( (w == Py_None) || is_kjTable(w) ) { /* both w and *pv are "returned", hence must be increfed */ Py_INCREF(w); Py_INCREF(*pv); return 0; /* okay */ } return 1; /* Nope! */ } /* the number methods structure for all kjSets, kjDicts, kjGraphs */ static PyNumberMethods kjSet_as_number = { (binaryfunc)Wunion, /*nb_add*/ (binaryfunc)Wdifference, /*nb_subtract*/ (binaryfunc)Wcompose, /*nb_multiply*/ (binaryfunc)undefbin, /*nb_divide*/ (binaryfunc)undefbin, /*nb_remainder*/ (binaryfunc)undefbin, /*nb_divmod*/ (ternaryfunc)undefter, /*nb_power*/ (unaryfunc)undefun, /*nb_negative*/ (unaryfunc)undefun, /*nb_positive*/ (unaryfunc)undefun, /*nb_absolute*/ (inquiry)Wrapper_nonzero, /*nb_nonzero*/ (unaryfunc)Wtranspose, /*nb_invert*/ (binaryfunc)undefbin, /*nb_lshift*/ (binaryfunc)undefbin, /*nb_rshift*/ (binaryfunc)Wintersect, /*nb_and*/ (binaryfunc)undefbin, /*nb_xor*/ (binaryfunc)Wunion, /*nb_or*/ (coercion)Wrapper_coerce, /*nb_coerce*/ (unaryfunc)undefun, /*nb_int*/ (unaryfunc)undefun, /*nb_long*/ (unaryfunc)undefun, /*nb_float*/ (unaryfunc)undefun, /*nb_oct*/ (unaryfunc)undefun, /*nb_hex*/ }; static PyObject * kjSet_subscript(TableWrapper *Set, PyObject *key) { PyObject *mem, *map; long test; Dprint(("kjSet_subscript\n")); test = TableGet1(&(Set->rep), key, 0, NOHASH, NOFORCE, &mem, &map); if (test == -1) { return NULL; } return PyInt_FromLong((long) 1); } static long kjSet_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing) { PyObject *mem, *map; TableWrapper *S; Dprint(("kjSet_ass_subscript\n")); S = (TableWrapper *) Set; if (S->hashvalue != NOHASH) { Wset_hash_error(); return -1; } if (thing == NULL) { /* request to delete */ if (deleteFromTable(&(S->rep), key, 0) == 0) { return -1; } return 0; } else { /* should check for standard value of *thing = long 1 ? 
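   In practice the mapped value is ignored for sets: any assignment
   simply inserts the key.  Observable behaviour, as a Python sketch:

       S = kjSet()
       S["x"] = 1      # insert member (the value is conventional)
       S["x"]          # -> 1 if present, KeyError otherwise
       del S["x"]      # remove member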
*/ return TableGet1(&(S->rep), key, 0, NOHASH, FORCE, &mem, &map); } } static PyObject * kjDict_subscript(TableWrapper *Set, PyObject *key) { PyObject *mem, *map; long test; Dprint(("kjDict_subscript\n")); test = TableGet1(&(Set->rep), key, 0, NOHASH, NOFORCE, &mem, &map); if (test == -1) { return NULL; } Py_XINCREF(map); return map; } static long kjDict_ass_subscript(PyObject *Set, PyObject *key, PyObject *thing) { PyObject *mem, *map; TableWrapper *S; Dprint(("kjDict_ass_subscript\n")); S = (TableWrapper *) Set; if (S->hashvalue != NOHASH) { Wset_hash_error(); return -1; } if (thing == NULL) { /* request to delete */ if (deleteFromTable(&(S->rep), key, 0) == 0) { return -1; } return 0; } else { return TableGet1(&(S->rep), key, thing, NOHASH, FORCE, &mem, &map); } } static long Wrapper_length(TableWrapper *W) { Dprint(("Wrapper_length\n")); return W->rep.entries; } /* mapping methods for jkSets */ static PyMappingMethods kjSet_as_mapping = { (inquiry)Wrapper_length, /*mp_length*/ (binaryfunc)kjSet_subscript, /*mp_subscript*/ (objobjargproc)kjSet_ass_subscript, /*mp_ass_subscript*/ }; /* mapping methods for kjDicts AND kjGraphs */ static PyMappingMethods kjDict_as_mapping = { (inquiry)Wrapper_length, /*mp_length*/ (binaryfunc)kjDict_subscript, /*mp_subscript*/ (objobjargproc)kjDict_ass_subscript, /*mp_ass_subscript*/ }; /* THE TYPE OBJECT FOR SETS */ static PyTypeObject kjSettype = { PyObject_HEAD_INIT(NULL) 0, (char *) "kjSet", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)NULL, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)NULL, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)NULL, /*tp_as_sequence*/ (PyMappingMethods *)&kjSet_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ NULL, /*tp_call*/ }; /* THE TYPE OBJECT FOR DICTS */ static PyTypeObject kjDicttype = { PyObject_HEAD_INIT(NULL) 0, (char *) "kjDict", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)0, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)0, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)0, /*tp_as_sequence*/ (PyMappingMethods *)&kjDict_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ 0, /*tp_call*/ }; /* THE TYPE OBJECT FOR GRAPHSS */ static PyTypeObject kjGraphtype = { PyObject_HEAD_INIT(NULL) 0, (char *) "kjGraph", /*tp_name for printing */ (unsigned int) sizeof(TableWrapper), /*tp_basicsize */ (unsigned int)0, /*tp_itemsize */ (destructor)WrapperDeallocate, /*tp_dealloc*/ (printfunc)WrapperPrint, /*tp_print*/ (getattrfunc)Wrapper_getattr, /*tp_getattr*/ (setattrfunc)0, /*tp_setattr*/ (cmpfunc)Wcompare, /*tp_compare*/ (reprfunc)WrapperRepr, /*tp_repr*/ (PyNumberMethods *)&kjSet_as_number, /*tp_as_number*/ (PySequenceMethods *)0, /*tp_as_sequence*/ (PyMappingMethods *)&kjDict_as_mapping, /*tp_as_mapping*/ (hashfunc)Wrapper_hash, /*tp_hash*/ 0, /*tp_call*/ }; /* special method for adding to a "dumped index" C implementation of frequently used python code (by me) same as: def kjKeyPut(dict, dumper, index, psuedokey, nullbag): try: d = dict.dump(dumper) if 
d == Py_None: d = (Py_None,) pair = (psuedokey, dict) index[d] = pair return d except PyExc_KeyError: nullbag[psuedokey] = dict return Py_None but faster. Returns Py_None only on failure to index. Maps dump of Py_None to (Py_None,) to avoid ambiguity (may cause too many hits for retrieval on (Py_None,).) dict is kjDict or kjGraph dumper is tuple index is kjDict or kjGraph psuedokey is any hashable object (probably integer) nullbag is kjDict or kjGraph */ /* #ifndef PYTHON1DOT2 */ static PyObject * kjKeyPut(PyObject *self, PyObject *args) { long valid; TableWrapper *dict, *index, *nullbag; PyObject *dumper, *psuedokey, *d, *pair, *err_type /*, *err_value */; /* get and verify args */ if (args == NULL) { PyErr_SetString(PyExc_TypeError, "KeyPut requires 5 arguments"); return NULL; } if (!PyArg_Parse(args, "(OOOOO)", &dict, &dumper, &index, &psuedokey, &nullbag)) { PyErr_SetString(PyExc_TypeError, "KeyPut requires dict, dumper, index, psuedokey, nullbag"); return NULL; } if (!((is_kjDictobject(dict)) || (is_kjGraphobject(dict)))) { PyErr_SetString(PyExc_TypeError, "first arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!((is_kjDictobject(index)) || (is_kjGraphobject(index)))) { PyErr_SetString(PyExc_TypeError, "third arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!((is_kjDictobject(nullbag)) || (is_kjGraphobject(nullbag)))) { PyErr_SetString(PyExc_TypeError, "fifth arg of KeyPut must be kjDict or kjGraph"); return NULL; } if (!PyTuple_Check(dumper)) { PyErr_SetString(PyExc_TypeError, "second arg of KeyPut must be tuple"); return NULL; } /* initialize d */ d = kjDictDump(dict, dumper); if (d == NULL) { /* unable to dump */ /* check that error was a keyerror ??? */ /* err_get(&err_type, &err_value); */ err_type = PyErr_Occurred(); if (err_type != PyExc_KeyError) { /* some other error... abort */ /* PyErr_SetObject(err_type, err_value); */ return NULL; } /* in case of PyExc_KeyError, augment the Nullbag, return Py_None */ PyErr_Clear(); valid = kjDict_ass_subscript((PyObject *) nullbag, psuedokey, (PyObject *) dict); if (valid == -1) { return NULL; } Py_INCREF(Py_None); return Py_None; } /* if dump succeeded... */ /* initialize pair, Py_INCREF components */ pair = PyTuple_New(2); if (pair == NULL) { return NULL; } PyTuple_SetItem(pair, 0, psuedokey); Py_INCREF(psuedokey); PyTuple_SetItem(pair, 1, (PyObject *) dict); Py_INCREF(dict); /* remap Py_None to (Py_None,) if needed */ if (d == Py_None) { /* preserve extra reference to Py_None... */ d = PyTuple_New(1); PyTuple_SetItem(d, 0, Py_None); } /* set index[d] = pair, creates an extra ref to pair */ valid = kjDict_ass_subscript((PyObject *) index, d, pair); if (valid == -1) { Py_DECREF(pair); return NULL; } Py_DECREF(pair); /* dispose of extra ref to pair */ return d; } /* #endif */ /* THE "METHODS" FOR THIS MODULE */ /* These are the basic external interfaces for python to access this module. 
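   A quick tour from the Python side (a sketch; it assumes the
   constructors accept optional initial contents):

       from kjbuckets import kjSet, kjDict, kjGraph, kjUndump

       G = kjGraph([(1, 2), (2, 3)])
       G.tclosure().items()               # arcs of the transitive closure
       S = kjSet([1, 2]) & kjSet([2, 3])  # intersection
       D = kjUndump(("a", "b"), (1, 2))   # kjDict with a->1, b->2
       D.dump(("b", "a"))                 # -> (2, 1)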
*/ static struct PyMethodDef kjbuckets_methods[] = { {"kjSet", (PyCFunction)makekjSet}, {"kjDict", (PyCFunction)makekjDict}, {"kjGraph", (PyCFunction)makekjGraph}, {"kjUndump", (PyCFunction)kjUndumpToDict}, /* #ifndef PYTHON1DOT2 */ {"kjKeyPut", (PyCFunction)kjKeyPut}, /* #endif */ #ifdef KJBDEBUG {"debug", (PyCFunction)Wdebug}, #endif {NULL, NULL} /* sentinel */ }; void initkjbuckets(void) { kjSettype.ob_type = &PyType_Type; kjDicttype.ob_type = &PyType_Type; kjGraphtype.ob_type = &PyType_Type; Py_InitModule("kjbuckets", kjbuckets_methods); } /* end of kjbuckets module */ gadfly-1.0.0/kjbuckets/setup.py0100644000157700012320000000027307465430476015506 0ustar rjonestech#! /usr/local/bin/python -O from distutils.core import setup, Extension setup (name = "kjbuckets", version = "2.2", ext_modules = [Extension("kjbuckets", ["kjbucketsmodule.c"])]) gadfly-1.0.0/test/0040755000157700012320000000000007512763043012757 5ustar rjonestechgadfly-1.0.0/test/__init__.py0100644000157700012320000000416407466100704015067 0ustar rjonestech# # Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/) # This module is free software, and you may redistribute it and/or modify # under the same terms as Python, so long as this copyright message and # disclaimer are retained in their original form. # # IN NO EVENT SHALL BIZAR SOFTWARE PTY LTD BE LIABLE TO ANY PARTY FOR # DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING # OUT OF THE USE OF THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # BIZAR SOFTWARE PTY LTD SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE. THE CODE PROVIDED HEREUNDER IS ON AN "AS IS" # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # # $Id: __init__.py,v 1.3 2002/05/08 00:49:01 anthonybaxter Exp $ import os, tempfile, unittest, shutil os.environ['SENDMAILDEBUG'] = tempfile.mktemp() # figure all the modules available dir = os.path.split(__file__)[0] test_mods = {} for file in os.listdir(dir): if file.startswith('test_') and file.endswith('.py'): name = file[5:-3] test_mods[name] = __import__(file[:-3], globals(), locals(), []) all_tests = test_mods.keys() def go(tests=all_tests): l = [] for name in tests: l.append(test_mods[name].suite()) suite = unittest.TestSuite(l) runner = unittest.TextTestRunner() runner.run(suite) # # $Log: __init__.py,v $ # Revision 1.3 2002/05/08 00:49:01 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . fixed the module name in gadfly/kjParser.py # # Revision 1.1.1.1 2002/05/06 07:31:09 richard # gadfly-1.0.0/test/gfstest.py0100644000157700012320000001461207467206053015013 0ustar rjonestech """test script for gadfly client and server Usage: This script interacts with the test database generated by gftest.py. 
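Programmatic access uses the same client interface the functions below
rely on; roughly:

    from gadfly.client import gfclient
    conn = gfclient("admin", "admin", "localhost", 2222)
    cursor = conn.cursor()
    cursor.execute("select * from frequents")
    for x in cursor.fetchall(): print x

(user, password, host and port must match the server's configuration).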
To start the server from the directory containing the dbtest directory use: python gfstest.py start THIS WILL ONLY WORK IF YOU CREATED THE test DATABASE IN DIRECTORY dbtest FIRST USING python gftest.py dbtest UNLESS YOU RUN THE SERVER IN THE BACKGROUND THE SERVER WILL HOG THE WINDOW YOU STARTED IT IN AND YOU WILL HAVE TO USE ANOTHER WINDOW UNTIL THE SERVER IS SHUT DOWN (SEE BELOW). Then from *anywhere* (on the same machine) access the database using python gfstest.py restart - restart the server (reread the database) python gfstest.py checkpoint - force checkpoint the server python gfstest.py queries - run some example queries and updates python gfstest.py policy_test - test the policies test and test1 created by the startup function in this module. python gfstest.py bogusshutdown - attempt to shut down the server with a bogus password [should generate an exception] ...and finally python gfstest.py shutdown - shut down the server for real. As mentioned the startup function of this module illustrates how to create a "startup" function for a server and initialize policy objects with named, prepared queries. """ PORT = 2222 DB = "test" DBDIR = "_test_dir" PW = "admin" STARTUP = "gfstest" import sys, socket from gadfly.server import Server, Policy from gadfly.client import gfclient from gadfly.scripts.client import dosimple def main(): argv = sys.argv command = argv[1] machine = 'localhost' #socket.gethostname() port = PORT if command=="start": print "attempting to start the server" print "making a server on", PORT, DB, DBDIR, PW, STARTUP S = Server(PORT, DB, DBDIR, PW, STARTUP) print "initializing the server" S.init() print "starting the server", S.connection S.start() elif command=="shutdown": dosimple("shutdown", PW, machine, port) elif command=="bogusshutdown": print "BOGUS shutdown attempt" dosimple("shutdown", "bad password", machine, port) elif command=="restart": dosimple("restart", PW, machine, port) elif command=="checkpoint": dosimple("checkpoint", PW, machine, port) elif command=="queries": doqueries(machine, port) elif command=="policy_test": policy_test(machine, port) else: print "unknown command", command print __doc__ def policy_test(machine, port): """test the test1 and test policies""" print "testing non-admin policies test and test1" conn = gfclient("test", "test", machine, port) cursor = conn.cursor() print "testing test policy: nan values before:" cursor.execute_prepared("getnan") for x in cursor.fetchall(): print x print "updating nan" cursor.execute_prepared("updatenan", ("pabst", 4)) print "nan after" cursor.execute_prepared("getnan") for x in cursor.fetchall(): print x print "updating nan again" cursor.execute_prepared("updatenan", ("rollingrock", 1)) print "trying an illegal update" try: cursor.execute("delete from frequents") except: print "exception", sys.exc_type, sys.exc_value print "as expected" else: raise "DAMN!", "illegal query apparently completed!!!" print; print "testing policy test1"; print conn = gfclient("test1", "test1", machine, port) cursor = conn.cursor() print "getting norm" cursor.execute_prepared("qlike", ("norm",)) print cursor.description for x in cursor.fetchall(): print x print "trying an illegal query again" try: cursor.execute("create table test(name varchar)") except: print "exception", sys.exc_type, sys.exc_value print "as expected" else: raise "Damn!(2)", "illegal query apparently completed" def startup(admin_policy, connection, Server_instance): """example startup script. 
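(A startup function receives the admin policy, the server's database
connection and the Server instance, and returns a dictionary mapping
policy names to the extra Policy objects it creates; clients then run
the prepared queries registered here by name, e.g.
cursor.execute_prepared("qlike", ("norm",)), as policy_test above does.)
This one does the following: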
add a policies test and test1 passwords same test1 is allowed to query the frequents table by name test is allowed to update likes where drinker='nan' also add prepared query dumpwork to admin_policy. """ admin_policy["dumpwork"] = "select * from work" test1 = Policy("test1", "test1", connection, queries=0) test = Policy("test", "test", connection, queries=0) test1["qlike"] = "select * from likes where drinker=?" test["updatenan"] = """ update likes set beer=?, perday=? where drinker='nan' """ test["getnan"] = """ select * from likes where drinker='nan' """ return {"test": test, "test1": test1} def doqueries(machine, port): print "executing queries and updates" conn = gfclient("admin", PW, machine, port) cursor = conn.cursor() for q in admin_queries: print;print q try: cursor.execute(q) except: print 'exception in execute' import traceback; traceback.print_exc() else: #print "executed" #print q #print "description" print cursor.description #print "results" try: r = cursor.fetchall() #if r is None: #print "no results" #else: #for x in r: #print x except: print "exception in results" import traceback; traceback.print_exc() # try dumpwork print; print; print "dumpwork"; print cursor.execute_prepared("dumpwork") for x in cursor.fetchall(): print x # try dynamic parameters stat = """ select distinct drinker from likes l, serves s where l.beer = s.beer and s.bar=? """ print; print stat; print "dynamic query ?=cheers" cursor.execute(stat, ("cheers",)) for x in cursor.fetchall(): print x admin_queries = [ """select count(*) from work""", """select * from frequents""", """select count(*) from frequents""", """select count(drinker) from frequents""", """insert into frequents(drinker, bar, perweek) values ('sally', 'cheers', 2)""", """select * from frequents""", """select syntax error from work""", """select drinker, count(bar) from frequents group by drinker""", ] if __name__=="__main__": main() gadfly-1.0.0/test/test_gadfly.py0100644000157700012320000012465007471602720015641 0ustar rjonestech# $Id: test_gadfly.py,v 1.8 2002/05/19 01:54:24 richard Exp $ import unittest, os, shutil, time, sys from gadfly import gadfly from gadfly.store import StorageError class harness(unittest.TestCase): def setUp(self): self.connect = gadfly() if os.path.exists('_test_dir'): shutil.rmtree('_test_dir') os.makedirs('_test_dir') self.connect.startup("test", '_test_dir') self.curs = self.connect.cursor() table_creates = ( "frequents (drinker varchar, bar varchar, perweek integer)", "likes (drinker varchar, beer varchar, perday integer)", "serves (bar varchar, beer varchar, quantity integer)", "work (name varchar, hours integer, rate float)", "empty (nothing varchar)", "accesses (page varchar, hits integer, month integer)", ) for x in table_creates: self.curs.execute('create table '+x) self.curs.execute("""Create view nondrinkers(d, b) as select drinker, bar from frequents where drinker not in (select drinker from likes)""") # inserts C = "insert into work (name, hours, rate) values (?, ?, ?)" D = [("sam", 30, 40.2), ("norm", 45, 10.2), ("woody", 80, 5.4), ("diane", 3, 4.4), ("rebecca", 120, 12.9), ("cliff", 26, 200.00), ("carla", 9, 3.5), ] self.curs.execute(C, D) self.curs.execute('select name, hours, rate from work order by name') l = self.curs.fetchall() D.sort() self.assertEquals(l, D) fdata = [ ('adam', 'lolas', 1), ('woody', 'cheers', 5), ('sam', 'cheers', 5), ('norm', 'cheers', 3), ('wilt', 'joes', 2), ('norm', 'joes', 1), ('lola', 'lolas', 6), ('norm', 'lolas', 2), ('woody', 'lolas', 1), ('pierre', 'frankies', 
0), ] sdata = [ ('cheers', 'bud', 500), ('cheers', 'samaddams', 255), ('joes', 'bud', 217), ('joes', 'samaddams', 13), ('joes', 'mickies', 2222), ('lolas', 'mickies', 1515), ('lolas', 'pabst', 333), ('winkos', 'rollingrock', 432), ('frankies', 'snafu', 5), ] ldata = [ ('adam', 'bud', 2), ('wilt', 'rollingrock', 1), ('sam', 'bud', 2), ('norm', 'rollingrock', 3), ('norm', 'bud', 2), ('nan', 'sierranevada', 1), ('woody', 'pabst', 2), ('lola', 'mickies', 5), ] dpairs = [("frequents", fdata), ("serves", sdata), ("likes", ldata) ] for table, data in dpairs: ins = "insert into %s values (?, ?, ?)"%table if table!="frequents": for parameters in data: self.curs.execute(ins, parameters) else: self.curs.execute(ins, data) # indexes indices = [ "create index fd on frequents (drinker)", "create index sbb on serves (beer, bar)", "create index lb on likes (beer)", "create index fb on frequents (bar)", ] for ci in indices: self.curs.execute(ci) self.connect.commit() def runQueries(self, queries): for q, p in queries: self.curs.execute(q) self.assertEqual(self.curs.pp(), p) def tearDown(self): self.connect.close() if os.path.exists('_test_dir'): shutil.rmtree('_test_dir') class test_Gadfly(harness): def testIndex(self): # test unique index C = "create unique index wname on work(name)" self.curs.execute(C) C = "insert into work(name, hours, rate) values ('sam', 0, 0)" self.assertRaises(StorageError, self.curs.execute, C) def testIntrospection(self): # introspection itests = ["select 10*4 from dual", "select * from __table_names__", "select * from __datadefs__", "select * from __indices__", "select * from __columns__", "select * from __indexcols__", """ select i.index_name, is_unique, table_name, column_name from __indexcols__ c, __indices__ i where c.index_name = i.index_name""", ] # TODO: compare results for C in itests: self.curs.execute(C) def testComplexLiterals(self): # testing complex, neg literals in insert self.curs.execute('''insert into work(name, hours, rate) values ('jo', -1, 3.1e-44-1e26j)''') self.curs.execute("select name,hours,rate from work where name='jo'") self.assertEquals(self.curs.fetchall(), [('jo', -1, (3.1e-44-1e+26j))]) self.curs.execute("delete from work where name='jo'") def testParameterisedInsert(self): # parameterised inserts C = "insert into accesses(page, month, hits) values (?, ?, ?)" D = [ ("index.html", 1, 2100), ("index.html", 2, 3300), ("index.html", 3, 1950), ("products.html", 1, 15), ("products.html", 2, 650), ("products.html", 3, 98), ("people.html", 1, 439), ("people.html", 2, 12), ("people.html", 3, 665), ] self.curs.execute(C, D) self.curs.execute("""select sum(hits) from accesses where page='people.html'""") self.assertEquals(self.curs.fetchall(), [(439+12+665,)]) self.runQueries([ ("""select month, sum(hits) as totalhits from accesses where month<>1 group by month order by 2""", 'MONTH | TOTALHITS\n=================\n3 | 2713 \n2 | 3962 '), ("""select month, sum(hits) as totalhits from accesses group by month order by 2 desc""", 'MONTH | TOTALHITS\n=================\n2 | 3962 \n3 | 2713 \n1 | 2554 '), ("""select month, sum(hits) as totalhits from accesses group by month having sum(hits)<3000 order by 2 desc""", 'MONTH | TOTALHITS\n=================\n3 | 2713 \n1 | 2554 '), ("select count(distinct month), count(distinct page) from accesses", 'Count(distinct ACCESSES.MONTH) | Count(distinct ACCESSES.PAGE)\n==============================================================\n3 | 3 '), ("select month, hits, page from accesses order by month, hits desc", 'MONTH | HITS | 
PAGE \n============================\n1 | 2100 | index.html \n1 | 439 | people.html \n1 | 15 | products.html\n2 | 3300 | index.html \n2 | 650 | products.html\n2 | 12 | people.html \n3 | 1950 | index.html \n3 | 665 | people.html \n3 | 98 | products.html'), ]) def testTrivialQueries1(self): self.runQueries([ ("select name, hours from work", 'NAME | HOURS\n===============\nsam | 30 \nnorm | 45 \nwoody | 80 \ndiane | 3 \nrebecca | 120 \ncliff | 26 \ncarla | 9 '), ]) def testTrivialQueries2(self): self.runQueries([ ("select B,D from nondrinkers", 'B | D \n=================\nfrankies | pierre'), ]) def testTrivialQueries3(self): self.runQueries([ ("""select QUANTITY,BAR,BEER from serves""", 'QUANTITY | BAR | BEER \n=================================\n500 | cheers | bud \n255 | cheers | samaddams \n217 | joes | bud \n13 | joes | samaddams \n2222 | joes | mickies \n1515 | lolas | mickies \n333 | lolas | pabst \n432 | winkos | rollingrock\n5 | frankies | snafu '), ]) def testTrivialQueries4(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker = 'norm'""", 'BAR | PERWEEK | DRINKER\n==========================\ncheers | 3 | norm \njoes | 1 | norm \nlolas | 2 | norm '), ]) def testMedian(self): self.runQueries([ ("select median(hours) from work", 'Median(WORK.HOURS)\n==================\n30 ') ]) def testComments(self): self.runQueries([ ("select name,rate,hours from work where name='carla' -- just carla", 'NAME | RATE | HOURS\n====================\ncarla | 3.5 | 9 '), ("""select name, ' ain''t worth ', rate from work -- has more columns where name='carla'""", "NAME | ain't worth | RATE\n============================\ncarla | ain't worth | 3.5 "), ("""select name, -- name of worker hours -- hours worked from work""", 'NAME | HOURS\n===============\nsam | 30 \nnorm | 45 \nwoody | 80 \ndiane | 3 \nrebecca | 120 \ncliff | 26 \ncarla | 9 '), ]) def testSimpleRange(self): self.runQueries([ ("select name, rate from work where rate>=20 and rate<=100", 'NAME | RATE\n===========\nsam | 40.2'), ("select name, rate from work where rate between 20 and 100", 'NAME | RATE\n===========\nsam | 40.2'), ("select name, rate from work where rate not between 20 and 100", 'NAME | RATE \n===============\nnorm | 10.2 \nwoody | 5.4 \ndiane | 4.4 \nrebecca | 12.9 \ncliff | 200.0\ncarla | 3.5 '), ]) def testBetween(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek not between 2 and 5""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \njoes | 1 | norm \nlolas | 6 | lola \nlolas | 1 | woody \nfrankies | 0 | pierre '), ]) def testIn(self): self.runQueries([ ("""select drinker,beer,perday from likes where beer in ('bud', 'pabst')""", 'DRINKER | BEER | PERDAY\n========================\nadam | bud | 2 \nsam | bud | 2 \nnorm | bud | 2 \nwoody | pabst | 2 '), ]) def testNotIn(self): result = 'BEER \n=========\nsamaddams\nsamaddams\nsnafu ' self.runQueries([ ("""select beer from serves where beer not in (select beer from likes)""", result) ]) def testSimpleCalculations1(self): self.runQueries([ ("select name, hours*rate as pay from work order by name", 'NAME | PAY \n================\ncarla | 31.5 \ncliff | 5200.0\ndiane | 13.2 \nnorm | 459.0 \nrebecca | 1548.0\nsam | 1206.0\nwoody | 432.0 '), ]) def testSimpleCalculations2(self): self.runQueries([ ("select name, rate, hours, hours*rate as pay from work", 'NAME | RATE | HOURS | PAY \n================================\nsam | 40.2 | 30 | 1206.0\nnorm | 10.2 | 45 | 459.0 \nwoody | 5.4 | 80 | 432.0 
\ndiane | 4.4 | 3 | 13.2 \nrebecca | 12.9 | 120 | 1548.0\ncliff | 200.0 | 26 | 5200.0\ncarla | 3.5 | 9 | 31.5 '), ]) def testSimpleCalculations3(self): self.runQueries([ ("""select name, rate, hours, hours*rate as pay from work where hours*rate>500 and (rate<100 or hours>5)""", 'NAME | RATE | HOURS | PAY \n================================\nsam | 40.2 | 30 | 1206.0\nrebecca | 12.9 | 120 | 1548.0\ncliff | 200.0 | 26 | 5200.0'), ]) def testSimpleCalculations4(self): self.runQueries([ ("""select name, rate, hours, hours*rate as pay from work where hours*rate>500 and rate<100 or hours>5""", 'NAME | RATE | HOURS | PAY \n================================\nsam | 40.2 | 30 | 1206.0\nnorm | 10.2 | 45 | 459.0 \nwoody | 5.4 | 80 | 432.0 \nrebecca | 12.9 | 120 | 1548.0\ncliff | 200.0 | 26 | 5200.0\ncarla | 3.5 | 9 | 31.5 '), ]) def testSimpleCalculations5(self): self.runQueries([ ("""select avg(rate), min(hours), max(hours), sum(hours*rate) as expenses from work""", 'Average(WORK.RATE) | Minimum(WORK.HOURS) | Maximum(WORK.HOURS) | EXPENSES\n=========================================================================\n39.5142857143 | 3 | 120 | 8889.7 '), ]) def testUnion1(self): self.runQueries([ ("""select drinker as x from likes union select beer as x from serves union select drinker as x from frequents""", 'X \n===========\nadam \nwoody \nsam \nnorm \nwilt \nnorm \nlola \nnorm \nwoody \npierre \nbud \nsamaddams \nbud \nsamaddams \nmickies \nmickies \npabst \nrollingrock\nsnafu \nadam \nwilt \nsam \nnorm \nnorm \nnan \nwoody \nlola '), ]) def testUnion2(self): self.runQueries([ ("""select drinker from likes union select drinker from frequents""", 'DRINKER\n=======\nadam \nwoody \nsam \nnorm \nwilt \nnorm \nlola \nnorm \nwoody \npierre \nadam \nwilt \nsam \nnorm \nnorm \nnan \nwoody \nlola '), ]) def testUnionDistinct(self): self.runQueries([ ("""select drinker from likes union distinct select drinker from frequents order by drinker""", 'DRINKER\n=======\nadam \nlola \nnan \nnorm \npierre \nsam \nwilt \nwoody '), ]) def testJoin1(self): self.runQueries([ ("""select f.drinker, s.bar, l.beer from frequents f, serves s, likes l where f.drinker=l.drinker and s.beer=l.beer and s.bar=f.bar""", 'DRINKER | BAR | BEER \n==========================\nsam | cheers | bud \nnorm | cheers | bud \nnorm | joes | bud \nlola | lolas | mickies\nwoody | lolas | pabst '), ]) def testJoin2(self): self.runQueries([ ("""select QUANTITY,BEER,PERWEEK,DRINKER,S.BAR,F.BAR from frequents as f, serves as s where f.bar = s.bar order by QUANTITY,BEER,PERWEEK,DRINKER,S.BAR,F.BAR""", 'QUANTITY | BEER | PERWEEK | DRINKER | BAR | F.BAR \n==============================================================\n5 | snafu | 0 | pierre | frankies | frankies\n13 | samaddams | 1 | norm | joes | joes \n13 | samaddams | 2 | wilt | joes | joes \n217 | bud | 1 | norm | joes | joes \n217 | bud | 2 | wilt | joes | joes \n255 | samaddams | 3 | norm | cheers | cheers \n255 | samaddams | 5 | sam | cheers | cheers \n255 | samaddams | 5 | woody | cheers | cheers \n333 | pabst | 1 | adam | lolas | lolas \n333 | pabst | 1 | woody | lolas | lolas \n333 | pabst | 2 | norm | lolas | lolas \n333 | pabst | 6 | lola | lolas | lolas \n500 | bud | 3 | norm | cheers | cheers \n500 | bud | 5 | sam | cheers | cheers \n500 | bud | 5 | woody | cheers | cheers \n1515 | mickies | 1 | adam | lolas | lolas \n1515 | mickies | 1 | woody | lolas | lolas \n1515 | mickies | 2 | norm | lolas | lolas \n1515 | mickies | 6 | lola | lolas | lolas \n2222 | mickies | 1 | norm | joes | joes \n2222 | 
mickies | 2 | wilt | joes | joes ') ]) def testJoin3(self): self.runQueries([ ("""select PERDAY,BAR,PERWEEK,BEER,F.DRINKER,L.DRINKER from likes l, frequents f where f.bar='cheers' and l.drinker=f.drinker and l.beer='bud' order by PERDAY,BAR,PERWEEK,BEER,F.DRINKER,L.DRINKER""", 'PERDAY | BAR | PERWEEK | BEER | DRINKER | L.DRINKER\n======================================================\n2 | cheers | 3 | bud | norm | norm \n2 | cheers | 5 | bud | sam | sam '), ]) def testComplex1(self): self.runQueries([ ("""select l.beer, l.drinker, count(distinct s.bar) from likes l, serves s where l.beer=s.beer group by l.beer, l.drinker order by 3 desc, l.beer, l.drinker""", 'BEER | DRINKER | Count(distinct S.BAR)\n=============================================\nbud | adam | 2 \nbud | norm | 2 \nbud | sam | 2 \nmickies | lola | 2 \npabst | woody | 1 \nrollingrock | norm | 1 \nrollingrock | wilt | 1 '), ]) def testComplex2(self): self.runQueries([ ("""select l.beer, l.drinker, count(distinct s.bar) as nbars from likes l, serves s where l.beer=s.beer group by l.beer, l.drinker union distinct select beer, drinker, 0 as nbars from likes where beer not in (select beer from serves) order by 3 desc, l.beer, l.drinker""", 'BEER | DRINKER | NBARS\n==============================\nbud | adam | 2 \nbud | norm | 2 \nbud | sam | 2 \nmickies | lola | 2 \npabst | woody | 1 \nrollingrock | norm | 1 \nrollingrock | wilt | 1 \nsierranevada | nan | 0 ' ), ]) def testAverage(self): self.runQueries([ ("""select avg(perweek) from frequents""", 'Average(FREQUENTS.PERWEEK)\n==========================\n2.6 '), ]) def testAverageSubQuery1(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek <= (select avg(perweek) from frequents)""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \njoes | 2 | wilt \njoes | 1 | norm \nlolas | 2 | norm \nlolas | 1 | woody \nfrankies | 0 | pierre ' ), ]) def testAverageSubQuery2(self): self.runQueries([ ("""select QUANTITY,BAR,BEER from serves s1 where quantity <= (select avg(quantity) from serves s2 where s1.bar=s2.bar)""", 'QUANTITY | BAR | BEER \n=================================\n255 | cheers | samaddams \n217 | joes | bud \n13 | joes | samaddams \n333 | lolas | pabst \n432 | winkos | rollingrock\n5 | frankies | snafu '), ]) def testAverageSubQuery3(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek > (select avg(perweek) from frequents)""", 'BAR | PERWEEK | DRINKER\n==========================\ncheers | 5 | woody \ncheers | 5 | sam \ncheers | 3 | norm \nlolas | 6 | lola '), ]) def testAverageSubQuery4(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents f1 where perweek > ( select avg(perweek) from frequents f2 where f1.drinker = f2.drinker)""", 'BAR | PERWEEK | DRINKER\n==========================\ncheers | 5 | woody \ncheers | 3 | norm '), ]) def testAverageSubQuery5(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek between 2 and (select avg(perweek) from frequents)""", 'BAR | PERWEEK | DRINKER\n=========================\njoes | 2 | wilt \nlolas | 2 | norm '), ]) def testAverageGroup(self): self.runQueries([ ("""select bar, avg(quantity) from serves group by bar order by bar""", 'BAR | Average(SERVES.QUANTITY)\n===================================\ncheers | 377.5 \nfrankies | 5.0 \njoes | 817.333333333 \nlolas | 924.0 \nwinkos | 432.0 '), ]) def testAny1(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek < any (select 
perweek from frequents)""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \ncheers | 5 | woody \ncheers | 5 | sam \ncheers | 3 | norm \njoes | 2 | wilt \njoes | 1 | norm \nlolas | 2 | norm \nlolas | 1 | woody \nfrankies | 0 | pierre '), ]) def testAny2(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents f1 where perweek < any (select perweek from frequents f2 where f1.drinker = f2.drinker)""", 'BAR | PERWEEK | DRINKER\n=========================\njoes | 1 | norm \nlolas | 2 | norm \nlolas | 1 | woody '), ]) def testAny3(self): result = 'BEER \n===========\nbud \nbud \nmickies \nmickies \npabst \nrollingrock' self.runQueries([ ("""select beer from serves where beer = any (select beer from likes)""", result)]) self.runQueries([ ("""select beer from serves where beer in (select beer from likes)""", result)]) def testAll1(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek >= all (select perweek from frequents)""", 'BAR | PERWEEK | DRINKER\n=========================\nlolas | 6 | lola '), ]) def testAll2(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where perweek <= all (select perweek from frequents)""", 'BAR | PERWEEK | DRINKER\n============================\nfrankies | 0 | pierre '), ]) def testAll3(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents f1 where perweek = all (select perweek from frequents f2 where f1.drinker = f2.drinker)""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \ncheers | 5 | sam \njoes | 2 | wilt \nlolas | 6 | lola \nfrankies | 0 | pierre '), ]) def testAll4(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents f1 where perweek <> all (select perweek from frequents f2 where f1.drinker <> f2.drinker)""", 'BAR | PERWEEK | DRINKER\n============================\ncheers | 3 | norm \nlolas | 6 | lola \nfrankies | 0 | pierre '), ]) def testAll5(self): self.runQueries([ ("""select beer from serves where beer <> all (select beer from likes)""", 'BEER \n=========\nsamaddams\nsamaddams\nsnafu '), ]) def testExcept(self): self.runQueries([ ("""select drinker from likes except select drinker from frequents""", 'DRINKER\n=======\nnan '), ]) def testIntersect(self): self.runQueries([ ("""select drinker from likes intersect select drinker from frequents order by drinker""", 'DRINKER\n=======\nadam \nlola \nnorm \nsam \nwilt \nwoody '), ]) def testStringComparison1(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker>'norm'""", 'BAR | PERWEEK | DRINKER\n============================\ncheers | 5 | woody \ncheers | 5 | sam \njoes | 2 | wilt \nlolas | 1 | woody \nfrankies | 0 | pierre '), ]) def testStringComparison2(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker<='norm'""", 'BAR | PERWEEK | DRINKER\n==========================\nlolas | 1 | adam \ncheers | 3 | norm \njoes | 1 | norm \nlolas | 6 | lola \nlolas | 2 | norm '), ]) def testStringComparison3(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker>'norm' or drinker<'b'""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \ncheers | 5 | woody \ncheers | 5 | sam \njoes | 2 | wilt \nlolas | 1 | woody \nfrankies | 0 | pierre '), ]) def testStringComparison4(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker<>'norm' and 'pierre'<>drinker""", 'BAR | PERWEEK | DRINKER\n==========================\nlolas | 1 | adam 
\ncheers | 5 | woody \ncheers | 5 | sam \njoes | 2 | wilt \nlolas | 6 | lola \nlolas | 1 | woody '), ]) def testStringComparison5(self): self.runQueries([ ("""select BAR,PERWEEK,DRINKER from frequents where drinker<>'norm'""", 'BAR | PERWEEK | DRINKER\n============================\nlolas | 1 | adam \ncheers | 5 | woody \ncheers | 5 | sam \njoes | 2 | wilt \nlolas | 6 | lola \nlolas | 1 | woody \nfrankies | 0 | pierre '), ]) def testStringComparison6(self): self.runQueries([ ("""select (drinker+' ')*2+bar from frequents where drinker>bar""", '(((FREQUENTS.DRINKER)+( ))*(2))+(FREQUENTS.BAR)\n===============================================\nwoody woody cheers \nsam sam cheers \nnorm norm cheers \nwilt wilt joes \nnorm norm joes \nnorm norm lolas \nwoody woody lolas \npierre pierre frankies '), ]) def testExists1(self): self.runQueries([ ("""select QUANTITY,BEER,PERWEEK,DRINKER,S.BAR,F.BAR from frequents as f, serves as s where f.bar = s.bar and not exists( select l.drinker, l.beer from likes l where l.drinker=f.drinker and s.beer=l.beer) order by QUANTITY,BEER,PERWEEK,DRINKER,S.BAR,F.BAR""", 'QUANTITY | BEER | PERWEEK | DRINKER | BAR | F.BAR \n==============================================================\n5 | snafu | 0 | pierre | frankies | frankies\n13 | samaddams | 1 | norm | joes | joes \n13 | samaddams | 2 | wilt | joes | joes \n217 | bud | 2 | wilt | joes | joes \n255 | samaddams | 3 | norm | cheers | cheers \n255 | samaddams | 5 | sam | cheers | cheers \n255 | samaddams | 5 | woody | cheers | cheers \n333 | pabst | 1 | adam | lolas | lolas \n333 | pabst | 2 | norm | lolas | lolas \n333 | pabst | 6 | lola | lolas | lolas \n500 | bud | 5 | woody | cheers | cheers \n1515 | mickies | 1 | adam | lolas | lolas \n1515 | mickies | 1 | woody | lolas | lolas \n1515 | mickies | 2 | norm | lolas | lolas \n2222 | mickies | 1 | norm | joes | joes \n2222 | mickies | 2 | wilt | joes | joes '), ]) def testExists2(self): self.runQueries([ ("""select QUANTITY,BAR,BEER from serves s where not exists ( select * from likes l, frequents f where f.bar = s.bar and f.drinker=l.drinker and s.beer=l.beer)""", 'QUANTITY | BAR | BEER \n=================================\n255 | cheers | samaddams \n13 | joes | samaddams \n2222 | joes | mickies \n432 | winkos | rollingrock\n5 | frankies | snafu '), ]) def testExists3(self): self.runQueries([ ("""select 'nonbeer drinker '+f.drinker from frequents f where not exists (select l.drinker, l.beer from likes l where l.drinker=f.drinker)""", '(nonbeer drinker )+(F.DRINKER)\n==============================\nnonbeer drinker pierre '), ]) def testExists4(self): self.runQueries([ ("""select l.drinker+' likes '+l.beer+' but goes to no bar' from likes l where not exists (select f.drinker from frequents f where f.drinker=l.drinker)""", '(((L.DRINKER)+( likes ))+(L.BEER))+( but goes to no bar)\n========================================================\nnan likes sierranevada but goes to no bar '), ]) def testDistinct(self): self.runQueries([ ("""select distinct bar from frequents order by bar""", 'BAR \n========\ncheers \nfrankies\njoes \nlolas '), ]) def Aggregations1(self): self.runQueries([ ("""select sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity) from serves""", 'Sum(SERVES.QUANTITY) | Average(SERVES.QUANTITY) | Count(*) | (Sum(SERVES.QUANTITY))/(Count(SERVES.QUANTITY))\n============================================================================================================\n5492 | 610.222222222 | 9 | 610 '), ]) def Aggregations2(self): self.runQueries([ 
("""select beer, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity) from serves group by beer""", 'BEER | Sum(SERVES.QUANTITY) | Average(SERVES.QUANTITY) | Count(*) | (Sum(SERVES.QUANTITY))/(Count(SERVES.QUANTITY))\n==========================================================================================================================\npabst | 333 | 333.0 | 1 | 333 \nmickies | 3737 | 1868.5 | 2 | 1868 \nbud | 717 | 358.5 | 2 | 358 \nsnafu | 5 | 5.0 | 1 | 5 \nrollingrock | 432 | 432.0 | 1 | 432 \nsamaddams | 268 | 134.0 | 2 | 134 '), ]) def Aggregations3(self): self.runQueries([ ("""select sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity) from serves where beer<>'bud'""", 'Sum(SERVES.QUANTITY) | Average(SERVES.QUANTITY) | Count(*) | (Sum(SERVES.QUANTITY))/(Count(SERVES.QUANTITY))\n============================================================================================================\n4775 | 682.142857143 | 7 | 682 '), ]) def Aggregations4(self): self.runQueries([ ("""select bar, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity) from serves where beer<>'bud' group by bar having sum(quantity)>500 or count(*)>3 order by 2 desc""", 'BAR | Sum(SERVES.QUANTITY) | Average(SERVES.QUANTITY) | Count(*) | (Sum(SERVES.QUANTITY))/(Count(SERVES.QUANTITY))\n====================================================================================================================\njoes | 2235 | 1117.5 | 2 | 1117 \nlolas | 1848 | 924.0 | 2 | 924 '), ]) def Aggregations5(self): self.runQueries([ ("""select beer, sum(quantity), avg(quantity), count(*) from serves where beer<>'bud' group by beer having sum(quantity)>100 order by 4 desc, beer""", 'BEER | Sum(SERVES.QUANTITY) | Average(SERVES.QUANTITY) | Count(*)\n========================================================================\nmickies | 3737 | 1868.5 | 2 \nsamaddams | 268 | 134.0 | 2 \npabst | 333 | 333.0 | 1 \nrollingrock | 432 | 432.0 | 1 '), ]) def Aggregations6(self): self.runQueries([ ("""select l.drinker, l.beer, count(*), sum(l.perday*f.perweek) from likes l, frequents f where l.drinker=f.drinker group by l.drinker, l.beer order by 4 desc, l.drinker, l.beer""", 'DRINKER | BEER | Count(*) | Sum((L.PERDAY)*(F.PERWEEK))\n==============================================================\nlola | mickies | 1 | 30 \nnorm | rollingrock | 3 | 18 \nnorm | bud | 3 | 12 \nwoody | pabst | 2 | 12 \nsam | bud | 1 | 10 \nadam | bud | 1 | 2 \nwilt | rollingrock | 1 | 2 '), ]) def Aggregations7(self): self.runQueries([ ("""select l.drinker, l.beer, f.bar, l.perday, f.perweek from likes l, frequents f where l.drinker=f.drinker order by l.drinker, l.perday desc, f.perweek desc""", 'DRINKER | BEER | BAR | PERDAY | PERWEEK\n=================================================\nadam | bud | lolas | 2 | 1 \nlola | mickies | lolas | 5 | 6 \nnorm | rollingrock | cheers | 3 | 3 \nnorm | rollingrock | lolas | 3 | 2 \nnorm | rollingrock | joes | 3 | 1 \nnorm | bud | cheers | 2 | 3 \nnorm | bud | lolas | 2 | 2 \nnorm | bud | joes | 2 | 1 \nsam | bud | cheers | 2 | 5 \nwilt | rollingrock | joes | 1 | 2 \nwoody | pabst | cheers | 2 | 5 \nwoody | pabst | lolas | 2 | 1 '), ]) def testDynamicQueries(self): # DYNAMIC QUERIES dynamic_queries = [ ( "select bar from frequents where drinker=?", ("norm",) ), ( "select * from frequents where drinker=? 
or bar=?", ("norm", "cheers") ) ] for (x,y) in dynamic_queries: self.curs.execute(x, y) all = self.curs.fetchall() def testRepeatQueries(self): # "repeat test" repeats = [ """-- drinkers bars and beers -- where the drinker likes the beer -- the bar serves the beer -- and the drinker frequents the bar select f.drinker, l.beer, s.bar from frequents f, serves s, likes l where f.drinker=l.drinker and s.bar=f.bar and s.beer=l.beer""", """select * from frequents as f, serves as s where f.bar = s.bar and not exists( select l.drinker, l.beer from likes l where l.drinker=f.drinker and s.beer=l.beer)""", """select * from frequents where drinker = 'norm'""", ] for x in repeats: #print "repeating", x #now = time.time() self.curs.execute(x) #print time.time()-now, "first time" #now = time.time() self.curs.execute(x) #print time.time()-now, "second time" #now = time.time() self.curs.execute(x) #print time.time()-now, "third time" def testArgh(self): sqls = (""" select bar, sum(quantity), avg(quantity), count(*), sum(quantity)/count(quantity) from serves where beer<>'bud' group by bar having sum(quantity)>500 or count(*)>3 order by 2 desc """, """ select bar, sum(quantity),count(*) from serves group by bar """, """ select bar, sum(quantity) from serves group by bar having sum(quantity) > 2000 or sum(quantity) > 1 """, """ select bar, sum(quantity) from serves group by bar having sum(quantity) > 2000 or sum(quantity) > 200 """, """ select bar, sum(quantity) from serves group by bar having sum(quantity) > 1000 or sum(quantity) > 1 """, ) for stmt in sqls: self.curs.execute(stmt) class test_GadflyRollback(harness): def test(self): self.connect.autocheckpoint = 0 keep_updates = [ """insert into frequents(drinker, bar, perweek) values ('peter', 'pans', 1)""", """create view alldrinkers as select drinker from frequents union select drinker from likes""", ] for x in keep_updates: self.curs.execute(x) self.connect.commit() # self.connect.dumplog() preresults = [] rollback_queries = [ """select * from likes""", """select * from frequents""", """select * from nondrinkers""", """select * from alldrinkers""", """select * from dummy""", ] for s in rollback_queries: try: self.curs.execute(s) preresults.append(self.curs.fetchall()) except: d = sys.exc_type preresults.append(d) rollback_updates = [ """create table dummy (nothing varchar)""", """insert into frequents(drinker, bar, perweek) values ('nobody', 'nobar', 0)""", """insert into likes(drinker, beer, perday) values ('wally', 'nobar', 0)""", """drop view alldrinkers""", ] for s in rollback_updates: self.curs.execute(s) for dummy in (1,2): postresults = [] for s in rollback_queries: try: self.curs.execute(s) postresults.append(self.curs.fetchall()) except: d = sys.exc_type postresults.append(d) if dummy==1: self.assert_(preresults != postresults) self.connect.rollback() else: self.assert_(preresults == postresults) for s in rollback_updates: self.curs.execute(s) for dummy in (1,2): postresults = [] for s in rollback_queries: try: self.curs.execute(s) postresults.append(self.curs.fetchall()) except: d = sys.exc_type postresults.append(d) if dummy==1: self.assert_(preresults != postresults) # self.connect.dumplog() self.connect.restart() else: self.assert_(preresults == postresults) class test_GadflyReconnect(harness): def testClose(self): self.connect.commit() self.connect.close() self.connect = gadfly("test", '_test_dir') self.curs = self.connect.cursor() self.runTest() def testRestart(self): self.connect.restart() self.curs = self.connect.cursor() self.runTest() 
def runTest(self): updates = [ """select * from frequents""", """select * from likes""", """select * from serves""", """select count(*), d from nondrinkers group by d""", """insert into frequents (drinker, perweek, bar) values ('billybob', 4, 'cheers')""", """select * from nondrinkers""", """create table templikes (dr varchar, be varchar)""", """select * from templikes""", """insert into templikes(dr, be) select drinker, beer from likes""", """create index tdindex on templikes(dr)""", """create index tbindex on templikes(be)""", """select * from templikes""", """delete from templikes where be='rollingrock' """, """select * from templikes""", """update templikes set dr=dr+'an' where dr='norm' """, """drop index tdindex""", """delete from templikes where dr=(select min(dr) from templikes)""", """insert into templikes (dr, be) select max(dr), min(be) from templikes""", """select * from templikes""", """select * from frequents""", """update frequents set perweek=(select max(perweek) from frequents where drinker='norm') where drinker='woody'""", """select * from frequents""", """create view lazy as select drinker, sum(perweek) as wasted from frequents group by drinker having sum(perweek)>4 order by drinker""", """select * from lazy""", """drop view lazy""", """drop table templikes""", ] for s in updates: self.curs.execute(s) self.connect.commit() def suite(): l = [ unittest.makeSuite(test_Gadfly), unittest.makeSuite(test_GadflyRollback), unittest.makeSuite(test_GadflyReconnect), ] return unittest.TestSuite(l) if __name__ == '__main__': runner = unittest.TextTestRunner() runner.run(suite()) # # $Log: test_gadfly.py,v $ # Revision 1.8 2002/05/19 01:54:24 richard # - close db before removal in tests # # Revision 1.7 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.6 2002/05/08 00:49:01 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.5 2002/05/07 09:58:19 anthonybaxter # all tests pass again. need to make a more thorough test # suite, really. # # Revision 1.4 2002/05/07 04:39:30 anthonybaxter # split out the broken test all by it's lonesome. # # Revision 1.3 2002/05/07 04:03:14 richard # . major cleanup of test_gadfly # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . 
fixed the module name in gadfly/kjParser.py # # gadfly-1.0.0/test/test_kjParseBuild.py0100644000157700012320000001762707467104371016763 0ustar rjonestech# $Id: test_kjParseBuild.py,v 1.4 2002/05/11 02:59:05 richard Exp $ import unittest, os, shutil, time, sys from gadfly.kjParseBuild import * from gadfly import kjParseBuild, kjParser class test_kjParseBuild(unittest.TestCase): def test(self): def echo(x): return x # simple grammar stolen from a text LD0 = kjParser.LexDictionary() id = LD0.terminal("id","id",echo) plus = LD0.punctuation("+") star = LD0.punctuation("*") oppar = LD0.punctuation("(") clpar = LD0.punctuation(")") equals = LD0.punctuation("=") E = kjParser.nonterminal("E") T = kjParser.nonterminal("T") Tp = kjParser.nonterminal("Tp") Ep = kjParser.nonterminal("Ep") F = kjParser.nonterminal("F") rule1 = kjParser.ParseRule( E, [ T, Ep ] ) rule2 = kjParser.ParseRule( Ep, [ plus, T, Ep ] ) rule3 = kjParser.ParseRule( Ep, [ ] ) rule4 = kjParser.ParseRule( T, [ F, Tp ] ) rule5 = kjParser.ParseRule( Tp, [ star, F, Tp ] ) rule6 = kjParser.ParseRule( Tp, [ ] ) rule7 = kjParser.ParseRule( F, [ oppar, E, clpar ] ) rule8 = kjParser.ParseRule( F, [ id ] ) rl0 = [ rule1, rule2, rule3, rule4, rule5, rule6, rule7,rule8] rs0 = kjParseBuild.Ruleset(E, rl0) rs0.compFirst() Firstpairs = kjSet.GetPairs(rs0.First) rs0.compFollow() Followpairs = kjSet.GetPairs(rs0.Follow) rs0.compSLRNFA() NFA0 = rs0.SLRNFA rs0.compDFA() rs0.SLRFixDFA() DFA0 = rs0.DFA class dummy: pass ttt0 = dummy() ttt0.STRING = " id + id * id " #ttt.List = kjParser.LexList(LD0, ttt0.STRING) ttt0.Stream = kjParser.LexStringWalker(ttt0.STRING, LD0) ttt0.Stack = [] #{-1:0}# Walkers.SimpleStack() ttt0.ParseObj = kjParser.ParserObj(rl0, ttt0.Stream, DFA0, ttt0. Stack, 1) ttt0.RESULT = ttt0.ParseObj.GO() #ttt0.Stack.Dump(10) # an even simpler grammar S = kjParser.nonterminal("S") M = kjParser.nonterminal("M") A = kjParser.nonterminal("A") rr1 = kjParser.ParseRule( S, [M] ) #rr2 = kjParser.ParseRule( A, [A, plus, M]) #rr3 = kjParser.ParseRule( A, [M], echo) #rr4 = kjParser.ParseRule( M, [M, star, M]) rr5 = kjParser.ParseRule( M, [oppar, M, clpar]) rr6 = kjParser.ParseRule( M, [id]) rl1 = [rr1,rr5,rr6] rs1 = kjParseBuild.Ruleset(S, rl1) rs1.compFirst() rs1.compFollow() rs1.compSLRNFA() rs1.compDFA() rs1.SLRFixDFA() DFA1 = rs1.DFA ttt1=dummy() # def TESTDFA1( STRING , DOREDUCTIONS = 1): # return TESTDFA( STRING, ttt1, DFA1, rl1, DOREDUCTIONS ) X = kjParser.nonterminal("X") Y = kjParser.nonterminal("Y") RX = kjParser.ParseRule( X, [ oppar, Y, clpar ] ) RY = kjParser.ParseRule( Y, [] ) rl2 = [RX,RY] rs2 = kjParseBuild.Ruleset(X, rl2) rs2.compFirst() rs2.compFollow() rs2.compSLRNFA() rs2.compDFA() rs2.SLRFixDFA() DFA2 = rs2.DFA ttt2 = dummy() # def TESTDFA2( STRING, DOREDUCTIONS = 1): # return TESTDFA( STRING, ttt2, DFA2, rl2, DOREDUCTIONS ) # the following grammar should fail to be slr # (Aho,Ullman p. 213) S = kjParser.nonterminal("S") L = kjParser.nonterminal("L") R = kjParser.nonterminal("R") RS1 = kjParser.ParseRule( S, [L, equals, R] ) RS2 = kjParser.ParseRule( S, [R], echo ) RL1 = kjParser.ParseRule( L, [star, R]) RL2 = kjParser.ParseRule( L, [id]) RR1 = kjParser.ParseRule( R, [L] ) rs3 = kjParseBuild.Ruleset(S, [RS1,RS2,RL1,RL2,RR1]) rs3.compFirst() rs3.compFollow() rs3.compSLRNFA() rs3.compDFA() #rs3.SLRFixDFA() # should fail and does. # testing RULEGRAM ObjG = NullCGrammar() ObjG.Addterm("id","id",echo) ObjG.Nonterms("T E Ep F Tp") ObjG.Keywords("begin end") ObjG.punct("+*()") ObjG.comments(["--.*\n"]) # PROBLEM WITH COMMENTS??? 
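# Comment added for clarity (not in the original file): the rule string
# below uses the kjParseBuild meta-grammar -- the leading "T ::" names
# the root nonterminal, and each "@R Name :: NT >> body" line declares
# one production (NT derives body) registered under the name Name, so
# "@R Three :: E >>" makes E derive the empty string.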
Rulestr = """ ## what a silly grammar! T :: @R One :: T >> begin E end @R Three :: E >> @R Two :: E >> E + T @R Four :: E >> ( T ) """ RL = RULEGRAM.DoParse1( Rulestr, ObjG ) class test_Build(unittest.TestCase): ''' test generation of the grammar ''' MARSHALFILE = "SQLTEST_mar" def test(self): #set this to automatically rebuild the grammar. SELECTRULES = """ ## highest level for select statement (not select for update) select-statement :: @R selectR :: select-statement >> SELECT from-clause where-clause group-by-clause having-clause ## generalized to allow null from clause eg: select 2+2 @R fromNull :: from-clause >> @R fromFull :: from-clause >> FROM @R whereNull :: where-clause >> @R whereFull :: where-clause >> WHERE @R groupNull :: group-by-clause >> @R groupFull :: group-by-clause >> GROUP BY @R havingNull :: having-clause >> @R havingFull :: having-clause >> HAVING @R unionNull :: union-clause >> @R unionFull :: union-clause >> UNION """ SELECTNONTERMS = """ select-statement all-distinct select-list table-reference-list where-clause group-by-clause having-clause union-clause maybe-order-by search-condition column-list maybe-all order-by-clause column-name from-clause """ # of these the following need resolution # (select-list) (table-reference-list) # (search-condition) order-by-clause (column-name) SELECTKEYWORDS = """ SELECT FROM WHERE GROUP BY HAVING UNION DISTINCT ALL AS """ SQLG = kjParseBuild.NullCGrammar() SQLG.SetCaseSensitivity(0) SQLG.Keywords(SELECTKEYWORDS) SQLG.Nonterms(SELECTNONTERMS) # no comments yet SQLG.Declarerules(SELECTRULES) SQLG.Compile() outfile = open(self.MARSHALFILE+'.py', "w") SQLG.MarshalDump(outfile) outfile.close() SQLG2 = kjParser.UnMarshalGram(self.MARSHALFILE) def tearDown(self): filename = self.MARSHALFILE+'.py' if os.path.exists(filename): os.remove(filename) if os.path.exists(filename+'c'): os.remove(filename+'c') if os.path.exists(filename+'o'): os.remove(filename+'o') def suite(): l = [ unittest.makeSuite(test_kjParseBuild), unittest.makeSuite(test_Build), ] return unittest.TestSuite(l) if __name__ == '__main__': runner = unittest.TextTestRunner() runner.run(suite()) # # $Log: test_kjParseBuild.py,v $ # Revision 1.4 2002/05/11 02:59:05 richard # Added info into module docstrings. # Fixed docco of kwParsing to reflect new grammar "marshalling". # Fixed bug in gadfly.open - most likely introduced during sql loading # re-work (though looking back at the diff from back then, I can't see how it # wasn't different before, but it musta been ;) # A buncha new unit test stuff. # # Revision 1.3 2002/05/08 00:49:01 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . 
fixed the module name in gadfly/kjParser.py # # gadfly-1.0.0/test/test_kjbuckets.py0100644000157700012320000002307607466100704016357 0ustar rjonestech# $Id: test_kjbuckets.py,v 1.4 2002/05/08 00:49:01 anthonybaxter Exp $ import unittest # a simple test for kjbuckets0 stolen from relalg.py in the kjbuckets C # module distro # A simple implementation of the relational algebra using kjbuckets def relFromDictSet(schemeseq, dictSet): result = relation(schemeseq, [] ) result.rowset = dictSet return result class relation: def __init__(self, schemeseq, listofrows): self.schemeseq = schemeseq self.scheme = kjbuckets_module.kjSet(schemeseq) rowset = kjbuckets_module.kjSet() for row in listofrows: rowset.add(kjbuckets_module.kjUndump(schemeseq, row)) self.rowset = rowset def result(self): l = [] for row in self.rowset.items(): l.append(row.dump(self.schemeseq)) return l def addDicts(self, dictseq): # not used... for dict in dictseq: self.rowset.add(dict) def checkUnionCompatible(self,other): if self.scheme != other.scheme: raise ValueError, "operands not union compatible" # relational union def __add__(self, other): self.checkUnionCompatible(other) return relFromDictSet(self.schemeseq, self.rowset + other.rowset) # relational difference def __sub__(self, other): self.checkUnionCompatible(other) return relFromDictSet(self.schemeseq, self.rowset - other.rowset) # natural join (hash based algorithm) def __mul__(self,other): commonatts = self.scheme & other.scheme resultset = kjbuckets_module.kjSet() if commonatts: # do a hash based join dumper = tuple(commonatts.items()) selfgraph = kjbuckets_module.kjGraph() # hash index for self othergraph = kjbuckets_module.kjGraph() # hash index for other for row in self.rowset.items(): selfgraph[row] = row.dump(dumper) for row in other.rowset.items(): othergraph[row.dump(dumper)] = row for (selfrow, otherrow) in (selfgraph * othergraph).items(): resultset.add(selfrow + otherrow) else: # no common attributes: do a cross product otherrows = other.rowset.items() for selfrow in self.rowset.items(): for otherrow in otherrows: resultset.add(selfrow + otherrow) return relFromDictSet( tuple((self.scheme + other.scheme).items()), resultset ) # selection using a att->value pairs (as conjunction) def vSel(pairs, rel): selected = kjbuckets_module.kjSet() selector = kjbuckets_module.kjDict(pairs) if selector.Clean()!=None: for row in rel.rowset.items(): if (row + selector).Clean() != None: selected.add(row) return relFromDictSet(rel.schemeseq, selected) # selection using att = att pairs (as conjunction) def eqSelect(pairs, rel): selected = kjbuckets_module.kjSet() selector = kjbuckets_module.kjGraph(pairs) selector = (selector + ~selector).tclosure() # sym, trans closure for row in rel.rowset.items(): if row.remap(selector) != None: selected.add(row) return relFromDictSet(rel.schemeseq, selected) # projection on attribute sequence (as conjunction) def proj(atts, rel): attset = kjbuckets_module.kjSet(atts) resultset = kjbuckets_module.kjSet() for row in rel.rowset.items(): resultset.add(attset * row) return relFromDictSet(atts, resultset) # renaming using (new,old) pair sequence def rename(pairs, rel): renames = kjbuckets_module.kjDict(pairs) untouched = rel.scheme - kjbuckets_module.kjSet(renames.values()) mapper = renames + untouched resultset = kjbuckets_module.kjSet() for row in rel.rowset.items(): resultset.add(mapper * row) return relFromDictSet(tuple(mapper.keys()), resultset) #=========== end of simple.py # #Now let me show you the "simple" module in use. 
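# Illustrative note, not in the original file: the natural join in
# __mul__ above is hash based -- each relation's rows are indexed in a
# kjGraph keyed by the dump() of the common attributes, so the join is
# one graph composition (selfgraph * othergraph) rather than a nested
# loop over all row pairs. A tiny join, assuming kjbuckets_module is
# already bound (as setUp below arranges):
#
#   R1 = relation(('a', 'b'), [(1, 'x'), (2, 'y')])
#   R2 = relation(('b', 'c'), [('x', 10), ('x', 20)])
#   (R1 * R2).result()  # -> the two rows pairing a=1 with c=10 and c=20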
First we need some relations. #I'll steal C.J.Date's canonical/soporific supplier/parts database: # ## database of suppliers, parts and shipments ## from Date, page 79 (2nd ed) or page 92 (3rd ed) */ class test_kjbuckets0(unittest.TestCase): def setUp(self): global kjbuckets_module import gadfly.kjbuckets0 as kjbuckets_module def test(self): #suppliers S = relation( ('snum', 'sname', 'status', 'city'), [ (1, 'Smith', 20, 'London'), (2, 'Jones', 10, 'Paris'), (3, 'Blake', 30, 'Paris'), (4, 'Clark', 20, 'London'), (5, 'Adams', 30, 'Athens') ]) #parts P = relation( ('pnum', 'pname', 'color', 'weight', 'pcity'), [ (1, 'Nut', 'Red', 12, 'London'), (2, 'Bolt', 'Green', 17, 'Paris' ), (3, 'Screw', 'Blue', 17, 'Rome' ), (4, 'Screw', 'Red', 14, 'London'), (5, 'Cam', 'Blue', 12, 'Paris'), (6, 'Cog', 'Red', 19, 'London') ]) # shipments SP = relation( ('snum', 'pnum', 'qty',), [ (1, 1, 300), (1, 2, 200), (1, 3, 400), (1, 4, 200), (1, 5, 100), (1, 6, 100), (2, 1, 300), (2, 2, 400), (3, 2, 200), (4, 2, 200), (4, 4, 300), (4, 5, 400) ]) # names and cities of suppliers l = proj(("sname","city"),S).result() l.sort() self.assertEquals(l, [('Adams', 'Athens'), ('Blake', 'Paris'), ('Clark', 'London'), ('Jones', 'Paris'), ('Smith', 'London')]) # part names of parts supplied by Blake self.assertEquals(proj(("pname",),vSel( ( ("sname","Blake"), ), S*SP*P)).result(), ['Bolt']) # supplier names and numbers where the supplier doesn't supply screws l = (proj(("sname","snum"), S) - proj(("sname","snum"), vSel((("pname", "Screw"),), P*SP*S)) ).result() l.sort() self.assertEquals(l, [('Adams', 5), ('Blake', 3), ('Jones', 2)]) def test2(self): G = kjbuckets_module.kjGraph() r3 = range(3) r = map(None, r3, r3) for i in range(3): G[i] = i+1 D = kjbuckets_module.kjDict(G) D[9]=0 G[0]=10 S = kjbuckets_module.kjSet(G) S[-1] = 5 #print "%s.remap(%s) = %s" % (D, G, D.remap(G)) for X in (S, D, G, r, tuple(r), 1): for C in (kjbuckets_module.kjGraph, kjbuckets_module.kjSet, kjbuckets_module.kjDict): T = C(X) T2 = C() ALL = (S, D, G) for X in ALL: self.assertEqual(len(X), len(X.items())) cb = X.Clean() del X[2] self.assertNotEqual(cb, X.Clean() or []) self.assert_(X.subset(X), "trivial subset fails") self.assert_(X==X, "trivial cmp fails") self.assert_(not not X, "nonzero fails") if X is S: self.assert_(S.member(0), "huh 1?") self.assert_(not S.member(123), "huh 2?") S.add(999) del S[1] self.assert_(S.has_key(999), "huh 3?") else: self.assertNotEqual(X, ~X, "inverted") self.assert_(X.member(0,1), "member test fails (0,1)") X.add(999,888) X.delete_arc(999,888) self.assert_(not X.member(999,888), "member test fails (999,888)") self.assert_(not X.has_key(999), "has_key fails 999") self.assert_(X.has_key(0), "has_key fails 0") for Y in ALL: #if (X!=S and Y!=S): # print "diff", X, Y # print "%s-%s=%s" % (X,Y,X-Y) #elif X==S: # D = kjbuckets_module.kjSet(Y) # print "diff", X, D # print "%s-%s=%s" % (X,D,X-D) #print "%s+%s=%s" % (X,Y,X+Y) #print "%s&%s=%s" % (X,Y,X&Y) #print "%s*%s=%s" % (X,Y,X*Y) x,y = cmp(X,Y), cmp(Y,X) self.assertEqual(x, -y, "bad cmp!") #print "cmp(X,Y), -cmp(Y,X)", x,-y #print "X.subset(Y)", X.subset(Y) class test_kjbuckets(unittest.TestCase): def setUp(self): global kjbuckets_module import kjbuckets as kjbuckets_module def suite(): l = [ unittest.makeSuite(test_kjbuckets0), ] try: import kjbuckets l.append(unittest.makeSuite(test_kjbuckets)) except ImportError: print 'not running kjbuckets C module test' pass return unittest.TestSuite(l) if __name__ == '__main__': runner = unittest.TextTestRunner() 
runner.run(suite()) # # $Log: test_kjbuckets.py,v $ # Revision 1.4 2002/05/08 00:49:01 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.3 2002/05/07 04:03:14 richard # . major cleanup of test_gadfly # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . fixed the module name in gadfly/kjParser.py # # gadfly-1.0.0/test/test_sqlgrammar.py0100644000157700012320000000366007466100704016535 0ustar rjonestech# $Id: test_sqlgrammar.py,v 1.4 2002/05/08 00:49:01 anthonybaxter Exp $ import unittest from gadfly.semantics import Parse_Context from gadfly import sql, bindings sql = sql.getSQL() sql = bindings.BindRules(sql) class test_SQLGrammar(unittest.TestCase): def test(self): tests = [ "select a from x where b=c", "select distinct x.a from x where x.b=c", "select all a from x where b=c", "select a from x, y where b=c or x.d=45", "select a as k from x d, y as m where b=c", "select 1 as n, a from x where b=c", "select * from x", "select a from x where b=c", "select a from x where not b=c or d=1 and e=5", "select a from x where a=1 and (x.b=3 or not b=c)", "select -1 from x", "select -1e6j from x", "insert into table1 (a,b,c) values (-1e6+3j, -34e10, 56j)" ] context = Parse_Context() for test in tests: sql.DoParse1(test, context) def suite(): l = [unittest.makeSuite(test_SQLGrammar), ] return unittest.TestSuite(l) if __name__ == '__main__': runner = unittest.TextTestRunner() runner.run(suite()) # # $Log: test_sqlgrammar.py,v $ # Revision 1.4 2002/05/08 00:49:01 anthonybaxter # El Grande Grande reindente! Ran reindent.py over the whole thing. # Gosh, what a lot of checkins. Tests still pass with 2.1 and 2.2. # # Revision 1.3 2002/05/07 07:06:11 richard # Cleaned up sql grammar compilation some more. # Split up the BigList into its components too. # # Revision 1.2 2002/05/06 23:27:10 richard # . made the installation docco easier to find # . fixed a "select *" test - column ordering is different for py 2.2 # . some cleanup in gadfly/kjParseBuild.py # . made the test modules runnable (remembering that run_tests can take a # name argument to run a single module) # . fixed the module name in gadfly/kjParser.py # # gadfly-1.0.0/CHANGES.txt0100644000157700012320000000223407512762751013614 0ustar rjonestechThis file contains the changes to Gadfly over time. The entries are given with the most recent entry first. 1.0.0 - fixed compilation bug in kjbucketsmodule.c (PyObject_HEAD_INIT) - fixed commit-after-open bug (no working_db) - added more functionality to gfplus: / or !! repeat last command (s|c)/pat/repl repeat last but RE sub pat for repl - corrected gfplus exit code - close db before removal in tests 1.0.0pr1 - updated to use new regular expression engine (regex -> re migration) performed by the fine folk at the Zope Corporation (http://www.zope.com/). 
- kjbuckets C extension module maintenance and updates (see the kjbuckets documentation for details) - cleanup and reorganisation of the gadfly modules, including: - migration to distutils-based installation - cleanup of SQL grammar marshalling - more strict (in places) unit/regression testing - general cleanup of the code itself - cleanup of networking code (gfclient argument list change) - fixed bug in Average - started to clean up code that might break under future python releases (float division) - removed as many circular imports as possible - documentation cleanup gadfly-1.0.0/COPYRIGHT.txt0100644000157700012320000000261607465430475014122 0ustar rjonestech The following copyright is modified from the python copyright. Copyright Notice ---------------- The gadfly and kjbuckets source is copyrighted, but you can freely use and copy it as long as you don't change or remove the copyright: Copyright Aaron Robert Watters, 1994 All Rights Reserved Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appears in all copies and that both that copyright notice and this permission notice appear in supporting documentation. AARON ROBERT WATTERS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL AARON ROBERT WATTERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. Signature (obsolete) -------------------- Aaron Robert Watters Department of Computer and Information Sciences New Jersey Institute of Technology University Heights Newark, NJ 07102 phone (201)596-2666 fax (201)596-5777 home phone (908)545-3367 email: aaron@vienna.njit.edu gadfly-1.0.0/README.txt0100644000157700012320000000106507465602103013471 0ustar rjonestechInstallation ------------ See doc/installation.txt for installation instructions. The impatient may perform these three steps: python setup.py install cd kjbuckets python setup.py install If you don't have a C compiler, skip the kjbuckets install step. If you're also on Windows, then cd to kjbuckets/ and copy the .pyd file to your python installation. If you are using python 2.0, patch the gadly/semantics.py file with the patch in kjbuckets/2.0 Documenation and Demos ---------------------- See doc/index.txt and doc/demo. gadfly-1.0.0/TODO.txt0100644000157700012320000000053607470640643013312 0ustar rjonestechStuff to do some time: . extend gfserver to handle commands - stop, start . introduce a rowid concept for default ordering . have only cached rows in-memory, rest stay on disk . many more unit tests to torture the SQL and semantics code . conform to Python Database API Specification 2.0 http://www.python.org/topics/database/DatabaseAPI-2.0.html gadfly-1.0.0/run_tests0100755000157700012320000000045307465430475013762 0ustar rjonestech#! 
/usr/bin/env python # $Id: run_tests,v 1.1.1.1 2002/05/06 07:31:09 richard Exp $ from test import go import sys if len(sys.argv) > 1: go(sys.argv[1:]) else: go() # # $Log: run_tests,v $ # Revision 1.1.1.1 2002/05/06 07:31:09 richard # # # # # vim: set filetype=python ts=4 sw=4 et si gadfly-1.0.0/setup.py0100644000157700012320000001121407512762521013506 0ustar rjonestech#!/usr/local/bin/python import os, string, sys from distutils.core import setup from distutils.command.build_scripts import build_scripts from glob import glob # # SQL grammar compilation # # see if we should build the compiled SQL grammar file marfile = os.path.join('gadfly','sql_mar.py') build = 1 if os.path.exists(marfile): mtime = os.stat(marfile)[-2] if mtime > os.stat(os.path.join('gadfly', 'grammar.py'))[-2]: build = 0 if build: print 'building grammar file' # nuke any existing pyc/o for filename in ('sql_mar.pyc', 'sql_mar.pyo'): filename = os.path.join('gadfly', filename) if os.path.exists(filename): os.remove(filename) from gadfly import kjParseBuild from gadfly.grammar import * SQLG = kjParseBuild.NullCGrammar() SQLG.SetCaseSensitivity(0) DeclareTerminals(SQLG) SQLG.Keywords(keywords) SQLG.punct(puncts) SQLG.Nonterms(nonterms) SQLG.comments(["--.*"]) # TODO: should add comments SQLG.Declarerules(sqlrules) SQLG.Compile() SQLG.MarshalDump(open(marfile, "w")) # # Build script files # - stolen from the Roundup setup file (http://roundup.sf.net/) # class build_scripts_create(build_scripts): """ Overload the build_scripts command and create the scripts from scratch, depending on the target platform. You have to define the name of your package in an inherited class (due to the delayed instantiation of command classes in distutils, this cannot be passed to __init__). The scripts are created in an uniform scheme: they start the main() function in the module .scripts. The mangling of script names replaces '-' and '/' characters with '-' and '.', so that they are valid module paths. """ package_name = None def copy_scripts(self): """ Create each script listed in 'self.scripts' """ if not self.package_name: raise Exception("You have to inherit build_scripts_create and" " provide a package name") to_module = string.maketrans('-/', '_.') self.mkpath(self.build_dir) for script in self.scripts: outfile = os.path.join(self.build_dir, os.path.basename(script)) #if not self.force and not newer(script, outfile): # self.announce("not copying %s (up-to-date)" % script) # continue if self.dry_run: self.announce("would create %s" % outfile) continue module = os.path.splitext(os.path.basename(script))[0] module = string.translate(module, to_module) script_vars = { 'python': os.path.normpath(sys.executable), 'package': self.package_name, 'module': module, } self.announce("creating %s" % outfile) file = open(outfile, 'w') try: if sys.platform == "win32": file.write('@echo off\n' 'if NOT "%%_4ver%%" == "" %(python)s -c "from %(package)s.scripts.%(module)s import main; main()" %%$\n' 'if "%%_4ver%%" == "" %(python)s -c "from %(package)s.scripts.%(module)s import main; main()" %%*\n' % script_vars) else: file.write('#! %(python)s\n' 'from %(package)s.scripts.%(module)s import main\n' 'main()\n' % script_vars) finally: file.close() os.chmod(outfile, 0755) class build_scripts_gadfly(build_scripts_create): package_name = 'gadfly' def scriptname(path): """ Helper for building a list of script names from a list of module files. 
""" script = os.path.splitext(os.path.basename(path))[0] script = string.replace(script, '_', '-') if sys.platform == "win32": script = script + ".bat" return script # build list of scripts from their implementation modules gadfly_scripts = map(scriptname, glob('gadfly/scripts/[!_]*.py')) if __name__ == '__main__': setup( name = 'gadfly', version = '1.0.0', description = 'Gadfly relational database', maintainer = 'Richard Jones', maintainer_email = 'richard@users.sourceforge.net', url = 'http://gadfly.sourceforge.net/', packages = ['gadfly', 'gadfly.scripts'], # Override certain command classes with our own ones cmdclass = { 'build_scripts': build_scripts_gadfly, }, scripts = gadfly_scripts, ) gadfly-1.0.0/PKG-INFO0100644000157700012320000000037207512763043013074 0ustar rjonestechMetadata-Version: 1.0 Name: gadfly Version: 1.0.0 Summary: Gadfly relational database Home-page: http://gadfly.sourceforge.net/ Author: Richard Jones Author-email: richard@users.sourceforge.net License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN