pycifrw-4.4/BUILDING

Release procedure for PyCIFRW
=============================

After code changes are complete:

1. Edit all relevant text files in the top-level directory
2. Build source release: python setup.py sdist
3. Build binary wheel release for Linux: python setup.py bdist_wheel
4. Audit wheel for manylinux1 conformance: python3 check_manylinux_package.py show
   (uses auditwheel package from pip)
5. For each of Python 2,3 and source,wheel conduct the testing procedure outlined
   in the TESTING file.

pycifrw-4.4/CONTRIBUTORS

Contributors to PyCIFRW
=======================

James Hester (initiator and maintainer)
Pavol Juhas (bug fixes, cleanups and Python3)
CJ Wright (conda integration)
Boris Ducek (bug fixes)

pycifrw-4.4/INSTALLATION

Installation instructions for PyCifRW v 4
-----------------------------------------

PyCifRW may be installed using Conda, Python pip, or directly from source.

Using Conda
-----------

If you don't have conda, install it from either
Anaconda: https://docs.continuum.io/anaconda/install
or miniconda: https://conda.io/docs/install/quick.html

Then,

conda install pycifrw -c conda-forge

Now skip to the "Checking the installation" section at the end of this document.

Prerequisites for installation from pip or source
-------------------------------------------------

Python 2.7 or greater must already be installed. Self-installing packages for
Python (Windows and Mac) are available from www.python.org. All Linux
distributions come with Python included in the base setup.

Use of the optional dREL functionality requires installation of PLY (Python Lex Yacc):

pip install ply

and numpy

pip install numpy

Installation
------------

(All operating systems) The latest version can be obtained from the Python
Package Index:

pip install pycifrw

(From source)

1. Download and unpack the source distribution file PyCifRW-4.4.tar.gz
2. In directory PyCifRW-4.4, execute the command

python setup.py install

In Windows this can be accomplished by double-clicking the setup.py icon.

Note that commented source code (.nw files) is available only in the .tar.gz file.

Checking the installation
-------------------------

Start an interactive python interpreter. Type "import CifFile". If this
command is successful, installation is complete.

pycifrw-4.4/LICENSE

PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science and
Technology Organisation ("ANSTO"), and the Individual or Organization
("Licensee") accessing and otherwise using this software ("PyCIFRW") in source
or binary form and its associated documentation.

2.
Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
pycifrw-4.4/MANIFEST.in

recursive-include src *.nw
include LICENSE
include INSTALLATION
include TESTING
include BUILDING
include CONTRIBUTORS
include src/*.html
include TestPyCifRW.py
include TestDrel.py
include RELEASENOTES
include README.md
include src/Programs/*.py
include src/Programs/*.cif
include src/Programs/*.html
include src/Programs/README
include src/Programs/Web.README
include src/Changelog
include src/Makefile
include src/lib/*
include docs/*.html
include docs/CifFile/*.html

pycifrw-4.4/PKG-INFO

Metadata-Version: 1.1
Name: PyCifRW
Version: 4.4
Summary: CIF/STAR file support for Python
Home-page: https://bitbucket.org/jamesrhester/pycifrw/overview
Author: James Hester
Author-email: jamesrhester@gmail.com
License: Python 2.0
Description-Content-Type: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: Python Software Foundation License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 2
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
Classifier: Topic :: Software Development :: Libraries :: Python Modules

pycifrw-4.4/PyCifRW.egg-info/SOURCES.txt

BUILDING
CONTRIBUTORS
INSTALLATION
LICENSE
MANIFEST.in
README.md
RELEASENOTES
TESTING
TestDrel.py
TestPyCifRW.py
setup.py
PyCifRW.egg-info/PKG-INFO
PyCifRW.egg-info/SOURCES.txt
PyCifRW.egg-info/dependency_links.txt
PyCifRW.egg-info/top_level.txt
docs/dict_use.html
docs/drel_use.html
docs/overview.html
docs/CifFile/CifFile.m.html
docs/CifFile/CifFile_module.m.html
docs/CifFile/StarFile.m.html
docs/CifFile/StarScan.m.html
docs/CifFile/TestDrel.m.html
docs/CifFile/TestPyCifRW.m.html
docs/CifFile/TypeContentsParser.m.html
docs/CifFile/YappsStarParser_1_0.m.html
docs/CifFile/YappsStarParser_1_1.m.html
docs/CifFile/YappsStarParser_2_0.m.html
docs/CifFile/YappsStarParser_DDLm.m.html
docs/CifFile/YappsStarParser_STAR2.m.html
docs/CifFile/index.html
src/CifFile.html
src/CifFile_module.nw
src/CifFile_module.py
src/Makefile
src/StarFile.html
src/StarFile.m.html
src/StarFile.nw
src/StarFile.py
src/TypeContentsParser.html
src/TypeContentsParser.nw
src/TypeContentsParser.py
src/YappsStarParser.html
src/YappsStarParser.nw
src/YappsStarParser_1_0.py
src/YappsStarParser_1_1.py
src/YappsStarParser_2_0.py
src/YappsStarParser_STAR2.py
src/__init__.py
src/cif-lex.html
src/cif-yacc.html
src/parsetab.py
src/yapps3_compiled_rt.py
src/Programs/README
src/Programs/Web.README
src/Programs/add_key_names.py
src/Programs/add_spoke.py
src/Programs/cif2cell.py
src/Programs/cifv_server.html
src/Programs/ddl.dic.html
src/Programs/loop_example.cif
src/Programs/loop_example.py
src/Programs/output_asciidoc.py
src/Programs/parsetab.py
src/Programs/process-validate.py
src/Programs/star2_to_cif2.py
src/Programs/syd_example.py
src/Programs/syd_example_2.py
src/Programs/templ_attr.cif
src/Programs/templ_enum.cif
src/Programs/type_test.cif
src/Programs/updated.cif
src/Programs/validate_cif.py
src/drel/__init__.py
src/drel/drel_ast_yacc.py
src/drel/drel_lex.py
src/drel/drel_runtime.nw
src/drel/drel_runtime.py
src/drel/parsetab.py
src/drel/py_from_ast.nw
src/drel/py_from_ast.py
src/lib/Makefile
src/lib/lex.yy.c
src/lib/py_star_scan.c
src/lib/star.l
src/lib/star_scanner.h

pycifrw-4.4/PyCifRW.egg-info/dependency_links.txt

pycifrw-4.4/PyCifRW.egg-info/top_level.txt

CifFile

pycifrw-4.4/README.md

PyCIFRW Readme
--------------

Introduction
------------

PyCIFRW provides support for reading and writing CIF (Crystallographic
Information Format) files using Python. It was developed at the Australian
National Beamline Facility (ANBF), run by the Australian Synchrotron Research
Program (ASRP), as part of a larger project to provide CIF input/output
capabilities for data collection. It is now (Feb 2018) maintained and developed
within the Australian Nuclear Science and Technology Organisation (ANSTO).

Conformance
-----------

The specifications found in Vol G of the International Tables for
Crystallography were used as a reference for CIF 1.0/1.1 syntax.
http://dx.doi.org/10.1107/S1600576715021871 was used as the CIF 2.0 reference.

PyCifRW has been tested on the IUCr sample CIF 1.1 trip files located at
http://www.iucr.org/iucr-top/cif/developers/trip and, for each file, either
fails or reads it successfully as it is supposed to (note that `ciftest5`
contains characters now forbidden in CIFs).

Supported Platforms
-------------------

PyCIFRW is written entirely in Python, and so should run wherever Python runs.
Any failures on Mac, Windows or Linux should be communicated to the author,
either through the BitBucket issues tracker
(http://bitbucket.org/jamesrhester/pycifrw/issues) or email.

The source code of a C extension module is also included in the distribution.
This module accelerates CIF file reading. From time to time system-dependent
installation packages are generated containing precompiled versions of this
module.

Installation
------------

See file [INSTALLATION](https://bitbucket.org/jamesrhester/pycifrw/src/efd90c5e6dec7caf1e61fac68492a7fd66661d97/INSTALLATION?at=development).

License
-------

PyCIFRW is made available using the Python 2.0 license.
The full text is [here](https://bitbucket.org/jamesrhester/pycifrw/src/efd90c5e6dec7caf1e61fac68492a7fd66661d97/LICENSE?at=development).

Use
---

See the various files in the docs directory for details of the interface.
Essentially, CIF files look like python dictionaries, with each entry in the
dictionary corresponding to a data block. The blocks themselves are also
dictionaries, with each data name being a single entry in the dictionary, so
for example, `cf['si_std']['_diffrn_meas_wavelength']` will return the value of
`_diffrn_meas_wavelength` in the data block named `si_std` of the Cif file
object `cf`.

Example
-------

To read in a CIF:

    from CifFile import ReadCif
    cf = ReadCif('jun_01_2.cif')

To access information in a CIF:

    wav = cf['si_std']['_diffrn_meas_wavelength']

To set a value:

    cf['si_std']['_diffrn_meas_wavelength'] = 1.54

Example programs
----------------

The file 'TestPyCifRW.py' in the source distribution contains around 170 tests
of PyCIFRW functionality and is a good source of examples for using both simple
and advanced PyCIFRW interfaces.

The "Programs" directory in the source distribution contains simple example
programs. Of interest are `validate_cif.py`, which validates data files against
data dictionaries (execute this file at a terminal prompt without arguments for
a help message), and `output_asciidoc.py`, which will convert a DDLm dictionary
into an asciidoc document that can then be converted to HTML or other
presentation formats.

pycifrw-4.4/RELEASENOTES

Release Notes for PyCIFRW-4.4
=============================

The ValidCifFile class has been deprecated and will be removed in the next
release.

Changes since 4.3
=================

* Fixed Windows BOM handling
* Work-around for Python 2.7 bug triggered on import (Pavol Juhas)
* Nested save frames no longer accepted when reading non-STAR formats
* CifFile.py filename changed to CifFile_module.py to avoid class-name ambiguity
* Many Python 3 fixes and improvements (Pavol Juhas)
* `permissive` option added to CIF input to try latin1 encoding as a fallback
* Bug fixes:
  - datanames in blocks that have been read in from a file can once again
    have their values changed
  - AddToLoop did not replace looped datanames correctly

Changes since 4.2
=================

* Fixed bugs in fast merging
* Improved DDLm import behaviour
* Improved templating of semicolon-delimited strings
* Added non-standard hash functions to dREL
* Adjusted DDLm CIF dictionary usage to conform to recent changes
* first_block() returns a single block from the file, not necessarily the
  first one in the file, or the first one added to the CifFile object.
* All strings returned by methods are of type unicode.
* Single source for Python2 and Python3

Changes since 4.1.1
===================

* Up to two orders of magnitude improvement in reading speed for large files
  (several Mb). Note that PyCIFRW reads the entire file into memory, so you
  must have sufficient RAM.
* Continuous bugfixes and improvements to dREL system
  - Allow multiple dREL methods per definition
  - Detect and handle recursion in dREL methods
* Understands DDLm use of _import.get to merge dictionaries
* Experimental DDLm attributes for category construction
* Fixed bug in embedded triple-quoted string interpretation
* Output block order defaults to input block order

Incompatible changes (since 3.0 series, unchanged since 4.1)
============================================================

* Function "validate" has been renamed to Validate for consistency
* Nested loops (only allowed in STAR files) are no longer supported
* Merging of datablocks in 'overlay' mode has been removed. This is relevant
  only for dictionary merging.
* The package now installs in a separate Python site-packages directory
  called 'CifFile'. It should thus be imported using 'import CifFile' or
  'from CifFile import CifFile'. This may differ from earlier versions.
* A single-item loop can no longer be created using the form
  cf['_dataname'] = [1,2,3,4]. To get the old behaviour, create the CifBlock
  with 'compat_mode = True'.
* The line folding protocol is always used when writing CIF files. This is
  only invoked if you have lines longer than 2048 characters (or your
  specified maximum length).
* The text prefix protocol introduced in CIF2.0 is always used when writing.
  This is only invoked if a string contains the character sequence '\n;'
* The text prefix protocol and line folding protocol are detected and
  transparently removed from text values when reading.

Python 3 notes
==============

In moving to unified Python2/3 source for version 4.3, all strings returned by
CifFile and StarFile methods are of type unicode in Python 2. Please create an
issue on Bitbucket if this causes breakage in your software.

* The semantics of keys(), items() and values() have not changed for CifFile
  and CifBlock objects, despite the Python 3 dictionary semantics requiring
  them to be iterators rather than lists.
* In the original Python 2 version, the top-level `CifFile` module imported
  everything from the `CifFile.py` and `StarFile.py` modules. This meant that
  it was sufficient for your code to include `from CifFile import x`, where x
  is anything defined in CifFile.py or StarFile.py. This has been replicated
  in Python 3 by explicitly importing, in CifFile_module.py and StarFile.py,
  those classes and functions required for the tests to succeed. If your code
  uses a function not included in the tests (note that all classes and
  therefore their methods are included) you will need to refer to it as
  `CifFile.CifFile_module.x` (for a function named `x` in `CifFile_module.py`).
* Python 3 conversion relied heavily on getting the 170 unit tests to complete
  successfully. These tests cover about 80% of the code. If your code happens
  to trigger errors in the remaining code, please create an issue. The code
  that is not covered well is generally DDL1/2 validation code or unusual
  error conditions.

Deprecated methods
==================

* AddCifItem. This unnecessarily convoluted and mind-bending way of adding a
  whole loop all at once in a single tuple has been deprecated. Just assign
  dataname values using the usual square brackets and then call CreateLoop
  with the datanames that belong in a single loop (see the sketch below).
* AddToLoop. Assign individual values and then call AddLoopName for each
* ValidCifFile (whole class). This appears to be unused.
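As an illustration, here is a minimal sketch of the recommended calls in place
of the deprecated AddCifItem. The block and data names are invented for the
example; NewBlock and CreateLoop are the calls exercised in TestPyCifRW.py:

    from CifFile import CifFile

    cf = CifFile()
    cf.NewBlock('example_block')          # start an empty data block
    blk = cf['example_block']
    blk['_example_id'] = ['a','b','c']    # assign each dataname as usual
    blk['_example_value'] = [1,2,3]
    # then group the datanames into a single loop
    blk.CreateLoop(['_example_id','_example_value'])

AddLoopName, suggested above as the replacement for AddToLoop, attaches one
further dataname at a time to an existing loop in the same way, e.g.
blk.AddLoopName('_example_id','_example_extra') after assigning
blk['_example_extra'].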
pycifrw-4.4/TESTING

Testing procedure for PyCIFRW
=============================

In Python2 and Python3 virtual environments (create with
virtualenv -p python<2,3> test-environment, activate with
source local/bin/activate (Python2) or source bin/activate (Python3)):

0. pip install ply; pip install numpy
1. pip install <PyCifRW package>
2. tar -xf <source distribution>
3. cd PyCifRW-<version>
4. ln -s <source directory>/tests .
5. ln -s <source directory>/dictionaries .
6. python TestPyCifRW.py

Additional tests run during development

1. Edit out/in 'unicode_literals' from TestPyCifRW.py for Python2
2. in the tests directory, 'python runtests.py'
3. in the unicode-tests directory, 'python runtests.py'

Note that the tests and dictionaries directories are not currently bundled.
If you would like these, please let me know.

pycifrw-4.4/TestDrel.py

# Test suite for the dRel parser
#
# Testing of the PyCif module using the PyUnit framework
#
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

import unittest
import CifFile
from CifFile import StarFile,StarList
import numpy
from CifFile.drel import drel_lex,drel_ast_yacc,py_from_ast,drel_runtime
from copy import copy

class dRELRuntimeTestCase(unittest.TestCase):
    def setUp(self):
        pass

    def testListAppend(self):
        a = [[1,2],[3,4]]
        b = drel_runtime.aug_append(a,1)
        c = drel_runtime.aug_append(a,[3])
        d = drel_runtime.aug_append(a,[[4,5,6]])
        self.failUnless(b == [[1,2],[3,4],1])
        self.failUnless(c == [[1,2],[3,4],[3]])
        self.failUnless(d == [[1,2],[3,4],[[4,5,6]]])

    def testListAdd(self):
        a = [[1,2],[3,4]]
        aa = 5
        b = drel_runtime.aug_add(a,1)
        c = drel_runtime.aug_add(a,[[1,2],[7,6]])
        d = drel_runtime.aug_add(5,2)
        self.failUnless((c == numpy.array([[2,4],[10,10]])).all())
        self.failUnless((b == numpy.array([[2,3],[4,5]])).all())
        self.failUnless(d == 7)

    def testListUnappend(self):
        a = [[1,2],[3,4]]
        c = drel_runtime.aug_remove(a,[1,2])
        self.failUnless(c == [[3,4]])

    def testListSubtract(self):
        a = [[1,2],[3,4]]
        aa = 5
        b = drel_runtime.aug_sub(a,1)
        c = drel_runtime.aug_sub(a,[[1,2],[7,6]])
        d = drel_runtime.aug_sub(5,2)
        self.failUnless((c == numpy.array([[0,0],[-4,-2]])).all())
        self.failUnless((b == numpy.array([[0,1],[2,3]])).all())
        self.failUnless(d == 3)

    def testDotProduct(self):
        """Test that multiplication works correctly"""
        a = numpy.array([1,2,3])
        b = numpy.array([4,5,6])
        d = drel_runtime.drel_dot(a,b)
        self.failUnless(d == 32)

    def testMatrixMultiply(self):
        """Test that matrix * matrix works"""
        a = numpy.matrix([[1,0,0],[0,1,0],[0,0,1]])
        b = numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])
        c = drel_runtime.drel_dot(a,b)
        self.failUnless((c == numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])).any())

    def testMatVecMultiply(self):
        """Test that matrix * vec works"""
        a = numpy.array([0,1,0])
        b = numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])
        c = drel_runtime.drel_dot(a,b)
        d = drel_runtime.drel_dot(b,a)
        self.failUnless((d == numpy.matrix([4,7,10])).any())
        self.failUnless((c == numpy.matrix([6,7,8])).any())

    def testScalarVecMult(self):
        """Test that multiplying by a scalar works"""
        a = [1,2,3]
        b = 4
        c = drel_runtime.drel_dot(b,a)
        d = drel_runtime.drel_dot(a,b)
        self.failUnless((c == numpy.matrix([4,8,12])).any())
        self.failUnless((d == numpy.matrix([4,8,12])).any())

    def testArrayAppend(self):
        a = numpy.array([0,1,0])
        b = numpy.array([1,0,0])
        a = drel_runtime.aug_append(a,b)
self.failUnless((a == numpy.array([[0,1,0],[1,0,0]])).any()) # Test simple statements class SingleSimpleStatementTestCase(unittest.TestCase): def setUp(self): #create our lexer and parser self.lexer = drel_lex.lexer self.parser = drel_ast_yacc.parser self.dic = CifFile.CifDic("tests/drel/dic_for_tests.dic",grammar="STAR2") def create_test(self,instring,right_value,debug=False,array=False): """Given a string, create and call a function then check result""" if instring[-1]!="\n": instring += '\n' res = self.parser.parse(instring,debug=debug,lexer=self.lexer) if debug: print("%s\n -> \n%r \n" % (instring, res)) realfunc = py_from_ast.make_python_function(res,"myfunc",'_a.b',have_sn=False, cif_dic=self.dic) if debug: print("-> %s" % realfunc) exec(realfunc,globals()) answer = myfunc(self) if debug: print(" -> {!r}".format(answer)) if not array: self.failUnless(answer == right_value) else: try: self.failUnless((answer == right_value).all()) except: self.failUnless(answer == right_value) # as we disallow simple expressions on a separate line to avoid a # reduce/reduce conflict for identifiers, we need at least an # assignment statement def testrealnum(self): """test parsing of real numbers""" self.create_test('_a.b=5.45',5.45) self.create_test('_a.b=.45e-24',.45e-24) def testinteger(self): """test parsing an integer""" resm = [0,0,0,0] checkm = [1230,77,5,473] self.create_test('_a.b = 1230',1230) self.create_test('_a.b = 0x4D',77) self.create_test('_a.b = 0B0101',5) self.create_test('_a.b = 0o731',473) def testcomplex(self): """test parsing a complex number""" self.create_test('_a.b = 13.45j',13.45j) def testList(self): """test parsing a list over two lines""" self.create_test('_a.b = [1,2,\n 3,4,\n 5,6]',StarList([1,2,3,4,5,6])) def testparenth(self): """test parsing a parenthesis over two lines""" self.create_test('_a.b = (1,2,\n3,4)',(1,2,3,4)) def testshortstring(self): """test parsing a one-line string""" jk = "_a.b = \"my pink pony's mane\"" jl = "_a.b = 'my pink pony\"s mane'" self.create_test(jk,jk[8:-1]) self.create_test(jl,jl[8:-1]) # # This fails due to extra indentation introduced when constructing the # enclosing function # def testlongstring(self): """test parsing multi-line strings""" jk = '''_a.b = """ a long string la la la '"' some more end""" ''' jl = """_a.b = ''' a long string la la la '"' some more end''' """ self.create_test(jk,jk[7:-3]) self.create_test(jl,jl[7:-3]) def testmathexpr(self): """test simple maths expressions """ testexpr = (("_a.b = 5.45 + 23.6e05",5.45+23.6e05), ("_a.b = 11 - 45",11-45), ("_a.b = 45.6 / 22.2",45.6/22.2)) for test,check in testexpr: self.create_test(test,check) def testexprlist(self): """test comma-separated expressions""" test = "_a.b = 5,6,7+8.5e2" self.create_test(test,(5,6,7+8.5e2)) def testparen(self): """test parentheses""" test = "_a.b = ('once', 'upon', 6,7j +.5e2)" self.create_test(test,('once' , 'upon' , 6 , 7j + .5e2 )) def testlists(self): """test list parsing""" test = "_a.b = ['once', 'upon', 6,7j +.5e2]" self.create_test(test,StarList(['once' , 'upon' , 6 , 7j + .5e2 ])) def test_multistatements(self): """test multiple statements""" test1 = "_a.b = 1.2\nb = 'abc'\nqrs = 4.4\n" test2 = '\n\nq = _c.d\nnumeric = "01234"\n_a.b=11.2' self.create_test(test1,1.2) #self.create_test(test2,11.2) def test_semicolon_sep(self): """test multiple statements between semicolons""" test = "_a.b = 1.2;b = 'abc';qrs = 4.4" self.create_test(test,1.2) def test_slicing(self): """Test that our slicing is parsed correctly""" test = "b = 
array([[1,2],[3,4],[5,6]]);_a.b=b[0,1]" self.create_test(test,2) def test_slice_2(self): """Test that first/last slicing works""" test = "b = 'abcdef';_a.b=b[1:3]" self.create_test(test,'bc') def test_paren_balance(self): """Test that multi-line parentheses work """ test = """b = ( (1,2,( 3,4 ) ,5),6 ,7)\n _a.b=b[0][2][0]""" self.create_test(test,3) def test_list_constructor(self): """Test that the list constructor works""" test = """_a.b = List(1,2)""" self.create_test(test,[1,2]) def test_non_python_ops(self): """Test operators that have no direct Python equivalents""" test_expr = (("b = [1,2]; _a.b = [3,4]; _a.b++=b",StarList([3,4,[1,2]])), ("b = [1,2]; _a.b = [3,4]; _a.b+=b",[4,6]), ("b = 3; _a.b = [3,4]; _a.b-=b",[0,1]), ("b = [1,2]; _a.b = [[1,2],[3,4]]; _a.b--=b",[[3,4]])) for one_expr in test_expr: self.create_test(one_expr[0],one_expr[1],debug=True,array=True) def test_tables(self): """Test that tables are parsed correctly""" teststrg = """ c = Table() c['bx'] = 25 _a.b = c """ print("Table test:") res = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False, cif_dic=self.dic) print(realfunc) exec(realfunc,globals()) b = myfunc(self) self.failUnless(b['bx']==25) def test_Tables_2(self): """Test that brace-delimited tables are parsed correctly""" teststrg = """ c = {'hello':1,'goodbye':2} _a.b = c['hello'] """ print("Table test:") res = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False, cif_dic=self.dic) print(realfunc) exec(realfunc,globals()) b = myfunc(self) self.failUnless(b==1) def test_subscription(self): """Test proper list of dependencies is returned""" teststrg = """ m = [15,25,35] _a.b = m [1] """ self.create_test(teststrg,25) def test_list_indices(self): """Test that multi-dimensional indices are accessed correctly""" teststrg = """ m = [[1,2,3],[4,5,6],[7,8,9]] _a.b = m[1,2] """ self.create_test(teststrg,6,debug=True) def test_matrix_indices(self): """Test that multi-dimensional indices work for matrices too""" teststrg = """ m = matrix([[1,2,3],[4,5,6],[7,8,9]]) _a.b = m[1,2] """ self.create_test(teststrg,6,debug=True) class SimpleCompoundStatementTestCase(unittest.TestCase): def setUp(self): #create our lexer and parser self.lexer = drel_lex.lexer self.lexer.lineno = 0 self.parser = drel_ast_yacc.parser self.dic = CifFile.CifDic("tests/drel/dic_for_tests.dic",grammar="STAR2") def create_test(self,instring,right_value,varname="_a.b",debug=False): """Given a string, create and call a function then check result""" if instring[-1]!="\n": instring += "\n" # correct termination res = self.parser.parse(instring,debug=debug,lexer=self.lexer) if debug: print("%s\n -> \n%r \n" % (instring, res)) realfunc = py_from_ast.make_python_function(res,"myfunc",varname,have_sn=False, cif_dic=self.dic) if debug: print("-> %s" % realfunc) exec(realfunc,globals()) self.failUnless(myfunc(self) == right_value) def test_multi_assign(self): """ Test that multiple assignments are parsed """ teststrg = """ f = _a.b p = len(f) q = 0 _a.b = 0 """ res = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(res,"myfunc",'_a.b',cif_dic=self.dic) print("-> " + realfunc) def test_do_stmt(self): """Test how a do statement comes out""" teststrg = """ _a.b = 0 dummy = 1 do jkl = 0,20,2 { if (dummy == 1) print 'dummy is 1' _a.b = _a.b + jkl } do emm = 1,5 { _a.b = _a.b + emm } """ self.create_test(teststrg,125) def 
test_do_stmt_2(self):
        """Test how another do statement comes out with long suite"""
        teststrg = """
        _a.b = 0
        geom_hbond = [(1,2),(2,3),(3,4)]
        do i= 0,1 {
            l,s = geom_hbond [i]
            a = 'hello'
            c = int(4.5)
            bb = [1,c,a]
            _a.b += s
            }
        """
        self.create_test(teststrg,5)

    def test_if_stmt(self):
        """test parsing of if statement"""
        teststrg = """
        dmin = 5.0
        d1 = 4.0
        rad1 = 2.2
        radius_bond = 2.0
        If (d1<dmin or d1>(rad1+radius_bond)) _a.b = 5
        """
        self.create_test(teststrg,5)

    def test_double_if_stmt(self):
        """test parsing of if statement"""
        teststrg = """
        dmin = 5.0
        d1 = 4.0
        rad1 = 2.2
        radius_bond = 2.0
        If (d1<dmin or d1>(rad1+radius_bond)) _a.b = 5
        if (d1>dmin or d1<(rad1+radius_bond)) _a.b = 11
        if (5 > 6 and 6 < 4) _a.b = -2
        """
        self.create_test(teststrg,11)

    def test_if_else(self):
        """Test that else is properly handled"""
        teststrg = """drp = 'electron'
        If (drp == "neutron") _a.b = "femtometres"
        Else If (drp == "electron") _a.b = "volts"
        Else      _a.b = "electrons"
        """
        self.create_test(teststrg,'volts')

    def test_for_statement(self):
        """Test for statement with list"""
        teststrg = """
        _a.b = 0
        for [c,d] in [[1,2],[3,4],[5,6]] {
            _a.b += c + 2*d
        }"""
        self.create_test(teststrg,33)

    def test_funcdef(self):
        """Test function conversion"""
        teststrg = """
        function Closest( v :[Array, Real],   # coord vector to be cell translated
                       w :[Array, Real]) {    # target vector
            d = v - w
            t = Int( Mod( 99.5 + d, 1.0 ) - d )
            q = 1 + 1
            Closest = [ v+t, t ]
        }
        """
        res = self.parser.parse(teststrg+"\n",lexer=self.lexer)
        realfunc = py_from_ast.make_python_function(res,"myfunc",None, func_def = True)
        # print "Function -> \n" + realfunc
        exec(realfunc,globals())
        retval = Closest(0.2,0.8,None)
        print('Closest 0.2,0.8 returns {!r},{!r}'.format(retval[0], retval[1]))
        self.failUnless(retval == StarList([1.2,1]))

class MoreComplexTestCase(unittest.TestCase):
    def setUp(self):
        #create our lexer and parser
        self.lexer = drel_lex.lexer
        self.lexer.lineno = 0
        self.parser = drel_ast_yacc.parser
        self.dic = CifFile.CifDic("tests/drel/dic_for_tests.dic",grammar="STAR2")

    def test_nested_stmt(self):
        """Test how a nested do statement executes"""
        teststrg = """
        total = 0
        _a.b = 0
        do jkl = 0,20,2 {
            total = total + jkl
            do emm = 1,5 {
                _a.b = _a.b + 1
            }
        }
        end_of_loop = -25.6
        """
        res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
        realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False,
                                                    cif_dic = self.dic)
        exec(realfunc,globals())
        othertotal = myfunc(self)
        self.failUnless(othertotal==55)

    def test_complex_if(self):
        """Test if with single-statement suite"""
        teststrg = """
        setting = 'triclinic'
        a = 20.0
        b = 20.0
        c = 20.0
        d = 0.01
        alp = 90.0
        bet = 90.0
        gam = 90.0
        warn_len = 'Possible mismatch between cell lengths and cell setting'
        warn_ang = 'Possible mismatch between cell angles and cell setting'
        If(setting == 'triclinic') {
          If( Abs(a-b) \n" + realfunc)
        exec(realfunc,globals())
        #
        atmass = myfunc(self.testblock)
        print('test value now {!r}'.format(atmass))
        self.failUnless(atmass == [120,280,240])

    def test_Lists(self):
        """Test case found in Cif dictionary """
        teststrg = """# Store unique sites as a local list

        atomlist = List()
        Loop a as atom_site {
            axyz = a.fract_xyz
            cxyz = _atom_sites_Cartn_transform.matrix * axyz
            radb = _atom_type[a.type_symbol].radius_bond
            radc = _atom_type[a.type_symbol].radius_contact
            ls = List ( a.label, "1_555" )
            atomlist ++= [ls, axyz, cxyz, radb, radc, 0]
        }
        _geom_bond.id = atomlist
        """
        loop_cats = {"atom_site":["label",["fract_xyz","type_symbol","label"]],
                     "atom_type":["id",["id","radius_bond","radius_contact"]]}
        # Add drel functions for deriving items
testdic.initialise_drel() res = self.parser.parse(teststrg + "\n",lexer=self.lexer) realfunc,dependencies = py_from_ast.make_python_function(res,"myfunc","_geom_bond.id",cat_meth=True, loopable=loop_cats,have_sn=False,depends=True,cif_dic=testdic) print('Simple function becomes:') print(realfunc) print('Depends on: {!r}'.format(dependencies)) exec(realfunc,globals()) b = myfunc(self.testblock) print("subscription returns {!r}".format(b)) def test_with_stmt(self): """Test what comes out of a simple flow statement, including multiple with statements""" teststrg = """ with e as exptl with c as cell { x = 22 j = 25 jj = e.crystals_number px = c.length_a _exptl.method = "single-crystal diffraction" }""" loopable_cats = {} #none looped res = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(res,"myfunc","_exptl.method",cif_dic=self.testdic) print("With statement -> \n" + realfunc) exec(realfunc,globals()) # attach dictionary self.testblock.assign_dictionary(self.testdic) newmeth = myfunc(self.testblock) print('exptl method now %s' % newmeth) self.failUnless(newmeth == "single-crystal diffraction") def test_loop_with_stmt_2(self): """Test with statement on a looped category, no aliasing""" teststrg = """ _atom_type.analytical_mass_percent = _atom_type.number_in_cell * 10 """ loopable_cats = {'atom_type':["id",["id",'number_in_cell','test']]} # ast = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(ast,"myfunc","_atom_type.analytical_mass_percent", loopable=loopable_cats, cif_dic=testdic) print("With statement for looped category -> \n" + realfunc) exec(realfunc,globals()) atmass = myfunc(self.testblock) print('test value now {!r}'.format(atmass)) self.failUnless(atmass == [120,280,240]) def test_subscription(self): """Test proper list of dependencies is returned""" teststrg = """ _model_site.symop = _model_site.id [1] """ loopable_cats = {"model_site":["id",["id","symop"]]} res = self.parser.parse(teststrg,lexer=self.lexer) print(repr(res)) realfunc,dependencies = py_from_ast.make_python_function(res,"myfunc","_model_site.symop", loopable=loopable_cats,depends=True, cif_dic=testdic) print(realfunc, repr(dependencies)) self.failUnless(dependencies == set(['_model_site.id'])) def test_current_row(self): """Test that methods using Current_Row work properly""" teststrg = """ _atom_type.description = Current_Row() + 1 """ loopable_cats = {'atom_type':["id",['number_in_cell','atomic_mass','num']]} # ast = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(ast,"myfunc","_atom_type.description",loopable=loopable_cats, cif_dic=testdic) print("Current row statement -> \n" + realfunc) exec(realfunc,globals()) rownums = myfunc(self.testblock) print('row id now {!r}'.format(rownums)) self.failUnless(rownums == [1,2,3]) def test_loop_statement(self): """Test proper processing of loop statements""" teststrg = """ mass = 0. 
Loop t as atom_type { mass += t.number_in_cell * t.atomic_mass } _cell.atomic_mass = mass """ loopable_cats = {'atom_type':["id",['number_in_cell','atomic_mass']]} # ast = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(ast,"myfunc","_cell.atomic_mass",loopable=loopable_cats, cif_dic=testdic) print("Loop statement -> \n" + realfunc) exec(realfunc,globals()) atmass = myfunc(self.testblock) print('atomic mass now %f' % atmass) self.failUnless(atmass == 552.488) def test_complex_f(self): """This calculation failed during testing""" teststrg = """ With r as refln fc = Complex (0., 0.) h = r.hkl Loop a as atom_site { f = a.site_symmetry_multiplicity * a.occupancy * ( r.form_factor_table [a.type_symbol] + _atom_type_scat[a.type_symbol].dispersion ) Loop s as space_group_symop { t = Exp(-h * s.R * a.tensor_beta * s.RT * h) fc += f * t * ExpImag(TwoPi *( h *( s.R * a.fract_xyz + s.T))) } } _refln.F_complex = fc / _space_group.multiplicity """ loopable_cats = {'space_group_symop':["id",["id","R","RT","T"]], 'atom_site':["id",["id","type_symbol","occupancy","site_symmetry_multiplicity", "tensor_beta","fract_xyz"]], 'atom_type_scat':["id",["id","dispersion"]], 'refln':["hkl",["hkl","form_factor_table"]]} # ast = self.parser.parse(teststrg+"\n",lexer=self.lexer) realfunc = py_from_ast.make_python_function(ast,"myfunc","_refln.F_complex",loopable=loopable_cats, cif_dic=testdic) print("Incoming AST: {!r}".format(ast)) print("F_complex statement -> \n" + realfunc) exec(realfunc,globals()) # This one also doesn't return anything sensible yet, just a generation check def test_fancy_packets(self): """Test that full packets can be dealt with properly""" teststrg = """[label,symop] = _model_site.id a = atom_site[label] s = space_group_symop[SymKey(symop)] _model_site.adp_matrix_beta = s.R * a.tensor_beta * s.RT""" loopable = {"model_site":["id",["id"]], "atom_site":["label",["tensor_beta","label"]], "space_group_symop":["id",["id","RT","R"]]} res = self.parser.parse(teststrg + "\n",lexer=self.lexer) realfunc,deps = py_from_ast.make_python_function(res,"myfunc","_model_site.adp_matrix_beta", depends = True,have_sn=False, loopable=loopable,cif_dic=testdic) print('model_site.adp_matrix_beta becomes...') print(realfunc) print(deps) self.failUnless('_space_group_symop.RT' in deps) def test_array_access(self): """Test that arrays are converted and returned correctly""" teststrg = """ _model_site.symop = _model_site.id[1] """ loopable = {"model_site":["id",["id","symop","adp_eigen_system"]], "atom_site":["label",["tensor_beta","label"]], "space_group_symop":["id",["id","RT","R"]]} res = self.parser.parse(teststrg + "\n",lexer=self.lexer) realfunc,deps = py_from_ast.make_python_function(res,"myfunc","_model_site.symop", depends = True,have_sn=False, loopable=loopable,cif_dic=testdic) print(realfunc) exec(realfunc,globals()) self.testblock.assign_dictionary(testdic) b = myfunc(self.testblock) print('symops are now {!r}'.format(b)) self.failUnless(b[1] == '1_555') def testIfStatement(self): """Test that we handle optional values appropriately""" teststrg = """ with a as atom_site label = a.label if (a.adp_type == "Uani") { Loop b as atom_site_aniso { If(label == b.label) { UIJ = b.matrix_U Break } } } Else If (a.adp_type == 'bani') { Loop b as atom_site_aniso { If(label == b.label) { UIJ = b.matrix_B / (8 * Pi**2) Break } } } Else { If (a.adp_type == 'uiso') U = a.U_iso_or_equiv Else U = a.B_iso_or_equiv / (8 * Pi**2) UIJ = U * _cell.convert_Uiso_to_Uij } 
_atom_site.tensor_beta = UIJ
        """
        loopable = { "atom_site":["label",["tensor_beta","label"]],
                     "atom_site_aniso":["label",["label","matrix_B","matrix_U"]],
                     }
        res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
        realfunc,deps = py_from_ast.make_python_function(res,"myfunc","_atom_site.tensor_beta",
                                                         depends = True,have_sn=False,
                                                         loopable=loopable,cif_dic=testdic)
        funclines = realfunc.splitlines()
        for n,l in enumerate(funclines):
            print("%2d:%s"%(n,l))
        #print(realfunc)
        exec(realfunc,globals())
        self.testblock.assign_dictionary(testdic)
        b = myfunc(self.testblock)
        print('tensor beta is now {!r}'.format(b))
        self.failUnless(b[1][1][1] == 0.031)   #U22 for O2

if __name__=='__main__':
    global testdic
    testdic = CifFile.CifDic("tests/drel/cif_core.dic",grammar="2.0",do_imports='Contents')
    unittest.main()
    #suite = unittest.TestLoader().loadTestsFromTestCase(WithDictTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(SimpleCompoundStatementTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(SingleSimpleStatementTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(MoreComplexTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(dRELRuntimeTestCase)
    #unittest.TextTestRunner(verbosity=2).run(suite)

pycifrw-4.4/TestPyCifRW.py

# Testing of the PyCif module using the PyUnit framework
#
# To maximize python3/python2 compatibility
# Note that all tests should pass with and without
# unicode literals.
from __future__ import print_function
#from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

import sys,os
#sys.path[0] = '.'
import unittest
import CifFile
from CifFile import StarFile
from CifFile.StarFile import StarDict, StarList, StarLengthError
import re
try:
    from StringIO import StringIO
except:
    from io import StringIO

# Test general string and number manipulation functions
class BasicUtilitiesTestCase(unittest.TestCase):
    def testPlainLineFolding(self):
        """Test that we can fold a line correctly"""
        test_string = "1234567890123456789012"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        out_lines = outstring.split('\n')
        #print(outstring)
        self.failUnless(out_lines[0]=="\\")
        self.failUnless(len(out_lines[1])==10)

    def testPreWrappedFolding(self):
        """Test that pre-wrapped lines are untouched"""
        test_string = "123456789\n012345678\n9012"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        self.failUnless(outstring == test_string)

    def testManyLineEndings(self):
        """Test that empty lines are handled OK"""
        test_string = "123456789\n\n012345678\n\n9012\n\n"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        self.failUnless(outstring == test_string)

    def testOptionalBreak(self):
        """Test that internal whitespace is used to break"""
        test_string = "123456 7890123 45678\n90 12\n\n"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        #print("\n;" + outstring + "\n;")
        out_lines = outstring.split('\n')
        self.failUnless(len(out_lines[1]) == 7)

    def testCorrectEnding(self):
        """Make sure that no line feeds are added/removed"""
        test_string = "123456 7890123 45678\n90 12\n\n"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        self.failUnless(outstring[-4:] == "12\n\n")

    def testFoldingRemoval(self):
        """Test that we round-trip correctly"""
        test_string = "123456 7890123 45678\n90 12\n\n"
        outstring = CifFile.apply_line_folding(test_string,5,10)
        old_string = CifFile.remove_line_folding(outstring)
        #print("Test:" + repr(test_string))
#print("Fold:" + repr(outstring)) #print("UnFo:" + repr(old_string)) self.failUnless(old_string == test_string) def testTrickyFoldingRemoval(self): """Try to produce a tough string for unfolding""" test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life" outstring = CifFile.apply_line_folding(test_string,5,10) old_string = CifFile.remove_line_folding(outstring) #print("Test:" + repr(test_string)) #print("Fold:" + repr(outstring)) #print("UnFo:" + repr(old_string)) self.failUnless(old_string == test_string) def testTrailingBackslash(self): """Make sure that a trailing backslash is not removed""" test_string = "\n123\\\n 456\\n\n" outstring = CifFile.apply_line_folding(test_string,5,10) old_string = CifFile.remove_line_folding(outstring) #print("Test:" + repr(test_string)) #print("Fold:" + repr(outstring)) #print("UnFo:" + repr(old_string)) self.failUnless(old_string == test_string) def testFinalBackslash(self): """Make sure that a single final backslash is removed when unfolding""" test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life" folded_string = CifFile.apply_line_folding(test_string,5,10) folded_string = folded_string + "\ " old_string = CifFile.remove_line_folding(folded_string) self.failUnless(old_string == test_string) def testAddIndent(self): """Test insertion of a line prefix""" test_string = "\n12345\n678910\n\n" outstring = CifFile.apply_line_prefix(test_string,"abc>") print("Converted %s to %s " %(test_string,outstring)) self.failUnless(outstring == "abc>\\\nabc>\nabc>12345\nabc>678910\nabc>\nabc>") def testRemoveIndent(self): """Test removal of a line prefix""" test_string = "abc>\\\nabc>12345\nabc>678910\nabc>\nabc>" outstring = CifFile.remove_line_prefix(test_string) print("Removed indent: " + repr(outstring)) self.failUnless(outstring == "12345\n678910\n\n") def testReverseIndent(self): """Test reversible indentation of line""" test_string = "12345\n678910\n\n" outstring = CifFile.apply_line_prefix(test_string,"cif><") newtest = CifFile.remove_line_prefix(outstring) print('Before indenting: ' + repr(test_string)) print('After indenting: ' + repr(outstring)) print('After unindent: ' + repr(newtest)) self.failUnless(newtest == test_string) def testPrefixAndFold(self): """Test reversible folding and indenting""" test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life" outstring = CifFile.apply_line_folding(test_string,5,10) indoutstring = CifFile.apply_line_prefix(outstring,"CIF>") newoutstring = CifFile.remove_line_prefix(indoutstring) newtest_string = CifFile.remove_line_folding(newoutstring) print("%s -> %s -> %s -> %s -> %s" % (repr(test_string),repr(outstring),repr(indoutstring),repr(newoutstring),repr(newtest_string))) self.failUnless(newtest_string == test_string) def testStringiness(self): """Check that we can detect string-valued items correctly""" import numpy self.assertEqual(CifFile.check_stringiness(['1','2','3']),True) self.assertEqual(CifFile.check_stringiness([1,2,'3']),False) self.assertEqual(CifFile.check_stringiness(['1',['2',['3',4,'5'],'6','7'],'8']),False) self.assertEqual(CifFile.check_stringiness(['1',['2',['3','4','5'],'6','7'],'8']),True) p = numpy.array([[1,2,3],[4,5,6]]) self.assertEqual(CifFile.check_stringiness(p),False) def testStarList(self): """Test that starlists allow comma-based access""" p = StarList([StarList([1,2,3]),StarList([4,5,6])]) self.failUnless(p[1,0]==4) # Test basic setting and reading of the CifBlock class BlockRWTestCase(unittest.TestCase): def 
setUp(self): # we want to get a datablock ready so that the test # case will be able to write a single item # self.cf_old = CifFile.CifBlock(compat_mode=True) self.cf = CifFile.CifBlock() def tearDown(self): # get rid of our test object del self.cf def testTupleNumberSet(self): """Test tuple setting with numbers""" self.cf['_test_tuple'] = (11,13.5,-5.6) self.failUnless([float(a) for a in self.cf['_test_tuple']]== [11,13.5,-5.6]) def testTupleComplexSet(self): """DEPRECATED: Test setting multiple names in loop""" names = (('_item_name_1','_item_name#2','_item_%$#3'),) values = (((1,2,3,4),('hello','good_bye','a space','# 4'), (15.462, -99.34,10804,0.0001)),) self.cf.AddCifItem((names,values)) self.failUnless(tuple(map(float, self.cf[names[0][0]])) == values[0][0]) self.failUnless(tuple(self.cf[names[0][1]]) == values[0][1]) self.failUnless(tuple(map(float, self.cf[names[0][2]])) == values[0][2]) def testStringSet(self): """test string setting""" self.cf['_test_string_'] = 'A short string' self.failUnless(self.cf['_test_string_'] == 'A short string') def testTooLongSet(self): """test setting overlong data names""" dataname = '_a_long_long_'*7 try: self.cf[dataname] = 1.0 except (CifFile.StarError,CifFile.CifError): pass else: self.fail() def testTooLongLoopSet(self): """test setting overlong data names in a loop""" dataname = '_a_long_long_'*7 try: self.cf[dataname] = (1.0,2.0,3.0) except (CifFile.StarError,CifFile.CifError): pass else: self.fail() def testBadStringSet(self): """test setting values with bad characters""" dataname = '_name_is_ok' try: self.cf[dataname] = "eca234\f\vaqkadlf" except CifFile.StarError: pass else: self.fail() def testBadNameSet(self): """test setting names with bad characters""" dataname = "_this_is_not ok" try: self.cf[dataname] = "nnn" except CifFile.StarError: pass else: self.fail() def testMoreBadStrings(self): dataname = "_name_is_ok" val = (b"so far, ok, but now we have a " + bytearray([128])).decode('latin_1') try: self.cf[dataname] = val except CifFile.StarError: pass else: self.fail() def testEmptyString(self): """An empty string is, in fact, legal""" self.cf['_an_empty_string'] = '' # Now test operations which require a preexisting block # class BlockChangeTestCase(unittest.TestCase): def setUp(self): self.cf = CifFile.CifBlock() self.names = (('_item_name_1','_item_name#2','_item_%$#3'),) self.values = (((1,2,3,4),('hello','good_bye','a space','# 4'), (15.462, -99.34,10804,0.0001)),) self.cf.AddCifItem((self.names,self.values)) self.cf['_non_loop_item'] = 'Non loop string item' self.cf['_number_item'] = 15.65 self.cf['_planet'] = 'Saturn' self.cf['_satellite'] = 'Titan' self.cf['_rings'] = 'True' def tearDown(self): del self.cf def testFromBlockSet(self): """Test that we can use a CifBlock to set a CifBlock""" df = CifFile.CifFile() df.NewBlock('testname',self.cf) self.assertEqual(df['testname']['_planet'],'Saturn') self.assertEqual(df['testname']['_item_name#2'],list(self.values[0][1])) def testSimpleRemove(self): """Check item deletion outside loop""" self.cf.RemoveCifItem('_non_loop_item') try: a = self.cf['_non_loop_item'] except KeyError: pass else: self.fail() def testLoopRemove(self): """Check item deletion inside loop""" print("Before:\n") print(self.cf.printsection()) self.cf.RemoveCifItem(self.names[0][1]) print("After:\n") print(self.cf.printsection()) try: a = self.cf[self.names[0][1]] except KeyError: pass else: self.fail() def testFullLoopRemove(self): """Check removal of all loop items""" for name in self.names[0]: 
self.cf.RemoveCifItem(name) self.failUnless(len(self.cf.loops)==0, repr(self.cf.loops)) # test adding data to a loop. We test straight addition, then make sure the errors # happen at the right time # def testAddToLoop(self): """Test adding to a loop""" adddict = {'_address':['1 high street','2 high street','3 high street','4 high st'], '_address2':['Ecuador','Bolivia','Colombia','Mehico']} self.cf.AddToLoop('_item_name#2',adddict) print(self.cf) newkeys = self.cf.GetLoopNames('_item_name#2') self.failUnless(list(adddict.keys())[0] in newkeys) self.assertEqual(len(self.cf['_item_name#2']),len(self.values[0][0])) def testBadAddToLoop(self): """Test incorrect loop addition""" adddict = {'_address':['1 high street','2 high street','3 high street'], '_address2':['Ecuador','Bolivia','Colombia']} try: self.cf.AddToLoop('_no_item',adddict) except KeyError: pass else: self.fail() try: self.cf.AddToLoop('_item_name#2',adddict) except StarLengthError: pass else: self.fail() def testChangeLoop(self): """Test changing pre-existing item in loop""" # Items should be silently replaced self.cf["_item_name_1"] = (5,6,7,8) # # Test the mapping type implementation # def testGetOperation(self): """Test the get mapping call""" self.cf.get("_item_name_1") self.cf.get("_item_name_nonexist") # # Test case insensitivity # def testDataNameCase(self): """Test same name, different case causes error""" self.assertEqual(self.cf["_Item_Name_1"],self.cf["_item_name_1"]) self.cf["_Item_NaMe_1"] = "the quick pewse fox" self.assertEqual(self.cf["_Item_NaMe_1"],self.cf["_item_name_1"]) class SyntaxErrorTestCase(unittest.TestCase): """Check that files with syntax errors are found""" def tearDown(self): try: os.remove("tests/syntax_check.cif") except: pass def testTripleApostropheCase(self): teststrg = "#\#CIF_2.0\ndata_testblock\n _item_1 ''' ''' '''\n" f = open("tests/syntax_check.cif","w") f.write(teststrg) f.close() self.assertRaises(CifFile.StarError, CifFile.ReadCif,"tests/syntax_check.cif",grammar="2.0") def testTripleQuoteCase(self): teststrg = '#\#CIF_2.0\ndata_testblock\n _item_1 """ """ """\n' f = open("tests/syntax_check.cif","w") f.write(teststrg) f.close() self.assertRaises(CifFile.StarError, CifFile.ReadCif,"tests/syntax_check.cif",grammar="2.0") class LoopBlockTestCase(unittest.TestCase): """Check operations on loop blocks""" def setUp(self): self.cf = CifFile.CifBlock() self.names = (('_Item_Name_1','_item_name#2','_item_%$#3'),) self.values = (((1,2,3,4),('hello','good_bye','a space','# 4'), (15.462, -99.34,10804,0.0001)),) self.cf.AddCifItem((self.names,self.values)) self.cf['_non_loop_item'] = 'Non loop string item' self.cf['_number_item'] = 15.65 self.cf['_planet'] = 'Saturn' self.cf['_satellite'] = 'Titan' self.cf['_rings'] = 'True' # A loop with compound keys self.cf['_ck_1'] = ['1','1','1','2','2','2','3','3','3'] self.cf['_Ck_2'] = ['r','g','b','r','g','b','r','g','b'] self.cf['_stuff'] = ['Q','W','E','R','T','Y','U','I','O'] self.cf.CreateLoop(['_ck_1','_ck_2','_stuff']) def tearDown(self): del self.cf def testLoop(self): """Check GetLoop returns values and names in matching order""" results = self.cf.GetLoop(self.names[0][2]) lowernames = [a.lower() for a in self.names[0]] for key in results.keys(): self.failUnless(key.lower() in lowernames) self.failUnless(tuple(results[key]) == self.values[0][lowernames.index(key.lower())]) def testLoopCharCase(self): """Test that upper/lower case names in loops works correctly""" # Note the wildly varying case for these two names self.cf['_item_name_20'] = 
['a','b','c','q'] self.cf.AddLoopName('_item_Name_1','_Item_name_20') self.failUnless(self.cf.FindLoop('_Item_name_1')==self.cf.FindLoop('_Item_Name_20')) def testGetLoopCase(self): """Check that getloop works for any case""" results = self.cf.GetLoop('_Item_Name_1') self.assertEqual(results['_item_name_1'][1],2) def testLoopOutputOrder(self): """Check that an item placed in a loop no longer appears in the output order""" self.cf['_item_name_20'] = ['a','b','c','q'] self.cf.AddLoopName('_item_Name_1','_Item_name_20') self.failUnless('_item_name_20' not in self.cf.GetItemOrder()) def testLoopify(self): """Test changing unlooped data to looped data""" self.cf.CreateLoop(["_planet","_satellite","_rings"]) newloop = self.cf.GetLoop("_rings") self.assertFalse(newloop.has_key("_number_item")) def testLoopifyCif(self): """Test changing unlooped data to looped data does not touch already looped data for a CIF file""" # from IPython.Debugger import Tracer; debug_here = Tracer() # debug_here() self.cf.CreateLoop(["_planet","_satellite","_rings"]) newloop = self.cf.GetLoop("_rings") self.assertTrue(newloop.has_key('_planet')) # Test iteration # def testIteration(self): """We create an iterator and iterate""" testloop = self.cf.GetLoop("_item_name_1") i = 0 for test_pack in testloop: self.assertEqual(test_pack._item_name_1,self.values[0][0][i]) self.assertEqual(getattr(test_pack,"_item_name#2"),self.values[0][1][i]) i += 1 def testPacketContents(self): """Test that body of packet is filled in as well""" testloop = self.cf.GetLoop("_item_name_1") it_order = testloop.GetItemOrder() itn_pos = it_order.index("_item_name_1") for test_pack in testloop: print('Test pack: ' + repr(test_pack)) self.assertEqual(test_pack._item_name_1,test_pack[itn_pos]) def testPacketAttr(self): """Test that packets have attributes""" testloop = self.cf.GetLoop("_item_name_1") self.assertEqual(testloop[1]._item_name_1,2) def testKeyPacket(self): """Test that a packet can be returned by key value""" testpack = self.cf.GetKeyedPacket("_item_name_1",2) self.assertEqual("good_bye",getattr(testpack,"_item_name#2")) def testCompoundKeyPacket(self): """Test that a compound key can also be used""" testpack = self.cf.GetCompoundKeyedPacket({"_ck_1":('2',False),"_ck_2":('b',False)}) self.assertEqual("Y",getattr(testpack,"_stuff")) def testPacketMerge(self): """Test that a packet can be merged with another packet""" bigcf = CifFile.CifFile("tests/C13H22O3.cif") bigcf = bigcf["II"] testpack = bigcf.GetKeyedPacket("_atom_site_label","C4A") newpack = bigcf.GetKeyedPacket("_atom_site_aniso_label","C4A") testpack.merge_packet(newpack) self.assertEqual(getattr(testpack,'_atom_site_aniso_U_22'),'0.0312(15)') self.assertEqual(getattr(testpack,'_atom_site_fract_x'),'0.7192(3)') def testRemovePacket(self): """Test that removing a packet works properly""" print('Before packet removal') print(str(self.cf)) testloop = self.cf.GetLoop("_item_name_1") testloop.RemoveKeyedPacket("_item_name_1",3) print('After packet 3 removal:') jj = testloop.GetKeyedPacket("_item_name_1",2) kk = testloop.GetKeyedPacket("_item_name_1",4) self.assertEqual(getattr(jj,"_item_name#2"),"good_bye") self.assertEqual(getattr(kk,"_item_name#2"),"# 4") self.assertRaises(ValueError,testloop.GetKeyedPacket,"_item_name_1",3) print('After packet removal:') print(str(self.cf)) def testAddPacket(self): """Test that we can add a packet""" import copy testloop = self.cf.GetLoop("_item_name_1") workingpacket = copy.copy(testloop.GetPacket(0)) workingpacket._item_name_1 = '5' 
workingpacket.__setattr__("_item_name#2", 'new' ) testloop.AddPacket(workingpacket) # note we assume that this adds on to the end, which is not # a CIF requirement self.assertEqual(testloop["_item_name_1"][4],'5') self.assertEqual(testloop["_item_name#2"][4],'new') # # Test changing item order # def testChangeOrder(self): """We move some stuff around""" testloop = self.cf.GetLoop("_item_name_1") self.cf.ChangeItemOrder("_Number_Item",0) testloop.ChangeItemOrder("_Item_Name_1",2) self.assertEqual(testloop.GetItemOrder()[2],"_Item_Name_1".lower()) self.assertEqual(self.cf.GetItemOrder()[0],"_Number_Item".lower()) def testGetOrder(self): """Test that the correct order value is returned""" self.assertEqual(self.cf.GetItemPosition("_Number_Item"),(-1,2)) def testReplaceOrder(self): """Test that a replaced item is at the same position it previously held""" testloop = self.cf.GetLoop("_item_name_1") oldpos = testloop.GetItemPosition('_item_name#2') testloop['_item_name#2'] = ("I'm",' a ','little','teapot') self.assertEqual(testloop.GetItemPosition('_item_name#2'),oldpos) def testAddLoopCaseReplaceColumn(self): """Test that a column is correctly replaced by AddToLoop""" newdata = {"_Ck_2":['ho']*9} self.cf.AddToLoop("_ck_1",newdata) # Case problem occurs when outputting print(str(self.cf)) def testAddLoopLCaseReplaceColumn(self): """Duplicates an error where AddToLoop works incorrectly""" newdata = {"_ck_2":['ho']*9} self.cf.AddToLoop("_ck_1",newdata) # The loop list contains the item twice for l in self.cf.loops: assert(len(set(self.cf.loops[l]))==len(self.cf.loops[l])) # # Test setting of block names # class BlockNameTestCase(unittest.TestCase): def testBlockName(self): """Make sure long block names cause errors""" df = CifFile.CifBlock() cf = CifFile.CifFile() try: cf['a_very_long_block_name_which_should_be_rejected_out_of_hand123456789012345678']=df except CifFile.StarError: pass else: self.fail() def testBlockOverwrite(self): """Upper/lower case should be seen as identical""" df = CifFile.CifBlock() ef = CifFile.CifBlock() cf = CifFile.CifFile(standard=None) df['_random_1'] = 'oldval' ef['_random_1'] = 'newval' print('cf.standard is ' + repr(cf.standard)) cf['_lowercaseblock'] = df cf['_LowerCaseBlock'] = ef assert(cf['_Lowercaseblock']['_random_1'] == 'newval') assert(len(cf) == 1) def testEmptyBlock(self): """Test that empty blocks are not the same object""" cf = CifFile.CifFile() cf.NewBlock('first_block') cf.NewBlock('second_block') cf['first_block']['_test1'] = 'abc' cf['second_block']['_test1'] = 'def' self.assertEqual(cf['first_block']['_test1'],'abc') # # Test reading cases # class FileWriteTestCase(unittest.TestCase): def setUp(self): """Write out a file, then read it in again. 
Non alphabetic ordering to check order preservation and mixed case.""" # fill up the block with stuff items = (('_item_1','Some data'), ('_item_3','34.2332'), ('_item_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'), ('_item_2','Some_underline_data'), ('_item_empty',''), ('_item_quote',"'ABC"), ('_item_apost','"def'), ('_item_sws'," \n "), ('_item_bad_beg',"data_journal"), (('_item_5','_item_7','_item_6'), ([1,2,3,4], ['a','b','c','d'], [5,6,7,8])), (('_string_1','_string_2'), ([';this string begins with a semicolon', 'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough', ';just_any_old_semicolon-starting-string'], ['a string with a final quote"', 'a string with a " and a safe\';', 'a string with a final \'']))) # save block items as well s_items = (('_sitem_1','Some save data'), ('_sitem_2','Some_underline_data'), ('_sitem_3','34.2332'), ('_sitem_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'), (('_sitem_5','_sitem_6','_sitem_7'), ([1,2,3,4], [5,6,7,8], ['a','b','c','d'])), (('_string_1','_string_2'), ([';this string begins with a semicolon', 'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough', ';just_any_old_semicolon-starting-string'], ['a string with a final quote"', 'a string with a " and a safe\';', 'a string with a final \'']))) self.cf = CifFile.CifBlock(items) cif = CifFile.CifFile(scoping='dictionary',maxoutlength=80) cif['Testblock'] = self.cf # Add some comments self.save_block = CifFile.CifBlock(s_items) cif.NewBlock("test_Save_frame",self.save_block,parent='testblock') self.cfs = cif["test_save_frame"] outfile = open('tests/test.cif','w') outfile.write(str(cif)) outfile.close() self.ef = CifFile.CifFile('tests/test.cif',scoping='dictionary') self.df = self.ef['testblock'] self.dfs = self.ef["test_save_frame"] flfile = CifFile.ReadCif('tests/test.cif',scantype="flex",scoping='dictionary') # test passing a stream directly tstream = open('tests/test.cif') CifFile.CifFile(tstream,scantype="flex") CifFile.ReadCif(tstream,scantype="flex") #different code path self.flf = flfile['testblock'] self.flfs = flfile["Test_save_frame"] def tearDown(self): try: os.remove('tests/test.cif') os.remove('tests/test2.cif') except: pass del self.dfs del self.df del self.cf del self.ef del self.flf del self.flfs def testStringInOut(self): """Test writing short strings in and out""" self.failUnless(self.cf['_item_1']==self.df['_item_1']) self.failUnless(self.cf['_item_2']==self.df['_item_2']) self.failUnless(self.cfs['_sitem_1']==self.dfs['_sitem_1']) self.failUnless(self.cfs['_sitem_2']==self.dfs['_sitem_2']) self.failUnless(self.cfs['_sitem_1']==self.flfs['_sitem_1']) self.failUnless(self.cfs['_sitem_2']==self.flfs['_sitem_2']) def testApostropheInOut(self): """Test correct behaviour for values starting with apostrophes or quotation marks""" self.failUnless(self.cf['_item_quote']==self.df['_item_quote']) self.failUnless(self.cf['_item_apost']==self.df['_item_apost']) self.failUnless(self.cf['_item_quote']==self.flf['_item_quote']) self.failUnless(self.cf['_item_apost']==self.flf['_item_apost']) def testNumberInOut(self): """Test writing number in and out""" self.failUnless(self.cf['_item_3']==(self.df['_item_3'])) 
self.failUnless(self.cfs['_sitem_3']==(self.dfs['_sitem_3'])) self.failUnless(self.cf['_item_3']==(self.flf['_item_3'])) self.failUnless(self.cfs['_sitem_3']==(self.flfs['_sitem_3'])) def testLongStringInOut(self): """Test writing long string in and out Note that whitespace may vary due to carriage returns, so we remove all returns before comparing""" import re compstring = re.sub('\n','',self.df['_item_4']) self.failUnless(compstring == self.cf['_item_4']) compstring = re.sub('\n','',self.dfs['_sitem_4']) self.failUnless(compstring == self.cfs['_sitem_4']) compstring = re.sub('\n','',self.flf['_item_4']) self.failUnless(compstring == self.cf['_item_4']) compstring = re.sub('\n','',self.flfs['_sitem_4']) self.failUnless(compstring == self.cfs['_sitem_4']) def testEmptyStringInOut(self): """An empty string is in fact kosher""" self.failUnless(self.cf['_item_empty']=='') self.failUnless(self.flf['_item_empty']=='') def testSemiWhiteSpace(self): """Test that white space in a semicolon string is preserved""" self.failUnless(self.cf['_item_sws']==self.df['_item_sws']) self.failUnless(self.cf['_item_sws']==self.flf['_item_sws']) def testLoopDataInOut(self): """Test writing in and out loop data""" olditems = self.cf.GetLoop('_item_5') for key,value in olditems.items(): self.failUnless(tuple(map(str,value))==tuple(self.df[key])) self.failUnless(tuple(map(str,value))==tuple(self.flf[key])) # save frame test olditems = self.cfs.GetLoop('_sitem_5').items() for key,value in olditems: self.failUnless(tuple(map(str,value))==tuple(self.dfs[key])) self.failUnless(tuple(map(str,value))==tuple(self.flfs[key])) def testLoopStringInOut(self): """Test writing in and out string loop data""" olditems = self.cf.GetLoop('_string_1') newitems = self.df.GetLoop('_string_1') flexnewitems = self.flf.GetLoop('_string_1') for key,value in olditems.items(): compstringa = [re.sub('\n','',a) for a in value] compstringb = [re.sub('\n','',a) for a in self.df[key]] compstringc = [re.sub('\n','',a) for a in self.flf[key]] self.failUnless(compstringa==compstringb and compstringa==compstringc) def testGetLoopData(self): """Test the get method for looped data""" newvals = self.df.get('_string_1') self.failUnless(len(newvals)==3) def testCopySaveFrame(self): """Early implementations didn't copy the save frame properly""" jj = CifFile.CifFile(self.ef,scoping='dictionary') #this will trigger a copy self.failUnless(len(jj["test_save_frame"])>0) def testFirstBlock(self): """Test that first_block returns a block""" self.ef.scoping = 'instance' #otherwise all blocks are available jj = self.ef.first_block() self.failUnless(jj==self.df) def testWrongLoop(self): """Test derived from error observed during dREL testing""" teststrg = """data_test loop_ _atom_type.symbol _atom_type.oxidation_number _atom_type.atomic_mass _atom_type.number_in_cell O ? 15.999 12 C ? 12.011 28 H ? 
1.008 24 """ q = open("tests/test2.cif","w") q.write(teststrg) q.close() testcif = CifFile.CifFile("tests/test2.cif").first_block() self.failUnless(testcif['_atom_type.symbol']==['O','C','H']) def testDupName(self): """Test that duplicate blocknames are allowed in non-standard mode""" outstr = """data_block1 _data_1 b save_ab1 _data_2 c save_ save_ab1 _data_3 d save_""" b = open("tests/test2.cif","w") b.write(outstr) b.close() testin = CifFile.CifFile("tests/test2.cif",standard=None) def testPrefixProtocol(self): """Test that pathological strings round-trip correctly""" cif_as_text = open('tests/test.cif','r').read() bf = CifFile.CifFile(maxoutlength=80) bb = CifFile.CifBlock() bb['_data_embedded'] = cif_as_text bf['tough_one'] = bb out_f = open('tests/embedded.cif','w') out_f.write(str(bf)) out_f.close() in_emb = CifFile.CifFile('tests/embedded.cif',grammar='2.0') self.assertEqual(in_emb['tough_one']['_data_embedded'],cif_as_text) def testBadBeginning(self): """Test that strings with forbidden beginnings round-trip OK""" self.failUnless(self.cf['_item_bad_beg']==self.df['_item_bad_beg']) def testStrayCharacter(self): """Test that CIF1 fails with non-ASCII characters""" outstr = b"""data_block1 _normal_str 'hello sunshine' _latin1_str abc\xB0efgh""" b = open("tests/test3_latin1.cif","wb") b.write(outstr) b.close() try: testin = CifFile.CifFile("tests/test3_latin1.cif",grammar="1.0",permissive=False) except CifFile.StarError: pass def testPermissiveRead(self): """Test that stray latin-1 characters are accepted in permissive mode""" outstr = b"""data_block1 _normal_str 'hello sunshine' _latin1_str abc\xB0efgh""" b = open("tests/test3_latin1.cif","wb") b.write(outstr) b.close() testin = CifFile.CifFile("tests/test3_latin1.cif",grammar="1.0",permissive=True) def testItemChange(self): """Test that an item from in input file can be changed""" self.flf['_item_quote']= '2.3' self.failUnless(self.flf['_item_quote']=='2.3') class SimpleWriteTestCase(unittest.TestCase): def setUp(self): self.bf = CifFile.CifBlock() self.cf = CifFile.CifFile() self.cf['testblock'] = self.bf self.testfile = "tests/test_3.cif" def tearDown(self): os.remove(self.testfile) def testNumpyArray(self): """Check that an array can be output properly""" import numpy vector = numpy.array([1,2,3]) self.bf['_a_vector'] = vector open(self.testfile,"w").write(self.cf.WriteOut()) df = CifFile.CifFile(self.testfile,grammar="auto").first_block() print('vector is ' + repr(df['_a_vector'])) self.failUnless(df['_a_vector'] == ['1','2','3']) def testNumpyLoop(self): """Check that an array in a loop can be output properly""" import numpy vector_list = [numpy.array([1,2,3]),numpy.array([11,12,13]),numpy.array([-1.0,1.0,0.0])] self.bf['_a_vector'] = vector_list self.bf.CreateLoop(["_a_vector"]) open(self.testfile,"w").write(self.cf.WriteOut()) df = CifFile.CifFile(self.testfile,grammar="auto").first_block() print('vector is ' + repr(df['_a_vector'])) self.failUnless(df['_a_vector'][2] == ['-1.0','1.0','0.0']) class TemplateTestCase(unittest.TestCase): def setUp(self): """Create a template""" template_string = """#\#CIF_2.0 # Template # data_TEST_DIC _dictionary.title DDL_DIC _definition.update 2011-07-27 _description.text ; This dictionary specifies through its layout how we desire to format datanames. It is not a valid dictionary, but it must be a valid CIF file. 
; _name.category_id blahblah _name.object_id ALIAS _category.key_id '_alias.definition_id' _category.key_list ['_alias.definition_id'] _type.purpose Key _type.dimension [*] _import.get [{"file":'templ_enum.cif' "save":'units_code'}] loop_ _enumeration_set.state _enumeration_set.detail Dictionary "applies to all defined items in the dictionary" Category "applies to all defined items in the category" Item "applies to a single item definition" _enumeration.default Item """ f = open("tests/cif_template.cif","w") f.write(template_string) f.close() def tearDown(self): try: os.remove("tests/cif_template.cif") os.remove("tests/temp_test_file.cif") os.remove("tests/temp_test_file_new.cif") except: pass def testTemplateInput(self): """Test that an output template is successfully input""" p = CifFile.CifFile() p.SetTemplate("tests/cif_template.cif") #print(p.master_template) self.failUnless(p.master_template[0]['dataname']=='_dictionary.title') self.failUnless(p.master_template[5]['column']==31) self.failUnless(p.master_template[2]['delimiter']=='\n;') self.failUnless(p.master_template[11]['column']==11) self.failUnless(p.master_template[12]['delimiter']=='"') self.failUnless(p.master_template[2]['reformat']==True) self.failUnless(p.master_template[2]['reformat_indent']==5) def testTemplateOutputOrder(self): """Test that items are output in the correct order""" test_file = """## data_test _enumeration.default Item _name.object_id ALIAS _crazy_dummy_dataname 'whahey look at me' loop_ _enumeration_set.detail _enumeration_set.state _enumeration_set.dummy 'applies to all' dictionary 0 'cat only' category 1 'whatever' item 2 _name.category_id blahblah _description.text ;a nice long string that we would like to be formatted really nicely with an appropriate indent and so forth. Note that the template specifies an indent of 5 characters for this particular data item, and we shouldn't have more than two spaces in a row if we want it to work properly. 
; """ f = open("tests/temp_test_file.cif","w") f.write(test_file) f.close() p = CifFile.CifFile("tests/temp_test_file.cif") p.SetTemplate("tests/cif_template.cif") f = open("tests/temp_test_file_new.cif","w") f.write(str(p)) f.close() # now read as new file g = CifFile.CifFile("tests/temp_test_file_new.cif").first_block() self.assertEqual(g.item_order[1],'_name.category_id') self.assertEqual(g.loops[1][-1],'_enumeration_set.dummy') self.assertEqual(g.loops[1][0],'_enumeration_set.state') self.assertEqual(g.item_order[-1],'_crazy_dummy_dataname') def testStringInput(self): """Test that it works when passed a stringIO object""" s = open("tests/cif_template.cif","r").read() ss = StringIO(s) p = CifFile.CifFile() p.SetTemplate(ss) self.failUnless(p.master_template[12]['delimiter']=='"') # TODO: check position in loop packets # TODO: check delimiters ###### template tests ##### ############################################################## # # Test alternative grammars (1.0, 2.0, STAR2) # ############################################################## class GrammarTestCase(unittest.TestCase): def setUp(self): """Write out a file, then read it in again.""" teststr1_0 = """ #A test CIF file, grammar version 1.0 conformant data_Test _item_1 'A simple item' _item_2 '(Bracket always ok in quotes)' _item_3 [can_have_bracket_here_if_1.0] """ f = open("tests/test_1.0","w") f.write(teststr1_0) f.close() teststr2_0 = """#\#CIF_2.0 data_Test _item_1 ['a' 'b' 'c' 'd'] _item_2 'ordinary string' _item_3 {'a':2 'b':3} """ f = open("tests/test_2.0","w") f.write(teststr2_0) f.close() teststr_st = """ data_Test _item_1 ['a' , 'b' , 'c' , 'd'] _item_2 'ordinary string' _item_3 {'a':2 , 'b':3} """ f = open("tests/test_star","w") f.write(teststr_st) f.close() def tearDown(self): try: os.remove("tests/test_star") os.remove("tests/test_2.0") os.remove("tests/test_1.0") except: pass def testold(self): """Read in 1.0 conformant file; should not fail""" f = CifFile.ReadCif("tests/test_1.0",grammar="1.0") self.assertEqual(f["test"]["_item_3"],'[can_have_bracket_here_if_1.0]') def testNew(self): """Read in a 1.0 conformant file with 1.1 grammar; should fail""" try: f = CifFile.ReadCif("tests/test_1.0",grammar="1.1") except CifFile.StarError: pass def testCIF2(self): """Read in a 2.0 conformant file""" f = CifFile.ReadCif("tests/test_2.0",grammar="2.0") self.assertEqual(f["test"]["_item_3"]['b'],'3') def testSTAR2(self): """Read in a STAR2 conformant file""" f = CifFile.ReadCif("tests/test_star",grammar="STAR2") self.assertEqual(f["test"]["_item_3"]['b'],'3') def testAuto(self): """Test that grammar is auto-detected""" f = CifFile.CifFile("tests/test_1.0",grammar="auto") self.assertEqual(f["test"]["_item_3"],'[can_have_bracket_here_if_1.0]') h = CifFile.CifFile("tests/test_2.0",grammar="auto") self.assertEqual(h["test"]["_item_1"],StarList(['a','b','c','d'])) def testFlexCIF2(self): """Test that CIF2 grammar is detected with flex tokenizer""" f = CifFile.CifFile("tests/test_2.0",grammar="2.0",scantype="flex") self.assertEqual(f["test"]["_item_3"]['b'],'3') def testFlexSTAR2(self): """Read in a STAR2 conformant file with flex scanner""" f = CifFile.ReadCif("tests/test_star",grammar="STAR2",scantype="flex") self.assertEqual(f["test"]["_item_3"]['b'],'3') def testRoundTrip(self): """Read in STAR2, write out CIF2, read in and check """ f = CifFile.ReadCif("tests/test_star",grammar="STAR2") g = open("tests/star_to_cif2","w") f.set_grammar("2.0") g.write(str(f)) g.close() h = CifFile.ReadCif("tests/star_to_cif2",grammar="2.0") 
        self.assertEqual(f["test"]["_item_3"],h["test"]["_item_3"])

class ParentChildTestCase(unittest.TestCase):
    def setUp(self):
        """Write out a multi-save-frame file, read in again"""
        outstring = """
data_Toplevel
_item_1 a
save_1
  _s1_item1 b
  save_12
    _s12_item1 c
  save_
  save_13
    _s13_item1 d
  save_
save_
_item_2 e
save_2
  _s2_item1 f
  save_21
    _s21_item1 g
    save_211
      _s211_item1 h
    save_
    save_212
      _s212_item1 i
    save_
  save_
  save_22
    _s22_item1 j
  save_
save_
save_toplevel
  _item_1 k
save_
"""
        f = open('tests/save_test.cif','w')
        f.write(outstring)
        f.close()
        self.testcif = CifFile.CifFile('tests/save_test.cif',scoping='dictionary')

    def testGoodRead(self):
        """Check that there is a top level block"""
        self.failUnless('toplevel+' in [a[0] for a in self.testcif.child_table.items() if a[1].parent is None])
        self.failUnless(self.testcif.child_table['toplevel'].parent == 'toplevel+')

    def testGetParent(self):
        """Check that parent is correctly identified"""
        self.failUnless(self.testcif.get_parent('212')=='21')
        self.failUnless(self.testcif.get_parent('12')=='1')

    def testGetChildren(self):
        """Test that our child blocks are constructed correctly"""
        p = self.testcif.get_children('1')
        self.failUnless(p.has_key('13'))
        self.failUnless(not p.has_key('1'))
        self.failUnless(p.get_parent('13')==None)
        self.failUnless(p['12']['_s12_item1']=='c')

    def testGetChildrenwithParent(self):
        """Test that the parent is included if necessary"""
        p = self.testcif.get_children('1',include_parent=True)
        self.failUnless(p.has_key('1'))
        self.failUnless(p.get_parent('13')=='1')

    def testSetParent(self):
        """Test that the parent is correctly set"""
        self.testcif.set_parent('1','211')
        q = self.testcif.get_children('1')
        self.failUnless('211' in q.keys())

    def testChangeParent(self):
        """Test that a duplicated save frame is OK if the duplicate name is a data block"""
        self.failUnless('toplevel+' in self.testcif.keys())
        self.failUnless(self.testcif.get_parent('1')=='toplevel+')

    def testRename1(self):
        """Test that re-identifying a datablock works"""
        self.testcif._rekey('2','timey-wimey')
        self.failUnless(self.testcif.get_parent('21')=='timey-wimey')
        self.failUnless(self.testcif.has_key('timey-wimey'))
        self.failUnless(self.testcif['timey-wimey']['_s2_item1']=='f')
        print(str(self.testcif))

    def testRename2(self):
        """Test that renaming a block works"""
        self.testcif.rename('2','Timey-wimey')
        self.failUnless(self.testcif.has_key('timey-wimey'))
        self.failUnless(self.testcif.child_table['timey-wimey'].block_id=='Timey-wimey')

    def testUnlock(self):
        """Test that unlocking will change overwrite flag"""
        self.testcif['2'].overwrite = False
        self.testcif.unlock()
        self.failUnless(self.testcif['2'].overwrite is True)

class DDLmTestCase(unittest.TestCase):
    def setUp(self):
        """Write out a file, then read it in again."""
        teststr1_2 = """
#A test CIF file, grammar version 1.2 nonconformant
data_Test
  _item_1 'A simple item'
  _item_2 '(Bracket always ok in quotes)'
  _item_3 (can_have_bracket_here_if_1.2)
  _item_4 This_is_so_wrong?*~
"""
        goodstr1_2 = """
#A test CIF file, grammar version 1.2 conformant with nested save frames
data_Test
  _name.category_id CIF_DIC
  _name.object_id CIF_CORE
  _import.get [{"save":'EXPERIMENTAL', "file":'core_exptl.dic', "mode":'full' },
               {"save":'DIFFRACTION', "file":'core_diffr.dic', "mode":'full' },
               {"save":'STRUCTURE', "file":'core_struc.dic', "mode":'full' },
               {"save":'MODEL', "file":'core_model.dic', "mode":'full' },
               {"save":'PUBLICATION', "file":'core_publn.dic', "mode":'full' },
               {"save":'FUNCTION', "file":'core_funct.dic', "mode":'full' }]
save_Savelevel1
_item_in_save [1,2,3,4] save_saveLevel2 _item_in_inside_save {"hello":"goodbye","e":"mc2"} save_ save_ _test.1 {"piffle":poffle,"wiffle":3,'''woffle''':9.2} _test_2 {"ping":[1,2,3,4],"pong":[a,b,c,d]} _test_3 {"ppp":{'qqq':2,'poke':{'joke':[5,6,7],'jike':[{'aa':bb,'cc':dd},{'ee':ff,"gg":100}]}},"rrr":[11,12,13]} _triple_quote_test '''The comma is ok if, the quotes are ok''' _underscore_test underscores_are_allowed_inside_text """ f = open("tests/test_1.2","w") f.write(teststr1_2) f.close() f = open("tests/goodtest_1.2","w") f.write(goodstr1_2) f.close() def tearDown(self): try: os.remove("tests/test_1.2") os.remove("tests/goodtest_1.2") os.remove("tests/newgoodtest_1.2.cif") os.remove("tests/cif2goodtest_1.2.cif") except: pass def testold(self): """Read in 1.2 nonconformant file; should fail""" try: f = CifFile.ReadCif("tests/test_1.2",grammar="STAR2") except CifFile.StarError: pass def testgood(self): """Read in 1.2 conformant file: should succeed""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") def testTables(self): """Test that DDLm tables are properly parsed""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") self.failUnless(f["test"]["_test.1"]["wiffle"] == '3') def testTables2(self): """Test that a plain table is properly parsed""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") self.failUnless(f["test"]["_import.get"][0]["file"] == 'core_exptl.dic') def testTables3(self): """Test that a nested structure is properly parsed""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") self.failUnless(f["test"]["_test_3"]["ppp"]["poke"]["jike"][1]["gg"]=='100') def testTripleQuote(self): """Test that triple quoted values are treated correctly""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") print(f["test"]["_triple_quote_test"]) self.failUnless(f["test"]["_triple_quote_test"][:9] == 'The comma') def testRoundTrip(self): """Test that a DDLm file can be read in, written out and read in again""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") g = open("tests/newgoodtest_1.2.cif","w") g.write(str(f)) g.close() h = CifFile.ReadCif("tests/newgoodtest_1.2.cif",grammar="STAR2") #print(h['Test']) #print(h['Test']['_import.get']) #print(h['Test']['_import.get'][2]) #print(h['Test']['_import.get'][2]['file']) self.failUnless(h['Test']['_import.get'][2]['file']=='core_struc.dic') def testUnNest(self): """Test that we can convert a nested save frame STAR2 file to a non-nested file""" f = CifFile.ReadCif("tests/goodtest_1.2",grammar="STAR2") g = open("tests/cif2goodtest_1.2.cif","w") f.set_grammar("2.0") g.write(str(f)) h = CifFile.ReadCif("tests/cif2goodtest_1.2.cif") ########## # # Test DDLm imports # ########## class DDLmImportCase(unittest.TestCase): def setUp(self): self.testdic = CifFile.CifDic("dictionaries/cif_core_ddlm.dic",grammar="2.0",do_dREL=False) self.lightdic = CifFile.CifDic("dictionaries/cif_core_ddlm.dic",grammar="2.0", do_imports='contents',heavy=False,do_dREL=False) def testHeavyImport(self): """Test that enumerated types were imported correctly""" pp = self.testdic['_atom_type.radius_bond'] self.failUnless(pp.has_key('_enumeration_default.index')) c_pos = pp['_enumeration_default.index'].index('C') self.assertEqual(pp['_enumeration_default.value'][c_pos],'0.77') def testLightImport(self): """Test that light importation succeeds""" pp = self.lightdic['_atom_type.radius_bond'] self.failUnless(pp.has_key('_enumeration_default.index')) c_pos = pp['_enumeration_default.index'].index('C') 
self.assertEqual(pp['_enumeration_default.value'][c_pos],'0.77') def testLightFullImport(self): """Test that we properly import nested dictionaries""" nested_dic = CifFile.CifDic("dictionaries/cif_nested.dic",grammar="2.0", do_imports='Full',do_dREL=False,heavy=False) self.failUnless(nested_dic['_refine_diff.density_max']['_enumeration.range']=='-100.:') self.failUnless('_pd_refln.phase_id' in nested_dic['refln']['_category_key.name']) ############################################################## # # Test dictionary type # ############################################################## #ddl1dic = CifFile.CifDic("dictionaries/cif_core.dic",scantype="flex",do_minimum=True) class DictTestCase(unittest.TestCase): def setUp(self): self.ddldic = CifFile.CifDic("tests/ddl.dic",grammar='2.0',scoping='dictionary',do_minimum=True) #small DDLm dictionary def tearDown(self): try: os.remove("test_dic_write.cif") except: pass def testnum_and_esd(self): """Test conversion of numbers with esds""" testnums = ["5.65","-76.24(3)","8(2)","6.24(3)e3","55.2(2)d4"] res = [CifFile.get_number_with_esd(a) for a in testnums] print(repr(res)) self.failUnless(res[0]==(5.65,None)) self.failUnless(res[1]==(-76.24,0.03)) self.failUnless(res[2]==(8,2)) self.failUnless(res[3]==(6240,30)) self.failUnless(res[4]==(552000,2000)) def testdot(self): """Make sure a single dot is skipped""" res1,res2 = CifFile.get_number_with_esd(".") self.failUnless(res1==None) def testCategoryRename(self): """Test that renaming a category works correctly""" self.ddldic.change_category_name('Description','Opisanie') self.failUnless(self.ddldic.has_key('opisanie')) self.failUnless(self.ddldic['opisanie']['_name.object_id']=='Opisanie') self.failUnless(self.ddldic.has_key('opisanie.text')) self.failUnless(self.ddldic['opisanie.text']['_name.category_id']=='Opisanie') self.failUnless(self.ddldic['opisanie.text']['_definition.id']=='_Opisanie.text') self.failUnless(self.ddldic.has_key('description_example')) def testChangeItemCategory(self): """Test that changing an item's category works""" self.ddldic.change_category('_description.common','type') self.failUnless('_type.common' in self.ddldic) self.failUnless('_description.common' not in self.ddldic) self.failUnless(self.ddldic['_type.common']['_name.category_id'].lower()=='type') self.failUnless(self.ddldic.get_parent('_type.common')=='type') def testChangeCategoryCategory(self): """Test that changing a category's category works""" self.ddldic.change_category('description_example','attributes') self.failUnless(self.ddldic['description_example']['_name.category_id'].lower()=='attributes') self.failUnless(self.ddldic.get_parent('description_example')=='attributes') def testChangeName(self): """Test that changing the object_id works""" self.ddldic.change_name('_description.common','uncommon') self.failUnless('_description.uncommon' in self.ddldic) self.failUnless('_description.common' not in self.ddldic) self.failUnless(self.ddldic['_description.uncommon']['_name.object_id']=='uncommon') self.failUnless(self.ddldic['_description.uncommon']['_definition.id']=='_description.uncommon') def testNewCategory(self): """Test that we can add a new category""" self.ddldic.add_category('brand-new') self.failUnless('brand-new' in self.ddldic) self.failUnless(self.ddldic['brand-new']['_name.object_id']=='brand-new') self.failUnless(self.ddldic.get_parent('brand-new').lower()=='ddl_dic') self.failUnless(self.ddldic['brand-new']['_name.category_id'].lower()=='attributes') def testNewDefinition(self): """Test that we 
can add a new definition""" realname = self.ddldic.add_definition('_junkety._junkjunk','description') print('Real name for new definition is %s' % realname) self.failUnless('_description.junkjunk' in self.ddldic) self.failUnless(self.ddldic['_description.junkjunk']['_name.category_id'].lower()=='description') self.failUnless(self.ddldic['_description.junkjunk']['_name.object_id']=='junkjunk') self.failUnless(self.ddldic['_description.junkjunk']['_definition.id']=='_description.junkjunk') def testNewDanglerDef(self): """Test that we can add a new definition with external category""" self.ddldic.add_definition('_junkety._junkjunk','external_cat',allow_dangler=True) self.failUnless('_external_cat.junkjunk' in self.ddldic) self.failUnless(self.ddldic['_external_cat.junkjunk']['_name.category_id'].lower()=='external_cat') self.failUnless(self.ddldic['_external_cat.junkjunk']['_name.object_id']=='junkjunk') self.failUnless(self.ddldic['_external_cat.junkjunk']['_definition.id']=='_external_cat.junkjunk') def testNewDanglerCat(self): """Test that we can add a new category with external parent""" self.ddldic.add_category('internal_cat','atom_site',allow_dangler=True) self.failUnless('internal_cat' in self.ddldic) self.failUnless(self.ddldic['internal_cat']['_name.object_id']=='internal_cat') self.failUnless(self.ddldic.get_parent('internal_cat').lower()=='ddl_dic') self.failUnless(self.ddldic['internal_cat']['_name.category_id'].lower()=='atom_site') def testDeleteDefinition(self): """Test that we can delete a definition""" self.ddldic.remove_definition('_alias.deprecation_date') self.failUnless('_alias.deprecation_date' not in self.ddldic) def testDeleteCategory(self): """test that we can delete whole categories""" self.ddldic.remove_definition('description') self.failUnless('description' not in self.ddldic) self.failUnless('description_example' not in self.ddldic) def testWriteDic(self): """Test that we can write a dictionary after adding a category""" self.ddldic.add_definition('_junkety._junkjunk_','description') self.ddldic.set_grammar('2.0') final_str = str(self.ddldic) #should not fail cwd = os.getcwd() ffurl = os.path.join(cwd,"tests/test_dic_write.cif") ff = open(ffurl,"w") ff.write(final_str) ff.close() incif = CifFile.CifDic("file:"+ffurl,grammar='2.0',do_minimum=True) self.failUnless(incif.has_key('_description.junkjunk')) def testSemanticChildren(self): """Test that we can obtain the semantic children of a category""" children = self.ddldic.ddlm_immediate_children('enumeration_set') self.failUnless('_enumeration_set.xref_dictionary' in children) children = self.ddldic.ddlm_immediate_children('enumeration') self.failUnless('enumeration_set' in children) def testDanglers(self): """Test that we correctly locate missing categories""" self.ddldic['_description.text'].overwrite = True self.ddldic['_description.text']['_name.category_id'] = 'NNN' p = self.ddldic.ddlm_danglers() self.failUnless('_description.text' in p) self.failUnless('ddl_dic' not in p) self.failUnless('attributes' not in p) def testAllChildren(self): """Test that we can pick up all children""" children = self.ddldic.ddlm_all_children('description') self.failUnless('_description_example.detail' in children) def testDanglerChildren(self): """Test that danglers are found when outputting""" self.ddldic.add_definition('_junkety._junkjunk','external_cat',allow_dangler=True) self.ddldic.add_category('some_other_cat','atom_site',allow_dangler=True) self.ddldic.add_definition('_xxx.more_junk','some_other_cat') names = 
self.ddldic.get_full_child_list() self.failUnless('some_other_cat' in names) self.failUnless('_external_cat.junkjunk' in names) self.failUnless('_some_other_cat.more_junk' in names) def testFunnyLayout(self): """Test that having some of the data block at the end is OK""" good_read = CifFile.CifDic("tests/ddl_rearranged.dic",grammar="2.0",scoping="dictionary",do_minimum=True) # now for some value testing class DDLmValueTestCase(unittest.TestCase): def setUp(self): filedata = """ data_testblock _float.value 4.2 _hex.value 0xA2 _list1.value [1.2, 2.3, 4.5] _list2.value [['i',4.2],['j',1.5],['lmnop',-4.5]] _matrix.value [[1,2,3],[4,5,6],[7,8,9]] """ p = open('tests/ddlm_testdata','w') p.write(filedata) p.close() self.testblock = CifFile.CifFile('tests/ddlm_testdata',grammar="STAR2")['testblock'] def tearDown(self): os.remove("tests/ddlm_testdata") def testTypeInterpretation(self): """Test that we decode DDLm type.contents correctly""" import CifFile.TypeContentsParser as t p = t.TypeParser(t.TypeParserScanner('List(Real,Real,Real)')) q = getattr(p,"input")() print(repr(q)) self.failUnless(q == ['Real','Real','Real']) p = t.TypeParser(t.TypeParserScanner('List(Real,List(Integer,Real),Real)')) q = getattr(p,"input")() print(repr(q)) self.failUnless(q == ['Real',['Integer','Real'],'Real']) def testSingleConversion(self): namedef = CifFile.CifBlock() namedef['_type.container'] = 'Single' namedef['_type.contents'] = 'Real' result = CifFile.convert_type(namedef)(self.testblock['_float.value']) self.failUnless(result == 4.2) def testListConversion(self): namedef = CifFile.CifBlock() namedef['_type.container'] = 'List' namedef['_type.contents'] = 'List(Text,Real)' namedef['_type.dimension'] = CifFile.StarList([3]) result = CifFile.convert_type(namedef)(self.testblock['_list2.value']) print('Result: ' + repr(result)) self.failUnless(result == [['i',4.2],['j',1.5],['lmnop',-4.5]]) def testSimpleListConversion(self): namedef = CifFile.CifBlock() namedef['_type.container'] = 'List' namedef['_type.contents'] = 'Real' namedef['_type.dimension'] = CifFile.StarList([3]) result = CifFile.convert_type(namedef)(self.testblock['_list1.value']) self.assertEqual(result, [1.2, 2.3, 4.5]) def testMatrixConversion(self): namedef = CifFile.CifBlock() namedef['_type.container'] = 'Matrix' namedef['_type.contents'] = 'Integer' result = CifFile.convert_type(namedef)(self.testblock['_matrix.value']) self.failUnless(result[1][2] == 6) def testValuesReturned(self): """Test that values are returned transparently converted when a dictionary is supplied""" pass ############################################################## # # Validation testing # ############################################################## # We first test single item checking class DDL1TestCase(unittest.TestCase): def setUp(self): self.ddl1dic = CifFile.CifDic("dictionaries/cif_core.dic") #items = (("_atom_site_label","S1"), # ("_atom_site_fract_x","0.74799(9)"), # ("_atom_site_adp_type","Umpe"), # ("_this_is_not_in_dict","not here")) bl = CifFile.CifBlock() self.cf = CifFile.ValidCifFile(dic=self.ddl1dic) self.cf["test_block"] = bl self.cf["test_block"].AddCifItem(("_atom_site_label", ["C1","Cr2","H3","U4"])) def tearDown(self): del self.cf def testItemType(self): """Test that types are correctly checked and reported""" #numbers self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75" try: self.cf["test_block"]["_diffrn_radiation_wavelength"] = "moly" except CifFile.ValidCifError: pass else: self.fail() def testItemEsd(self): """Test that non-esd 
items are not allowed with esds""" #numbers try: self.cf["test_block"]["_chemical_melting_point_gt"] = "1325(6)" except CifFile.ValidCifError: pass else: self.fail() def testItemEnum(self): """Test that enumerations are understood""" self.cf["test_block"]["_diffrn_source_target"]="Cr" try: self.cf["test_block"]["_diffrn_source_target"]="2.5" except CifFile.ValidCifError: pass else: self.fail() def testItemRange(self): """Test that ranges are correctly handled""" self.cf["test_block"]["_diffrn_source_power"] = "0.0" self.cf["test_block"]["_diffrn_standards_decay_%"] = "98" def testItemLooping(self): """test that list yes/no/both works""" pass def testListReference(self): """Test that _list_reference is handled correctly""" #can be both looped and unlooped; if unlooped, no need for ref. self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75" try: self.cf["test_block"].AddCifItem((( "_diffrn_radiation_wavelength", "_diffrn_radiation_wavelength_wt"),(("0.75","0.71"),("0.5","0.1")))) except CifFile.ValidCifError: pass else: self.fail() def testUniqueness(self): """Test that non-unique values are found""" # in cif_core.dic only one set is available try: self.cf["test_block"].AddCifItem((( "_publ_body_label", "_publ_body_element"), ( ("1.1","1.2","1.3","1.2"), ("section","section","section","section") ))) except CifFile.ValidCifError: pass else: self.fail() def testParentChild(self): """Test that non-matching values are reported""" self.assertRaises(CifFile.ValidCifError, self.cf["test_block"].AddCifItem, (("_geom_bond_atom_site_label_1","_geom_bond_atom_site_label_2"), [["C1","C2","H3","U4"], ["C1","Cr2","H3","U4"]])) # now we test that a missing parent is flagged # self.assertRaises(CifFile.ValidCifError, # self.cf["test_block"].AddCifItem, # (("_atom_site_type_symbol","_atom_site_label"), # [["C","C","N"],["C1","C2","N1"]])) def testReport(self): CifFile.validate_report(CifFile.Validate("tests/C13H2203_with_errors.cif",dic=self.ddl1dic)) class DDLmDicTestCase(unittest.TestCase): """Test validation of DDLm dictionaries""" def setUp(self): testdic_string = """#\#CIF_2.0 #\#CIF_2.0 ############################################################################## # # # DDLm REFERENCE DICTIONARY # # # ############################################################################## data_DDL_DIC _dictionary.title DDL_DIC _dictionary.class Reference _dictionary.version 3.11.08 _dictionary.date 2015-01-28 _dictionary.uri www.iucr.org/cif/dic/ddl.dic _dictionary.ddl_conformance 3.11.08 _dictionary.namespace DdlDic _description.text ; This dictionary contains the definitions of attributes that make up the DDLm dictionary definition language. It provides the meta meta data for all CIF dictionaries. ; save_ATTRIBUTES _definition.id ATTRIBUTES _definition.scope Category _definition.class Head _definition.update 2011-07-27 _description.text ; This category is parent of all other categories in the DDLm dictionary. ; _name.object_id ATTRIBUTES save_ #============================================================================ save_ALIAS _definition.id ALIAS _definition.scope Category _definition.class Loop _definition.update 2013-09-08 _description.text ; The attributes used to specify the aliased names of definitions. 
; _name.category_id ATTRIBUTES _name.object_id ALIAS _category.key_id '_alias.definition_id' loop_ _category_key.name '_alias.definition_id' save_ save_alias.definition_id _definition.id '_alias.definition_id' _definition.class Attribute _definition.update 2006-11-16 _description.text ; Identifier tag of an aliased definition. ; _name.category_id alias _name.object_id definition_id _type.purpose Key _type.source Assigned _type.container Single _type.contents Tag save_ save_definition.scope _definition.id '_definition.scope' _definition.class Attribute _definition.update 2006-11-16 _description.text ; The extent to which a definition affects other definitions. ; _name.category_id definition _name.object_id scope _type.purpose State _type.source Assigned _type.container Single _type.contents Code loop_ _enumeration_set.state _enumeration_set.detail _description.common Dictionary 'applies to all defined items in the dictionary' 'whoops' Category 'applies to all defined items in the category' 'not' Item 'applies to a single item definition' 'allowed' _enumeration.default Item save_ """ f = open('tests/ddlm_valid_test.cif2','w') f.write(testdic_string) f.close() self.testcif = CifFile.CifFile('tests/ddlm_valid_test.cif2',grammar='auto') self.refdic = CifFile.CifDic('dictionaries/ddl.dic',grammar='auto') def tearDown(self): os.remove('tests/ddlm_valid_test.cif2') def testMandatory(self): """Test that missing mandatory items are found""" del self.testcif['alias.definition_id']['_name.category_id'] result = self.refdic.run_block_validation(self.testcif['alias.definition_id']) self.failUnless(dict(result['whole_block'])['check_mandatory_items']['result']==False) def testProhibited(self): """Test that prohibited items are found""" self.testcif['alias']['_enumeration_set.state'] = [1,2,3,4] result = self.refdic.run_block_validation(self.testcif['alias']) self.failUnless(dict(result['whole_block'])['check_prohibited_items']['result']==False) def testUnlooped(self): """Test that unloopable data items are found""" result = self.refdic.run_loop_validation(self.testcif['definition.scope'].loops[1]) self.failUnless(dict(result['_enumeration_set.state'])['validate_looping_ddlm']['result']==False) def testWrongLoop(self): """Test that non-co-loopable data items are found""" del self.testcif['definition.scope']['_description.common'] #get rid of bad one self.testcif['definition.scope']['_description_example.case'] = [1,2,3] self.testcif['definition.scope'].CreateLoop(['_enumeration_set.state', '_enumeration_set.detail', '_description_example.case']) loop_no = self.testcif['definition.scope'].FindLoop('_enumeration_set.state') result = self.refdic.run_loop_validation(self.testcif['definition.scope'].loops[loop_no]) self.failUnless(dict(result['_enumeration_set.state'])['validate_loop_membership']['result']==False) def testUnKeyed(self): """Test that a missing key is found""" del self.testcif['definition.scope']['_description.common'] del self.testcif['definition.scope']['_enumeration_set.state'] result = self.refdic.run_loop_validation(self.testcif['definition.scope'].loops[1]) self.failUnless(dict(result['_enumeration_set.detail'])['validate_loop_key_ddlm']['result']==False) def testNotMissingKey(self): """Test that a key that should be present is detected""" result = self.refdic.run_loop_validation(self.testcif['definition.scope'].loops[1]) print(repr(result)) self.failUnless(dict(result['_enumeration_set.state'])['validate_loop_key_ddlm']['result']==True) class FakeDicTestCase(unittest.TestCase): # we test 
    # stuff that hasn't been used in official dictionaries to date.
    def setUp(self):
        self.testcif = CifFile.CifFile("dictionaries/novel_test.cif")

    def testTypeConstruct(self):
        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
                          diclist=["dictionaries/novel.dic"],datasource=self.testcif)

class DicEvalTestCase(unittest.TestCase):
    def setUp(self):
        testdic = CifFile.CifDic("dictionaries/cif_core_ddlm.dic",grammar="auto")
        c_old = CifFile.CifFile("tests/drel/nick_old.cif",grammar="2.0")
        c_new = CifFile.CifFile("tests/drel/nick_new.cif",grammar="2.0")
        self.fb = c_new['saly2']
        self.fb.assign_dictionary(testdic)
        self.fb_old = c_old['saly2']
        self.fb_old.assign_dictionary(testdic)

    def check_value(self,dataname,scalar=True):
        """Generic check of value"""
        target = self.fb[dataname]
        del self.fb[dataname]
        result = self.fb[dataname]
        print("Result: {!r} Target: {!r}".format(result,target))
        if scalar:
            self.failUnless(abs(float(target)-float(result))<0.01)
        else:
            self.assertEqual(target,result,"Target = %s, Result = %s" % (repr(target),repr(result)))

    def testCellVolume(self):
        self.check_value('_cell.volume')

    @unittest.expectedFailure
    def testNoInCell(self,scalar=False):
        self.check_value('_atom_type.number_in_cell',scalar=False)

    def testDensity(self):
        self.check_value('_exptl_crystal.density_diffrn')

    @unittest.expectedFailure
    def testReflnF(self):
        self.check_value('_refln.F_calc',scalar=False)

    def testCalcOldAlias(self):
        """Test that a calculation is performed for an old dataname"""
        target = self.fb['_cell.volume']
        print("CalcOldAlias target is {!r}".format(target))
        del self.fb['_cell.volume']
        self.failUnless(abs(self.fb['_cell_volume']-float(target))<0.01)

    def testFullCalcAlias(self):
        """Test that a calculation is performed if dependent datanames have aliased values"""
        del self.fb_old['_refln.F_calc']
        result = self.fb_old['_refln.F_calc']

    def testEigenSystem(self):
        """Test that question marks are seen as missing values"""
        self.fb.provide_value = True
        result = self.fb['_model_site.adp_eigen_system']
        print('adp eigensystem is: ' + repr(result))

    def testCategoryMethod(self):
        """Test that a category method calculates and updates"""
        # delete pre-existing values
        del self.fb['_model_site.adp_eigen_system']
        del self.fb["_model_site.label"]
        del self.fb["_model_site.symop"]
        self.fb.provide_value = True
        result = self.fb['model_site']
        self.fb.provide_value = False
        print('**Updated block:')
        print(str(self.fb))
        self.failUnless(self.fb.has_key('_model_site.Cartn_xyz'))
        self.failUnless(self.fb.has_key('_model_site.mole_index'))

    def testEmptyKey(self):
        """Test that empty keys are not stored"""
        del self.fb['_atom_type_scat.symbol']
        del self.fb['_atom_type_scat.dispersion_real']
        del self.fb['_atom_type_scat.dispersion_imag']
        del self.fb['_atom_type_scat.source']
        p = self.fb.GetKeyedSemanticPacket('O','atom_type')
        self.failUnless(not hasattr(p,'_atom_type_scat.symbol'))

class DicStructureTestCase(unittest.TestCase):
    """Tests use of dictionary semantic information for item lookup"""
    def setUp(self):
        self.testdic = CifFile.CifDic("dictionaries/cif_core_ddlm.dic",grammar="auto")
        cc = CifFile.CifFile("tests/drel/nick.cif",grammar="STAR2")
        self.fb = cc["saly2"]
        self.fb.assign_dictionary(self.testdic)

    def testOldAlias(self):
        """Test finding an older form of a new dataname"""
        self.failUnless(self.fb['_symmetry.space_group_name_H_M']=='P_1_21/a_1')

    def testNewAlias(self):
        """Test finding a newer form of an old dataname"""
        self.failUnless(self.fb['_symmetry_space_group_name_Hall']=='-p_2yab')

    def testCatObj(self):
        """Test that we can obtain a name by category/object specification"""
        target = self.testdic.get_name_by_cat_obj('atom_type','Cromer_Mann_coeffs')
        self.assertEqual(target,'_atom_type_scat.Cromer_Mann_coeffs')
        target = self.testdic.get_name_by_cat_obj('cell','volume')
        self.assertEqual(target,'_cell.volume')

    def testCatKey(self):
        """Test that we get a complete list of keys for child categories"""
        target = self.testdic.cat_key_table
        self.assertEqual(target['atom_site'],[['_atom_site.label'],['_atom_site_aniso.label']])

    def testEquivKey(self):
        """Test that we can identify equivalent key datanames"""
        target = self.testdic.key_equivs
        self.assertEqual(target['_atom_site_aniso.label'],['_atom_site.label'])

    def testChildPacket(self):
        """Test that a case-insensitive child packet is included in attributes of parent category"""
        target = self.fb.GetKeyedSemanticPacket("o2",'atom_site')
        self.failUnless(hasattr(target,'_atom_site_aniso.u_23'))
        self.assertEqual(getattr(target,'_atom_site_aniso.U_33'),'.040(3)')

    @unittest.skip("Functionality not yet implemented")
    def testChildPacketMultiKey(self):
        """Test that a case-insensitive child packet is included in attributes of
        parent category using the compound key routine"""
        target = self.fb.GetMultiKeyedSemanticPacket({'_atom_site.label':("o2",True)},'atom_site')
        self.assertEqual(getattr(target,'_atom_site_aniso.U_33'),'.040(3)')
        self.failUnless(hasattr(target,'_atom_site_aniso.u_23'))

    def testPacketCalcs(self):
        """Test that a star packet can calculate missing values"""
        target = self.fb.GetKeyedSemanticPacket("O",'atom_type')
        rad = getattr(target,'_atom_type.radius_bond')
        self.assertEqual(rad,0.74)

    def testEnumDefault(self):
        """Test that we can obtain enumerated values"""
        target = self.fb['_atom_type.radius_bond']
        self.failUnless(0.77 in target)

    def testCatObjKey(self):
        """Test that keys are correctly handled by the cat/obj table"""
        self.assertEqual(self.testdic.get_name_by_cat_obj('atom_site','label'),"_atom_site.label")

    def testRepeatedName(self):
        """Test that a repeated object_id is handled correctly"""
        self.assertEqual(self.testdic.cat_obj_lookup_table[('atom_site','type_symbol')],
                         ['_atom_site.type_symbol','_atom_site_aniso.type_symbol'])

    def testPrintOut(self):
        """Test that a block with dictionary attached can print out string values"""
        print(self.fb)

    def pullbacksetup(self):
        """Initial steps when setting up a pullback"""
        dic_info = CifFile.CifDic("tests/full_demo_0.0.6.dic",grammar="auto")
        start_data = CifFile.CifFile("tests/multi-image-test.cif",grammar="auto")
        start_data = start_data['Merged_scans']
        start_data.assign_dictionary(dic_info)
        return start_data

    def unpullbacksetup(self):
        """Initial values when setting up a pullback"""
        dic_info = CifFile.CifDic("tests/full_demo_0.0.6.dic",grammar="auto")
        start_data = CifFile.CifFile("tests/multi-image-test.cif.pulled_back",grammar="auto")
        start_data = start_data['Merged_scans']
        start_data.assign_dictionary(dic_info)
        return start_data

    def testPullBack(self):
        """Test construction of a category that is pulled back from other categories"""
        start_data = self.pullbacksetup()
        q = start_data['_diffrn_detector_monolithic_element.key']
        p = start_data['_diffrn_detector_monolithic_element.detector_id']
        print('p,q = ' + repr(p) + '\n' + repr(q))
        self.failUnless(q==[['element1','adscq210-sn457']])
        self.failUnless(p==['ADSCQ210-SN457'])

    def testMultiPullback(self):
        """Test that pullbacks with multiple matches work properly"""
        start_data = self.pullbacksetup()
        q = start_data['_full_frame.id']
        r = start_data['_full_frame.detector_element_id']
print('Frames from monolithic detector:' + repr(q)) self.failUnless(['scan1','frame1'] in q) self.failUnless(['scan1','frame3'] in q) def testIntegerFilter(self): """Test construction of a block that is filtered from another category""" start_data = self.pullbacksetup() q = start_data['_diffrn_detector_monolithic.id'] self.failUnless(q == ['ADSCQ210-SN457']) def testTextFilter(self): """Test construction of a block that is filtered using a text string""" start_data = self.pullbacksetup() q = start_data['_detector_axis.id'] print('q is ' + repr(q)) self.failUnless(['detector_y','detector'] in q) self.failUnless(['goniometer_phi','goniometer'] not in q) def testPopulateFromPullback(self): """Test population of a category with id items from a pulled-back category""" start_data = self.unpullbacksetup() q = start_data['_diffrn_data_frame.key'] self.failUnless(['SCAN1','FRAME1'] in q) self.failUnless(['SCAN1','Frame3'] in q) def testPopulateFromFilter(self): """Test population of a category that has been filtered""" start_data = self.unpullbacksetup() q = start_data['_diffrn_detector.id'] r = start_data['_diffrn_detector.number_of_elements'] print('q,r = ' + repr(q) + ' , ' + repr(r)) self.failUnless(q==['ADSCQ210-SN457']) self.failUnless(r == [1]) def testPopulateFromMultiFilters(self): """Test population of a category that is filtered into multiple streams""" start_data = self.unpullbacksetup() q = start_data['_axis.key'] print('q ends up as:' + repr(q)) self.failUnless(['detector_x','detector'] in q) self.failUnless(['GONIOMETER_PHI','goniometer'] in q) def testPopulateNonIDFromFilter(self): """Test that duplicate datanames are populated""" start_data = self.unpullbacksetup() q = start_data['_diffrn_data_frame.binary_id'] self.failUnless('3' in q) class BlockOutputOrderTestCase(unittest.TestCase): def tearDown(self): try: os.remove("tests/order_test.cif") os.remove("tests/round_trip_test.cif") except: pass def testOutputOrder(self): outstrg = """#\#CIF_2.0 data_testa _item1 1 data_testb _item2 2 data_testc _item3 3 data_testd _item4 4 """ f = open("tests/order_test.cif","w") f.write(outstrg) f.close() q = CifFile.CifFile("tests/order_test.cif",grammar="auto") print(repr(q.block_input_order)) self.failUnless(q.block_input_order[1] == "testb") f = open("tests/round_trip_test.cif","w") f.write(str(q)) if __name__=='__main__': #suite = unittest.TestLoader().loadTestsFromTestCase(DicEvalTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(SimpleWriteTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(FileWriteTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(GrammarTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(DicStructureTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(BasicUtilitiesTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(BlockRWTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(BlockOutputOrderTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(SyntaxErrorTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(LoopBlockTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(BlockChangeTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(DDLmValueTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(DDLmImportCase) #suite = unittest.TestLoader().loadTestsFromTestCase(DDL1TestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(DDLmDicTestCase) #suite = unittest.TestLoader().loadTestsFromTestCase(TemplateTestCase) #suite = 
unittest.TestLoader().loadTestsFromTestCase(DictTestCase) #unittest.TextTestRunner(verbosity=2).run(suite) unittest.main()
pycifrw-4.4/docs/000077500000000000000000000000001345362224200137775ustar00rootroot00000000000000pycifrw-4.4/docs/CifFile/000077500000000000000000000000001345362224200153005ustar00rootroot00000000000000pycifrw-4.4/docs/CifFile/CifFile.m.html000066400000000000000000056455011345362224200177400ustar00rootroot00000000000000 CifFile.CifFile API documentation

CifFile.CifFile module
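
For orientation, a minimal usage sketch of this module (the file name below is
hypothetical, chosen only for illustration):

    import CifFile
    cf = CifFile.ReadCif("my_data.cif")    # parse a CIF file
    block = cf.first_block()               # first data block in input order
    value = block['_a_dataname']           # items are accessed like a dictionary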

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

# Python 2,3 compatibility
try:
    from urllib import urlopen         # for arbitrary opening
    from urlparse import urlparse, urlunparse,urljoin
except ImportError:
    from urllib.request import urlopen
    from urllib.parse import urlparse,urlunparse,urljoin

# The unicode type does not exist in Python3 as the str type
# encompasses unicode.  PyCIFRW tests for 'unicode' would fail
# Suggestions for a better approach welcome.

if isinstance(u"abc",str):   #Python3
    unicode = str
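    # (illustrative) with the alias above, Python2-era checks such as
    #   isinstance(value, unicode)
    # keep working under Python3, where every str is already unicode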
    
__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""


import re,sys
from . import StarFile
from .StarFile import StarList  #put in global scope for exec statement
try:
    import numpy                   #put in global scope for exec statement
    from .drel import drel_runtime  #put in global scope for exec statement
except ImportError:
    pass                       #will fail when using dictionaries for calcs
from copy import copy          #must be in global scope for exec statement

def track_recursion(in_this_func):
    """Keep an eye on a function call to make sure that the key argument hasn't been
    seen before"""
    def wrapper(*args,**kwargs):
        key_arg = args[1]
        if key_arg in wrapper.called_list:
            print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg)))
            raise CifRecursionError( key_arg,wrapper.called_list[:])    #failure
        if len(wrapper.called_list) == 0:   #first time
            wrapper.stored_use_defaults = kwargs.get("allow_defaults",False)
            print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults))
        else:
            kwargs["allow_defaults"] = wrapper.stored_use_defaults
        wrapper.called_list.append(key_arg)
        print('Recursion watch: call stack: ' + repr(wrapper.called_list))
        try:
            result = in_this_func(*args,**kwargs)
        except StarFile.StarDerivationError as s:
            if len(wrapper.called_list) == 1: #no more
                raise StarFile.StarDerivationFailure(wrapper.called_list[0])
            else:
                raise
        finally:
            wrapper.called_list.pop()
            if len(wrapper.called_list) == 0:
                wrapper.stored_use_defaults = 'error'
        return result
    wrapper.called_list = []
    return wrapper
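# Illustrative sketch only: this decorator is intended for derivation-style
# methods whose second positional argument is the dataname being derived,
# e.g. (signature illustrative)
#
#     @track_recursion
#     def derive_item(self, key, cifdata, allow_defaults=False):
#         ...
#
# A second attempt to derive the same key while the first is still in
# progress raises CifRecursionError.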

class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
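
    ## Example

    A minimal sketch (datanames are illustrative):

        >>> b = CifBlock()
        >>> b['_cell.length_a'] = 5.432            # simple item
        >>> b['_atom_site.label'] = ['C1','O1']    # list value...
        >>> b.CreateLoop(['_atom_site.label'])     # ...now made into a loop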
    """
    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
        """When provided, `data` should be another CifBlock whose contents will be copied to
        this block.

        * if `strict` is set, maximum name lengths will be enforced

        * `maxoutlength` is the maximum length for output lines

        * `wraplength` is the ideal length to make output lines

        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
        is raised).

        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
        after setting the dataitem value.
        """
        if strict: maxnamelength=75
        else:
           maxnamelength=-1
        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
        self.dictionary = None   #DDL dictionary referring to this block
        self.compat_mode = compat_mode   #old-style behaviour of setitem

    def RemoveCifItem(self,itemname):
        """Remove `itemname` from the CifBlock"""
        self.RemoveItem(itemname)

    def __setitem__(self,key,value):
        self.AddItem(key,value)
        # for backwards compatibility make a single-element loop
        if self.compat_mode:
            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
                 # single element loop
                 self.CreateLoop([key])

    def copy(self):
        newblock = super(CifBlock,self).copy()
        return type(self)(newblock)   #catch inheritance (im_class is Python2-only)

    def AddCifItem(self,data):
        """ *DEPRECATED*. Use `AddItem` instead."""
        # we accept only tuples, strings and lists!!
        if not (isinstance(data[0],(unicode,tuple,list,str))):
                  raise TypeError('Cif datanames are either a string, tuple or list')
        # we catch single item loops as well...
        if isinstance(data[0],(unicode,str)):
            self.AddSingleCifItem(data[0],list(data[1]))
            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
                self.CreateLoop([data[0]])
            return
        # otherwise, we loop over the datanames
        keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
        [self.AddSingleCifItem(a,b) for a,b in keyvals]
        # and create the loop
        self.CreateLoop(data[0][0])

    def AddSingleCifItem(self,key,value):
        """*Deprecated*. Use `AddItem` instead"""
        """Add a single data item. If it is part of a loop, a separate call should be made"""
        self.AddItem(key,value)

    def loopnames(self):
        return [self.loops[a] for a in self.loops]


class CifFile(StarFile.StarFile):
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""


class CifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Format error: '+ self.value

class ValidCifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Validity error: ' + self.value

class CifRecursionError(Exception):
    def __init__(self,key_value,call_stack):
        self.key_value = key_value
        self.call_stack = call_stack
    def __str__(self):
        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))


class CifDic(StarFile.StarFile):
    """Create a Cif Dictionary object from the provided source, which can
    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
    only):

    * do_minimum (Boolean):
         Do not set up the dREL system for auto-calculation or perform
         imports.  This implies do_imports=False and do_dREL=False

    * do_imports = No/Full/Contents/All:
         If not 'No', resolve _import.get statements: 'Full' merges whole
         imported definition blocks, 'Contents' merges attributes only, and
         'All' does both.

    * do_dREL = True/False:
         Parse and convert all dREL methods to Python. Implies do_imports=All
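
    A minimal usage sketch (the dictionary location is illustrative):

        cd = CifDic('cif_core_ddlm.dic', grammar='2.0', do_dREL=False)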

    """
    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
                                                             grammar='auto',**kwargs):
        self.do_minimum = do_minimum
        if do_minimum:
            do_imports = 'No'
            do_dREL = False
        if do_dREL: do_imports = 'All'
        self.template_cache = {}    #for DDLm imports
        self.ddlm_functions = {}    #for DDLm functions
        self.switch_numpy(False)    #no Numpy arrays returned
        super(CifDic,self).__init__(datasource=dic,grammar=grammar,**kwargs)
        self.standard = 'Dic'    #for correct output order
        self.scoping = 'dictionary'
        (self.dicname,self.diclang) = self.dic_determine()
        print('%s is a %s dictionary' % (self.dicname,self.diclang))
        self.scopes_mandatory = {}
        self.scopes_naughty = {}
        # rename and expand out definitions using "_name" in DDL dictionaries
        if self.diclang == "DDL1":
            self.DDL1_normalise()   #this removes any non-definition entries
        self.create_def_block_table() #From now on, [] uses definition_id
        if self.diclang == "DDL1":
            self.ddl1_cat_load()
        elif self.diclang == "DDL2":
            self.DDL2_normalise()   #iron out some DDL2 tricky bits
        elif self.diclang == "DDLm":
            self.scoping = 'dictionary'   #expose all save frames
            if do_imports != 'No':
               self.ddlm_import(import_mode=do_imports)  #recursively calls this routine
            self.create_alias_table()
            self.create_cat_obj_table()
            self.create_cat_key_table()
            if do_dREL:
                print('Doing full dictionary initialisation')
                self.initialise_drel()
        self.add_category_info(full=do_dREL)
        # initialise type information
        self.typedic={}
        self.primdic = {}   #typecode<->primitive type translation
        self.add_type_info()
        self.install_validation_functions()

    def dic_determine(self):
        if "on_this_dictionary" in self: 
            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
            self.def_id_spec = "_name"
            self.cat_id_spec = "_category.id"   #we add this ourselves 
            self.type_spec = "_type"
            self.enum_spec = "_enumeration"
            self.cat_spec = "_category"
            self.esd_spec = "_type_conditions"
            self.must_loop_spec = "_list"
            self.must_exist_spec = "_list_mandatory"
            self.list_ref_spec = "_list_reference"
            self.key_spec = "_list_mandatory"
            self.unique_spec = "_list_uniqueness"
            self.child_spec = "_list_link_child"
            self.parent_spec = "_list_link_parent"
            self.related_func = "_related_function"
            self.related_item = "_related_item"
            self.primitive_type = "_type"
            self.dep_spec = "xxx"
            self.cat_list = []   #to save searching all the time
            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
            return (name+version,"DDL1")
        elif len(self.get_roots()) == 1:              # DDL2/DDLm
            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])      
            # now change to dictionary scoping
            self.scoping = 'dictionary'
            name = self.master_block["_dictionary.title"]
            version = self.master_block["_dictionary.version"]
            if "_dictionary.class" in self.master_block:   #DDLm
                self.enum_spec = '_enumeration_set.state'
                self.key_spec = '_category.key_id'
                self.must_exist_spec = None
                self.cat_spec = '_name.category_id'
                self.primitive_type = '_type.contents'
                self.cat_id_spec = "_definition.id"
                self.def_id_spec = "_definition.id"
                return(name+version,"DDLm") 
            else:   #DDL2
                self.cat_id_spec = "_category.id"
                self.def_id_spec = "_item.name"
                self.key_spec = "_category_mandatory.name"
                self.type_spec = "_item_type.code"
                self.enum_spec = "_item_enumeration.value"
                self.esd_spec = "_item_type_conditions.code"
                self.cat_spec = "_item.category_id"
                self.loop_spec = "there_is_no_loop_spec!"
                self.must_loop_spec = "xxx"
                self.must_exist_spec = "_item.mandatory_code"
                self.child_spec = "_item_linked.child_name"
                self.parent_spec = "_item_linked.parent_name"
                self.related_func = "_item_related.function_code"
                self.related_item = "_item_related.related_name"
                self.unique_spec = "_category_key.name"
                self.list_ref_spec = "xxx"
                self.primitive_type = "_type"
                self.dep_spec = "_item_dependent.dependent_name"
                return (name+version,"DDL2")
        else:
            raise CifError("Unable to determine dictionary DDL version")

    def DDL1_normalise(self):
        # switch off block name collision checks
        self.standard = None
        # add default type information in DDL2 style
        # initial types and constructs
        base_types = ["char","numb","null"]
        prim_types = base_types[:]
        base_constructs = [".*",
            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
            "\"\" "]
        for key,value in self.items():
           newnames = [key]  #keep by default
           if "_name" in value:
               real_name = value["_name"]
               if isinstance(real_name,list):        #looped values
                   for looped_name in real_name:
                      new_value = value.copy()
                      new_value["_name"] = looped_name  #only looped name
                      self[looped_name] = new_value
                   newnames = real_name
               else:
                      self[real_name] = value
                      newnames = [real_name]
           # delete the old one
           if key not in newnames:
              del self[key]
        # loop again to normalise the contents of each definition
        for key,value in self.items():
           #unlock the block
           save_overwrite = value.overwrite
           value.overwrite = True
           # deal with a missing _list, _type_conditions
           if "_list" not in value: value["_list"] = 'no'
           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
           # deal with enumeration ranges
           if "_enumeration_range" in value:
               max,min = self.getmaxmin(value["_enumeration_range"])
               if min == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
               elif max == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
               else:
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
           #add any type construct information
           if "_type_construct" in value:
               base_types.append(value["_name"]+"_type")   #ie dataname_type
               base_constructs.append(value["_type_construct"]+"$")
               prim_types.append(value["_type"])     #keep a record
               value["_type"] = base_types[-1]   #the new type name

           #make categories conform with ddl2
           #note that we must remove everything from the last underscore
           if value.get("_category",None) == "category_overview":
                last_under = value["_name"].rindex("_")
                catid = value["_name"][1:last_under]
                value["_category.id"] = catid  #remove square brackets
                if catid not in self.cat_list: self.cat_list.append(catid)
           value.overwrite = save_overwrite
        # we now add any missing categories before filling in the rest of the
        # information
        for key,value in self.items():
            #print('processing ddl1 definition %s' % key)
            if "_category" in self[key]:
                if self[key]["_category"] not in self.cat_list:
                    # rogue category, add it in
                    newcat = self[key]["_category"]
                    fake_name = "_" + newcat + "_[]"
                    newcatdata = CifBlock()
                    newcatdata["_category"] = "category_overview"
                    newcatdata["_category.id"] = newcat
                    newcatdata["_type"] = "null"
                    self[fake_name] = newcatdata
                    self.cat_list.append(newcat)
        # write out the type information in DDL2 style
        self.master_block.AddLoopItem((
            ("_item_type_list.code","_item_type_list.construct",
              "_item_type_list.primitive_code"),
            (base_types,base_constructs,prim_types)
            ))

    def ddl1_cat_load(self):
        deflist = self.keys()       #slight optimization
        cat_mand_dic = {}
        cat_unique_dic = {}
        # a function to extract any necessary information from each definition
        def get_cat_info(single_def):
            if self[single_def].get(self.must_exist_spec)=='yes':
                thiscat = self[single_def]["_category"]
                curval = cat_mand_dic.get(thiscat,[])
                curval.append(single_def)
                cat_mand_dic[thiscat] = curval
            # now the unique items...
            # cif_core.dic throws us a curly one: the value of list_uniqueness is
            # not the same as the defined item for publ_body_label, so we have
            # to collect both together.  We assume a non-listed entry, which
            # is true for all current (May 2005) ddl1 dictionaries.
            if self[single_def].get(self.unique_spec,None)!=None:
                thiscat = self[single_def]["_category"]
                new_unique = self[single_def][self.unique_spec]
                uis = cat_unique_dic.get(thiscat,[])
                if single_def not in uis: uis.append(single_def)
                if new_unique not in uis: uis.append(new_unique)
                cat_unique_dic[thiscat] = uis

        [get_cat_info(a) for a in deflist] # apply the above function
        for cat in cat_mand_dic.keys():
            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
        for cat in cat_unique_dic.keys():
            self[cat]["_category_key.name"] = cat_unique_dic[cat]

    def create_pcloop(self,definition):
        old_children = self[definition].get('_item_linked.child_name',[])
        old_parents = self[definition].get('_item_linked.parent_name',[])
        if isinstance(old_children,unicode):
             old_children = [old_children]
        if isinstance(old_parents,unicode):
             old_parents = [old_parents]
        if (len(old_children)==0 and len(old_parents)==0) or \
           (len(old_children) > 1 and len(old_parents)>1):
             return
        if len(old_children)==0:
             old_children = [definition]*len(old_parents)
        if len(old_parents)==0:
             old_parents = [definition]*len(old_children)
        newloop = CifLoopBlock(dimension=1)
        newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
        newloop.AddLoopItem(('_item_linked.child_name',old_children))
        try:
            del self[definition]['_item_linked.parent_name']
            del self[definition]['_item_linked.child_name']
        except KeyError:
            pass
        self[definition].insert_loop(newloop)



    def DDL2_normalise(self):
       # use list comprehensions: Python3 filter objects are exhausted after one pass
       listed_defs = [a for a in self.keys() if isinstance(self[a].get('_item.name'),list)]
       # now filter out all the single element lists!
       dodgy_defs = [a for a in listed_defs if len(self[a]['_item.name']) > 1]
       for item_def in dodgy_defs:
                # print("DDL2 norm: processing %s" % item_def)
                thisdef = self[item_def]
                packet_no = thisdef['_item.name'].index(item_def)
                realcat = thisdef['_item.category_id'][packet_no]
                realmand = thisdef['_item.mandatory_code'][packet_no]
                # first add in all the missing categories
                # we don't replace the entry in the list corresponding to the
                # current item, as that would wipe out the information we want
                for child_no in range(len(thisdef['_item.name'])):
                    if child_no == packet_no: continue
                    child_name = thisdef['_item.name'][child_no]
                    child_cat = thisdef['_item.category_id'][child_no]
                    child_mand = thisdef['_item.mandatory_code'][child_no]
                    if child_name not in self:
                        self[child_name] = CifBlock()
                        self[child_name]['_item.name'] = child_name
                    self[child_name]['_item.category_id'] = child_cat
                    self[child_name]['_item.mandatory_code'] = child_mand
                self[item_def]['_item.name'] = item_def
                self[item_def]['_item.category_id'] = realcat
                self[item_def]['_item.mandatory_code'] = realmand

       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                     '_item_linked.parent_name' in self[a]]
       # now dodgy_defs contains all definition blocks with more than one child/parent link
       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
       for item_def in dodgy_defs:
             print('Processing %s' % item_def)
             thisdef = self[item_def]
             child_list = thisdef['_item_linked.child_name']
             parents = thisdef['_item_linked.parent_name']
             # for each parent, find the list of children.
             family = list(zip(parents,child_list))
             notmychildren = family         #We aim to remove non-children
             # Loop over the parents, relocating as necessary
             while len(notmychildren):
                # get all children of first entry
                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
                for parent,child in mychildren:   #parent is the same for all
                         # Make sure that we simply add in the new entry for the child, not replace it,
                         # otherwise we might spoil the child entry loop structure
                         try:
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                         except KeyError:
                             print('Creating new parent entry %s for definition %s' % (parent,child))
                             self[child]['_item_linked.parent_name'] = [parent]
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
                             continue
                         else:
                             # A parent loop already exists and so will a child loop due to the
                             # call to create_pcloop above
                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                             if len(goodpars)>0:   #no need to add it
                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                                 continue
                             print('Adding %s to %s entry' % (parent,child))
                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                             setattr(newpacket,'_item_linked.child_name',child)
                             setattr(newpacket,'_item_linked.parent_name',parent)
                             childloop.AddPacket(newpacket)
                #
                # Make sure the parent also points to the children.  We get
                # the current entry, then add our
                # new values if they are not there already
                #
                parent_name = mychildren[0][0]
                old_children = self[parent_name].get('_item_linked.child_name',[])
                old_parents = self[parent_name].get('_item_linked.parent_name',[])
                oldfamily = list(zip(old_parents,old_children))  #list: iterated repeatedly below
                newfamily = []
                print('Old parents -> %s' % repr(old_parents))
                for jj, childname in mychildren:
                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                    if len(alreadythere)>0: continue
                    print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                    old_children.append(childname)
                    old_parents.append(parent_name)
                # Now output the loop, blowing away previous definitions.  If there is something
                # else in this category, we are destroying it.
                newloop = CifLoopBlock(dimension=1)
                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
                newloop.AddLoopItem(('_item_linked.child_name',old_children))
                del self[parent_name]['_item_linked.parent_name']
                del self[parent_name]['_item_linked.child_name']
                self[parent_name].insert_loop(newloop)
                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
                # now make a new,smaller list
                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]

       # now flatten any single element lists
       single_defs = [a for a in listed_defs if len(self[a]['_item.name'])==1]
       for flat_def in single_defs:
           flat_keys = self[flat_def].GetLoop('_item.name').keys()
           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
       # now deal with the multiple lists
       # next we do aliases
       all_aliases = [a for a in self.keys() if '_item_aliases.alias_name' in self[a]]
       for aliased in all_aliases:
          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
          for alias in my_aliases:
              self[alias] = self[aliased].copy()   #we are going to delete stuff...
              del self[alias]["_item_aliases.alias_name"]
 
    def ddlm_parse_valid(self):
        if "_dictionary_valid.application" not in self.master_block:
            return
        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
            scope = getattr(scope_pack,"_dictionary_valid.application")
            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
            if scope[1] == "Mandatory":
                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
            elif scope[1] == "Prohibited":
                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)
                
    def ddlm_import(self,import_mode='All'):
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print ('Import mode %s applied to following frames' % import_mode)
        print (str([a[0] for a in import_frames]))
        if import_mode != 'All':
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents') == import_mode])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                dic_as_cif = CifFile(urlopen(full_uri),grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                child_frames = import_from.ddlm_all_children(from_cat_head)
                 # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if b[1] in self])
                print ('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # it will never happen again...
            del self[parent_block]["_import.get"]
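        # Sketch of a single import reference as read from '_import.get' above
        # (the keys are those accessed by this method; values are illustrative):
        #   {'file': 'templ_enum.cif', 'save': 'units_code', 'mode': 'Contents'}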

    def resolve_path(self,file_loc):
        url_comps = urlparse(file_loc)
        if url_comps[0]: return file_loc    #already full URI
        new_url = urljoin(self.my_uri,file_loc)
        #print("Transformed %s to %s for import " % (file_loc,new_url))
        return new_url
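        # e.g. if self.my_uri is 'file:///dicts/cif_core.dic' (illustrative),
        # resolve_path('templ_attr.cif') returns 'file:///dicts/templ_attr.cif'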



    def create_def_block_table(self):
        """ Create an internal table matching definition to block id """
        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
        # now get the actual ids instead of blocks
        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
        # remove non-definitions
        if self.diclang != "DDL1":
            top_blocks = list([a[0].lower() for a in self.get_roots()])
        else:
            top_blocks = ["on_this_dictionary"]
        # catch dodgy duplicates
        uniques = set([a[0] for a in proto_table])
        if len(uniques)<len(proto_table):
            def_names = list([a[0] for a in proto_table])
            dodgy = [a for a in def_names if def_names.count(a)>1]
            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])
        
    def __getitem__(self,key):
        """Access a datablock by definition id, after the lookup has been created"""
        try:
            return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
        except AttributeError:   #block_id_table not present yet
            return super(CifDic,self).__getitem__(key)
        except KeyError: # key is missing
            # print('Definition for %s not found, reverting to CifFile' % key)
            return super(CifDic,self).__getitem__(key)
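    # Illustrative behaviour: once block_id_table exists, lookup is by
    # definition id and is case-insensitive, e.g. (dataname illustrative)
    #   cd['_ATOM_SITE.label'] is cd['_atom_site.label']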

    def __setitem__(self,key,value):
        """Add a new definition block"""
        super(CifDic,self).__setitem__(key,value)
        try:
            self.block_id_table[value['_definition.id']]=key
        except AttributeError:   #does not exist yet
            pass

    def __delitem__(self,key):
        """Remove a definition"""
        try:
            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
            del self.block_id_table[key.lower()]
        except (AttributeError,KeyError):   #block_id_table not present yet
            super(CifDic,self).__delitem__(key)
            return
        # fix other datastructures
        # cat_obj table
        
    def keys(self):
        """Return all definitions"""
        try:
            return self.block_id_table.keys()
        except AttributeError:
            return super(CifDic,self).keys()

    def has_key(self,key):
        return key in self

    def __contains__(self,key):
        try:
            return key.lower() in self.block_id_table
        except AttributeError:
            return super(CifDic,self).__contains__(key)
            
    def items(self):
        """Return (key,value) pairs"""
        return list([(a,self[a]) for a in self.keys()])

    def unlock(self):
        """Allow overwriting of all definitions in this collection"""
        for a in self.keys():
            self[a].overwrite=True

    def lock(self):
        """Disallow changes in definitions"""
        for a in self.keys():
            self[a].overwrite=False

    def rename(self,oldname,newname,blockname_as_well=True):
        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
        change the underlying blockname too."""
        if blockname_as_well:
            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)        
            self.block_id_table[newname.lower()]=newname
            if oldname.lower() in self.block_id_table: #not removed
               del self.block_id_table[oldname.lower()]
        else:
            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
            del self.block_id_table[oldname.lower()]
            return
                                                 
    def get_root_category(self):
        """Get the single 'Head' category of this dictionary"""
        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
        if len(root_cats)>1 or len(root_cats)==0:
            raise CifError("Cannot determine a unique Head category, got" % repr(root_cats))
        return root_cats[0]

    def ddlm_immediate_children(self,catname):
        """Return a list of datanames for the immediate children of catname.  These are
        semantic children (i.e. based on _name.category_id), not structural children as
        in the case of StarFile.get_immediate_children"""
                                                 
        straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
        return list(straight_children)

    def ddlm_all_children(self,catname):
        """Return a list of all children, including the `catname`"""
        all_children = self.ddlm_immediate_children(catname)
        cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
        for c in cat_children:
            all_children.remove(c)
            all_children += self.ddlm_all_children(c)
        return all_children + [catname]

    def is_semantic_child(self,parent,maybe_child):
        """Return true if `maybe_child` is a child of `parent`"""
        all_children = self.ddlm_all_children(parent)
        return maybe_child in all_children

    def ddlm_danglers(self):
        """Return a list of definitions that do not have a category defined
        for them, or are children of an unattached category"""
        top_block = self.get_root_category()
        connected = set(self.ddlm_all_children(top_block))
        all_keys = set(self.keys())
        unconnected = all_keys - connected
        return list(unconnected)

    def get_ddlm_parent(self,itemname):
        """Get the parent category of itemname"""
        parent = self[itemname].get('_name.category_id','')
        if parent == '':  #no category defined
            raise CifError("%s has no parent" % itemname)
        return parent

    def expand_category_opt(self,name_list):
        """Return a list of all non-category items in a category or return the name
           if the name is not a category"""
        new_list = []
        for name in name_list:
          if self.get(name,{}).get('_definition.scope','Item') == 'Category':
            new_list += self.expand_category_opt([a for a in self.keys() if \
                     self[a].get('_name.category_id','').lower() == name.lower()])
          else:
            new_list.append(name)
        return new_list

    def get_categories(self):
        """Return a list of category names"""
        return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

    def names_in_cat(self,cat,names_only=False):
        names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
        if not names_only:
            return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
        else:
            return list([self[a]["_name.object_id"] for a in names])

                           

    def create_alias_table(self):
        """Populate an alias table that we can look up when searching for a dataname"""
        all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
        self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])

    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname"""
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats() 
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
              nn = []
              if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
              # add all child names to the table
              child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                             for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
              child_names += extra_names
              # append names for repeated (cat,obj) pairs instead of replacing them
              repeats = [(obj,name) for obj,name in child_names if (parent_cat,obj) in extra_table]
              extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,obj) not in extra_table]))
              for obj,name in repeats:
                  extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list
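        # Sketch of the resulting lookup table (entry illustrative):
        #   self.cat_obj_lookup_table[('atom_site','label')] == ['_atom_site.label']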

    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories. This
        is preferred to a fixed attribute as that fixed attribute would need to be
        updated after any edits"""
        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category. A key is
        a compound key, that is, it is a list"""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
                kk = []
                for child_cat in child_cats:
                    if child_cat in self.loop_expand_list:
                        kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                    # add these keys to our list
                    kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
                self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
                return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories' + repr(self.cat_key_table))
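        # Sketch of the resulting table (entry illustrative): keys are compound,
        # so each category maps to a list of key lists:
        #   self.cat_key_table['atom_site'] == [['_atom_site.label']]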

    def add_type_info(self):
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]  #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None:
                    fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None:
                    fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype

    def add_category_info(self,full=True):
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                        # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys

        else:
            self.parent_lookup = {}
            self.key_equivs = {}

    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]]"""
        self.unlock()
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,oldname))
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])  #no underscore at the beginning
        self.lock()

    def create_catobj_name(self,cat,obj):
        """Combine category and object in approved fashion to create id"""
        return ('_'+cat+'.'+obj)

    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        defid = self[itemname]
        if defid['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:    #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        objid = defid['_name.object_id']
        defid['_name.category_id'] = catname
        newid = itemname # stays the same for categories
        if defid.get('_definition.scope','Item') == 'Item':
            newid = self.create_catobj_name(catname,objid)
            defid['_definition.id']= newid
            self.rename(itemname,newid)
        self.set_parent(catname,newid)
        self.lock()
        return newid

    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj. This is not used for
        categories, but can be used for dictionaries"""
        if '_dictionary.title' not in self[one_def]:  #not a dictionary block
            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,newid)
            self[newid]['_definition.id']=newid
            self[newid]['_name.object_id']=newobj
        else:
            self.unlock()
            newid = newobj
            self.rename(one_def,newobj)
            self[newid]['_dictionary.title'] = newid
        self.lock()
        return newid

    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
           is true, the parent category does not have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is None:
            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(semantic_root)>0:
                semantic_root = semantic_root[0]
            else:
                semantic_root = syntactic_root
        else:
            semantic_root = catparent
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if not allow_dangler or catparent is None:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        else:
            self[catname]['_name.category_id'] = catparent
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        if is_loop:
            self[catname]['_definition.class'] = 'Loop'
        else:
            self[catname]['_definition.class'] = 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname

    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made."""
        self.unlock()
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
        # update our dictionary structures
        self.block_id_table[fullname]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
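        # Sketch of building a small dictionary fragment (names illustrative):
        #   cd.add_category('demo_cat')
        #   cd.add_definition('value','demo_cat',def_text='A demonstration item')
        #   # creates '_demo_cat.value'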
        
    def remove_definition(self,defname):
        """Remove a definition from the dictionary."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            children = self.ddlm_immediate_children(defname)
            [self.remove_definition(a) for a in children]
            cat_id = self[defname]['_definition.id'].lower()
        del self[defname]

    def get_cat_obj(self,name):
        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
        cat,obj = name.split('.')
        return (cat.strip('_'),obj)
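    # e.g. get_cat_obj('_cell.volume') -> ('cell', 'volume')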

    def get_name_by_cat_obj(self,category,object,give_default=False):
        """Return the dataname corresponding to the given category and object"""
        if category[0] == '_':    #accidentally left in
           true_cat = category[1:].lower()
        else:
           true_cat = category.lower()
        try:
            return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
        except KeyError:
            if give_default:
               return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
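    # e.g. get_name_by_cat_obj('atom_site','label') -> '_atom_site.label',
    # assuming that dataname is defined in the loaded dictionary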


    def WriteOut(self,**kwargs):
        myblockorder = self.get_full_child_list()
        self.set_grammar(self.grammar)
        self.standard = 'Dic'
        return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

    def get_full_child_list(self):
        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
        top_block = self.get_roots()[0][0]
        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
        if len(root_cat) == 1:
            all_names = [top_block] + self.recurse_child_list(root_cat[0])
            unrooted = self.ddlm_danglers()
            double_names =  set(unrooted).intersection(set(all_names))
            if len(double_names)>0:
                raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
            remaining = unrooted[:]
            for no_root in unrooted:
                if self[no_root].get('_definition.scope','Item')=='Category':
                    all_names += [no_root]
                    remaining.remove(no_root)
                    these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                    all_names += these_children
                    [remaining.remove(n) for n in these_children]
            # now sort by category
            ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
            for e in ext_cats:
                cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
                [remaining.remove(n) for n in cat_items]
                all_names += cat_items
            if len(remaining)>0:
                print('WARNING: following items do not seem to belong to a category??')
                print(repr(remaining))
                all_names += remaining
            print('Final block order: ' + repr(all_names))
            return all_names
        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')

    def cat_from_name(self,one_name):
        """Guess the category from the name. Use this only where the category is
        not semantically important, for example, when printing out"""
        (cat,obj) = one_name.split(".")
        if cat[0] == "_": cat = cat[1:]
        return cat

    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        final_list = [parentname]
        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
        child_blocks.sort()    #we love alphabetical order
        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
        final_list += child_items
        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
        for child_cat in child_cats:
            final_list += self.recurse_child_list(child_cat)
        return final_list



    def get_key_pack(self,category,value,data):
        keyname = self[category][self.unique_spec]
        onepack = data.GetPackKey(keyname,value)
        return onepack

    def get_number_with_esd(numstring):
        """Return (value,esd) from a CIF number string such as '4.37(5)'.
        Note: deliberately defined without [[self]]; called as a plain function."""
        numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
        our_match = re.match(numb_re,numstring)
        if our_match:
            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
        else:
            return None,None
        if dot or q: return None,None     #a dot or question mark
        if exp:          #has exponent
           exp = exp.replace("d","e")     # mop up old fashioned numbers
           exp = exp.replace("D","e")
           base_num = base_num + exp
        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
        base_num = float(base_num)
        # work out esd, if present.
        if esd:
            esd = float(esd[1:-1])    # no brackets
            if dad:                   # decimal point + digits
                esd = esd * (10 ** (-1* len(dad)))
            if exp:
                esd = esd * (10 ** (float(exp[1:])))
        return base_num,esd
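    # Illustrative behaviour, worked by hand from the regular expression above:
    #   get_number_with_esd('4.37(5)')   # -> (4.37, 0.05): esd scaled by decimal places
    #   get_number_with_esd('12')        # -> (12.0, None)
    #   get_number_with_esd('?')         # -> (None, None): unknown value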

    def getmaxmin(self,rangeexp):
        """Parse a DDL range expression of the form 'min:max' and return (maximum,minimum)"""
        regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        regexp = regexp + ":" + regexp
        regexp = re.match(regexp,rangeexp)
        try:
            minimum = regexp.group(1)
            maximum = regexp.group(7)
        except AttributeError:
            print("Can't match %s" % rangeexp)
            return None,None    #avoid a NameError below: minimum/maximum are unset
        if minimum == None: minimum = "."
        else: minimum = float(minimum)
        if maximum == None: maximum = "."
        else: maximum = float(maximum)
        return maximum,minimum
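    # Illustrative behaviour (note the (maximum,minimum) return order):
    #   cdic.getmaxmin('0.0:100.0')   # -> (100.0, 0.0)
    #   cdic.getmaxmin('0.0:')        # -> ('.', 0.0), where '.' means unbounded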

    def initialise_drel(self):
        """Parse drel functions and prepare data structures in dictionary"""
        self.ddlm_parse_valid() #extract validity information from data block
        self.transform_drel()   #parse the drel functions
        self.add_drel_funcs()   #put the drel functions into the namespace
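    # Typical dREL workflow sketch (file names are hypothetical):
    #   cdic = CifDic('cif_core.dic')
    #   cdic.initialise_drel()
    #   cf = ReadCif('mydata.cif')
    #   vol = cdic.derive_item('_cell.volume',cf.first_block())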

    def transform_drel(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        import traceback
        parser = drel_ast_yacc.parser
        lexer = drel_ast_yacc.lexer
        my_namespace = self.keys()
        my_namespace = dict(zip(my_namespace,my_namespace))
        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                              and self[a].get("_name.category_id","")!= "function"]
        for derivable in derivable_list:
            target_id = derivable
            # reset the list of visible names for parser
            special_ids = [dict(zip(self.keys(),self.keys()))]
            print("Target id: %s" % derivable)
            drel_exprs = self[derivable]["_method.expression"]
            drel_purposes = self[derivable]["_method.purpose"]
            all_methods = []
            if not isinstance(drel_exprs,list):
                drel_exprs = [drel_exprs]
                drel_purposes = [drel_purposes]
            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
                if drel_purpose != 'Evaluation':
                    continue
                drel_expr = "\n".join(drel_expr.splitlines())
                # print("Transforming %s" % drel_expr)
                # List categories are treated differently...
                try:
                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
                except Exception:
                    print('Syntax error in method for %s; leaving as is' % derivable)
                    a,b = sys.exc_info()[:2]
                    print((repr(a),repr(b)))
                    print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                    # reset the lexer
                    lexer.begin('INITIAL')
                    continue
                # Construct the python method
                cat_meth = False
                if self[derivable].get('_definition.scope','Item') == 'Category':
                    cat_meth = True
                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                                           loopable=loop_info,
                                                             cif_dic = self,cat_meth=cat_meth)
                all_methods.append(pyth_meth)
            if len(all_methods)>0:
                save_overwrite = self[derivable].overwrite
                self[derivable].overwrite = True
                self[derivable]["_method.py_expression"] = all_methods
                self[derivable].overwrite = save_overwrite
            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))

    def add_drel_funcs(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
        funcnames = [(self[a]["_name.object_id"],
                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
        # create executable python code...
        parser = drel_ast_yacc.parser
        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        for funcname,funcbody in funcnames:
            newline_body = "\n".join(funcbody.splitlines())
            parser.target_id = funcname
            res_ast = parser.parse(newline_body)
            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
            #print('dREL library function ->\n' + py_function)
            global_table = globals()
            exec(py_function, global_table)    #add to namespace
        #print('Globals after dREL functions added:' + repr(globals()))
        self.ddlm_functions = globals()  #for outside access

    @track_recursion
    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
        key = start_key   #starting value
        result = None     #success is a non-None value
        default_result = False #we have not used a default value
        # check for aliases
        # check for an older form of a new value
        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
        if len(found_it)>0:
            corrected_type = self.change_type(key,cifdata[found_it[0]])
            return corrected_type
        # now do the reverse check - any alternative form
        alias_name = [a for a in self.alias_table.items() if key in a[1]]
        print('Aliases for %s: %s' % (key,repr(alias_name)))
        if len(alias_name)==1:
            key = alias_name[0][0]   #actual definition name
            if key in cifdata: return self.change_type(key,cifdata[key])
            found_it = [k for k in alias_name[0][1] if k in cifdata]
            if len(found_it)>0:
                return self.change_type(key,cifdata[found_it[0]])
        elif len(alias_name)>1:
            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

        the_category = self[key]["_name.category_id"]
        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        # store any default value in case we have a problem
        def_val = self[key].get("_enumeration.default","")
        def_index_val = self[key].get("_enumeration.def_index_id","")
        if len(has_cat_names)==0: # try category method
            cat_result = {}
            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
            pulled_from_cats = [(k,[
                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
                               ) for k in pulled_from_cats]
            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
            if '_category_construct_local.type' in self[the_category]:
                print("**Now constructing category %s using DDLm attributes**" % the_category)
                try:
                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s (error)' % the_category)
            # Trying a pull-back when the category is partially populated
            # will not work, hence we test that cat_result has no keys
            if len(pulled_to_cats)>0 and len(cat_result)==0:
                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
                try:
                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s from pullback information (error)' % the_category)
            if '_method.py_expression' in self[the_category] and key not in cat_result:
                print("**Now applying category method for %s in search of %s**" % (the_category,key))
                cat_result = self.derive_item(the_category,cifdata,store_value=True)
            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
            # do we now have our value?
            if key in cat_result:
                return cat_result[key]

        # Recalculate in case it actually worked
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        the_funcs = self[key].get('_method.py_expression',"")
        if the_funcs:   #attempt to calculate it
            #global_table = globals()
            #global_table.update(self.ddlm_functions)
            for one_func in the_funcs:
                print('Executing function for %s:' % key)
                #print(one_func)
                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
                # print('in following global environment: ' + repr(global_table))
                stored_setting = cifdata.provide_value
                cifdata.provide_value = True
                try:
                    result = pyfunc(cifdata)
                except CifRecursionError as s:
                    print(s)
                    result = None
                except StarFile.StarDerivationError as s:
                    print(s)
                    result = None
                finally:
                    cifdata.provide_value = stored_setting
                if result is not None:
                    break
                #print("Function returned {!r}".format(result))

        if result is None and allow_defaults:   # try defaults
            if def_val:
                result = self.change_type(key,def_val)
                default_result = True
            elif def_index_val:            #derive a default value
                index_vals = self[key]["_enumeration_default.index"]
                val_to_index = cifdata[def_index_val]     #what we are keying on
                lcase_comp = self[def_index_val]['_type.contents'] in ['Code','Name','Tag']
                if lcase_comp:    #case-insensitive comparison for these types
                    index_vals = [a.lower() for a in index_vals]
                # Handle loops
                if isinstance(val_to_index,list):
                    if lcase_comp:
                        val_to_index = [a.lower() for a in val_to_index]
                    keypos = [index_vals.index(a) for a in val_to_index]
                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
                else:
                    if lcase_comp:
                        val_to_index = val_to_index.lower()
                    keypos = index_vals.index(val_to_index)   #value error if no such value available
                    result = self[key]["_enumeration_default.value"][keypos]
                    default_result = True   #flag that it must be extended
                result = self.change_type(key,result)
                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

        # read it in
        if result is None:   #can't do anything else
            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
            raise StarFile.StarDerivationError(start_key)
        is_looped = False
        if self[the_category].get('_definition.class','Set')=='Loop':
            is_looped = True
            if len(has_cat_names)>0:   #this category already exists
                if result is None or default_result: #need to create a list of values
                    loop_len = len(cifdata[has_cat_names[0]])
                    out_result = [result]*loop_len
                    result = out_result
            else:   #nothing exists in this category, we can't store this at all
                print('Resetting result for %s (was %s) to empty list as category is empty' % (key,repr(result)))
                result = []

        # now try to insert the new information into the right place
        # find if items of this category already appear...
        # Never cache empty values
        if not (isinstance(result,list) and len(result)==0) and\
          store_value:
            if self[key].get("_definition.scope","Item")=='Item':
                if is_looped:
                    result = self.store_new_looped_value(key,cifdata,result,default_result)
                else:
                    result = self.store_new_unlooped_value(key,cifdata,result)
            else:
                self.store_new_cat_values(cifdata,result,the_category)
        return result
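    # Illustrative call sketch (assumes [[cb]] is a CifBlock read from a data file):
    #   val = cdic.derive_item('_cell.volume',cb,store_value=True,allow_defaults=False)
    # store_value caches the derived value back into [[cb]]; allow_defaults=False
    # refuses dictionary default values so only true derivations succeed.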

    def store_new_looped_value(self,key,cifdata,result,default_result):
          """Store a looped value from the dREL system into a CifFile"""
          # try to change any matrices etc. to lists
          the_category = self[key]["_name.category_id"]
          out_result = result
          if result is not None and not default_result:
                  # find any numpy arrays
                  def conv_from_numpy(one_elem):
                      if not hasattr(one_elem,'dtype'):
                         if isinstance(one_elem,(list,tuple)):
                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
                         return one_elem
                      if one_elem.size > 1:   #so is not a float
                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
                      else:
                          try:
                            return one_elem.item(0)
                          except:
                            return one_elem
                  out_result = [conv_from_numpy(a) for a in result]
          # so out_result now contains a value suitable for storage
          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
          has_cat_names = [a for a in cat_names if a in cifdata]
          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
          if len(has_cat_names)>0:   #this category already exists
              cifdata[key] = out_result      #lengths must match or else!!
              cifdata.AddLoopName(has_cat_names[0],key)
          else:
              cifdata[key] = out_result
              cifdata.CreateLoop([key])
          print('Loop info:' + repr(cifdata.loops))
          return out_result

    def store_new_unlooped_value(self,key,cifdata,result):
          """Store a single value from the dREL system"""
          if result is not None and hasattr(result,'dtype'):
              if result.size > 1:
                  out_result = StarFile.StarList(result.tolist())
                  cifdata[key] = out_result
              else:
                  cifdata[key] = result.item(0)
          else:
              cifdata[key] = result
          return result

    def construct_category(self,category,cifdata,store_value=True):
        """Construct a category using DDLm attributes"""
        con_type = self[category].get('_category_construct_local.type',None)
        if con_type == None:
            return {}
        if con_type == 'Pullback' or con_type == 'Filter':
            morphisms  = self[category]['_category_construct_local.components']
            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
            cats = [self[a]['_name.category_id'] for a in morphisms]
            cat_keys = [self[a]['_category.key_id'] for a in cats]
            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
            if con_type == 'Filter':
                int_filter = self[category].get('_category_construct_local.integer_filter',None)
                text_filter = self[category].get('_category_construct_local.text_filter',None)
                if int_filter is not None:
                    morph_values.append([int(a) for a in int_filter])
                if text_filter is not None:
                    morph_values.append(text_filter)
                cat_values.append(range(len(morph_values[-1])))
            # create the mathematical product filtered by equality of dataname values
            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
            # now prepare for return
            if len(pullback_ids)==0:
                return {}
            newids = self[category]['_category_construct_local.new_ids']
            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
            if con_type == 'Pullback':
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
            elif con_type == 'Filter':   #simple filter
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
            if store_value:
                self.store_new_cat_values(cifdata,final_results,category)
            return final_results
        return {}    #unrecognised construct type: nothing we can derive
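    # Illustrative sketch of the pullback construction above, with made-up values:
    #   cat_values   = [['a1','a2'], ['b1','b2']]  # key values of the two categories
    #   morph_values = [['x','y'],   ['y','x']]    # corresponding mapped values
    # keeping only pairs whose mapped values agree gives
    #   pullback_ids = [('a1','b2'), ('a2','b1')]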

    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
        """Each of the categories in source_categories are pullbacks that include
        the target_category"""
        target_key = self[target_category]['_category.key_id']
        result = {target_key:[]}
        first_time = True
        # for each source category, determine which element goes to the target
        for sc in source_categories:
            components = self[sc]['_category_construct_local.components']
            comp_cats = [self[c]['_name.category_id'] for c in components]
            new_ids = self[sc]['_category_construct_local.new_ids']
            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
            if len(components) == 2:  # not a filter
                element_pos = comp_cats.index(target_category)
                old_id = source_ids[element_pos]
                print('Using %s to populate %s' % (old_id,target_key))
                result[target_key].extend(cifdata[old_id])
                # project through all identical names
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
                # we only include keys that are common to all categories
                if first_time:
                    result.update(extra_result)
                else:
                    for k in extra_result.keys():
                        if k in result:
                            print('Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
            else:
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                    result[target_key].extend(cifdata[source_ids[0]])
                    for k in extra_result.keys():
                        if k in result:
                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
                        else:
                            result[k]=extra_result[k]
                    # Bonus derivation if there is a singleton filter
                    if self[sc]['_category_construct_local.type'] == 'Filter':
                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
                        if int_filter is not None:
                            filter_values = int_filter
                        else:
                            filter_values = text_filter
                        if len(filter_values)==1:    #a singleton
                            extra_dataname = self[sc]['_category_construct_local.components'][0]
                            if int_filter is not None:
                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                            else:
                                new_value = filter_values * len(cifdata[source_ids[0]])
                            if extra_dataname not in result:
                                result[extra_dataname] = new_value
                            else:
                                result[extra_dataname].extend(new_value)
                    else:
                        raise ValueError('Unexpected category construct type: ' + self[sc]['_category_construct_local.type'])
            first_time = False
        # check for sanity - all dataname lengths must be identical
        datalen = len(set([len(a) for a in result.values()]))
        if datalen != 1:
            raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
        if store_value:
            print('Now storing ' + repr(result))
            self.store_new_cat_values(cifdata,result,target_category)
        return result

    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
        """Copy across datanames for which the from_category key equals [[key_vals]]"""
        result = {}
        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
        can_project = s_names_in_cat & t_names_in_cat
        can_project -= set(skip_names)  #already dealt with
        source_key = self[from_category]['_category.key_id']
        print('Source dataname set: ' + repr(s_names_in_cat))
        print('Target dataname set: ' + repr(t_names_in_cat))
        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
        for project_name in can_project:
            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
            if key_vals is None:
                try:
                    result[full_to_name] = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    pass
            else:
                all_key_vals = cifdata[source_key]
                filter_pos = [all_key_vals.index(a) for a in key_vals]
                try:
                    all_data_vals = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    continue    #no values obtainable for this dataname, skip it
                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
        return result

    def store_new_cat_values(self,cifdata,result,the_category):
        """Store the values in [[result]] into [[cifdata]]"""
        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
        double_names = [a for a in result.keys() if a in cifdata]
        if len(double_names)>0:
            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
            if set(already_present) != set(result.keys()):
                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
                return
            #check key values (only one key dataname is expected)
            old_keys = set(cifdata[the_key[0]])
            common_keys = old_keys & set(result[the_key[0]])
            if len(common_keys)>0:
                print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
                return
            #extend result values with old values
            for one_name,one_value in result.items():
                result[one_name].extend(cifdata[one_name])
        for one_name, one_value in result.items():
            try:
                self.store_new_looped_value(one_name,cifdata,one_value,False)
            except StarFile.StarError:
                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
        #put the key as the first item
        print('Fixing item order for {}'.format(repr(the_key)))
        for one_key in the_key:  #should only be one
            cifdata.ChangeItemOrder(one_key,0)


    def generate_default_packet(self,catname,catkey,keyvalue):
        """Return a StarPacket with items from ``catname`` and a key value
        of ``keyvalue``"""
        newpack = StarPacket()
        for na in self.names_in_cat(catname):
            def_val = self[na].get("_enumeration.default","")
            if def_val:
                final_val = self.change_type(na,def_val)
                newpack.append(final_val)     #one value per dataname
                setattr(newpack,na,final_val)
        if len(newpack)>0:
            newpack.append(keyvalue)
            setattr(newpack,catkey,keyvalue)
        return newpack
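    # Illustrative usage sketch (category, key and value are hypothetical):
    #   pack = cdic.generate_default_packet('atom_site','_atom_site.label','C1')
    # The packet carries one attribute per dataname in the category that has a
    # dictionary default, plus the key dataname set to 'C1'.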


    def switch_numpy(self,to_val):
        pass

    def change_type(self,itemname,inval):
        import numpy
        if inval == "?": return inval
        change_function = convert_type(self[itemname])
        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
            newval = list([change_function(a) for a in inval])
        else:
            newval = change_function(inval)
        return newval
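    # Illustrative behaviour sketch: conversion is driven by the dictionary type
    # of the dataname, so for a Real-valued item one would expect
    #   cdic.change_type('_cell.volume','501.2')   # -> 501.2 (float)
    # while the unknown value '?' is always passed through unchanged.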

    def install_validation_functions(self):
        """Install the DDL-appropriate validation checks"""
        if self.diclang != 'DDLm':
          self.item_validation_funs = [
            self.validate_item_type,
            self.validate_item_esd,
            self.validate_item_enum,   # functions which check conformance
            self.validate_enum_range,
            self.validate_looping]
          self.loop_validation_funs = [
            self.validate_loop_membership,
            self.validate_loop_key,
            self.validate_loop_references]    # functions checking loop values
          self.global_validation_funs = [
            self.validate_exclusion,
            self.validate_parent,
            self.validate_child,
            self.validate_dependents,
            self.validate_uniqueness] # where we need to look at other values
          self.block_validation_funs = [  # where only a full block will do
            self.validate_mandatory_category]
          self.global_remove_validation_funs = [
            self.validate_remove_parent_child] # removal is quicker with special checks
        elif self.diclang == 'DDLm':
            self.item_validation_funs = [
                self.validate_item_enum,
                self.validate_item_esd_ddlm,
                ]
            self.loop_validation_funs = [
                self.validate_looping_ddlm,
                self.validate_loop_key_ddlm,
                self.validate_loop_membership
                ]
            self.global_validation_funs = []
            self.block_validation_funs = [
                self.check_mandatory_items,
                self.check_prohibited_items
                ]
            self.global_remove_validation_funs = []
        self.optimize = False        # default value
        self.done_parents = []
        self.done_children = []
        self.done_keys = []

    def validate_item_type(self,item_name,item_value):
        def mymatch(m,a):
            res = m.match(a)
            if res != None: return res.group()
            else: return ""
        target_type = self[item_name].get(self.type_spec)
        if target_type == None:          # e.g. a category definition
            return {"result":True}                  # not restricted in any way
        matchexpr = self.typedic[target_type]
        item_values = listify(item_value)
        #for item in item_values:
            #print("Type match " + item_name + " " + item + ":",)
        #skip dots and question marks
        check_all = [a for a in item_values if a !="." and a != "?"]
        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def decide(self,result_list):
        """Construct the return list"""
        if len(result_list)==0:
               return {"result":True}
        else:
               return {"result":False,"bad_values":result_list}

    def validate_item_container(self, item_name,item_value):
        container_type = self[item_name]['_type.container']
        item_values = listify(item_value)
        if container_type == 'Single':
           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
           return self.decide(okcheck)
        if container_type in ('Multiple','List'):
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           return self.decide(okcheck)
        if container_type == 'Array':    #A list with numerical values
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           first_check = self.decide(okcheck)
           if not first_check['result']: return first_check
           #numerical content check below was never completed; accept for now
           #num_check = [a for a in item_values if len([b for b in a if not isinstance
           return first_check

    def validate_item_esd(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) != 'numb':
            return {"result":None}
        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
        if can_esd: return {"result":True}         #must be OK!
        item_values = listify(item_value)
        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_item_esd_ddlm(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) not in \
        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
            return {"result":None}
        can_esd = True
        if self[item_name].get('_type.purpose') != 'Measurand':
            can_esd = False
        item_values = listify(item_value)
        check_all = [get_number_with_esd(a)[1] for a in item_values]
        check_all = [v for v in check_all if (can_esd and v == None) or \
                 (not can_esd and v != None)]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_enum_range(self,item_name,item_value):
        if "_item_range.minimum" not in self[item_name] and \
           "_item_range.maximum" not in self[item_name]:
            return {"result":None}
        minvals = self[item_name].get("_item_range.minimum",default = ["."])
        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
        def makefloat(a):
            if a == ".": return a
            else: return float(a)
        maxvals = map(makefloat, maxvals)
        minvals = map(makefloat, minvals)
        rangelist = list(zip(minvals,maxvals))
        item_values = listify(item_value)
        def map_check(rangelist,item_value):
            if item_value == "?" or item_value == ".": return True
            iv,esd = get_number_with_esd(item_value)
            if iv==None: return None  #shouldn't happen as is numb type
            for lower,upper in rangelist:
                #check the minima
                if lower == ".": lower = iv - 1
                if upper == ".": upper = iv + 1
                if iv > lower and iv < upper: return True
                if upper == lower and iv == upper: return True
            # debug
            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
            return False
        check_all = [a for a in item_values if map_check(rangelist,a) != True]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_item_enum(self,item_name,item_value):
        try:
            enum_list = self[item_name][self.enum_spec][:]
        except KeyError:
            return {"result":None}
        enum_list.append(".")   #default value
        enum_list.append("?")   #unknown
        item_values = listify(item_value)
        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
        check_all = [a for a in item_values if a not in enum_list]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_looping(self,item_name,item_value):
        try:
            must_loop = self[item_name][self.must_loop_spec]
        except KeyError:
            return {"result":None}
        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
            return {"result":False}      #this could be triggered
        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
            return {"result":False}
        return {"result":True}

    def validate_looping_ddlm(self,loop_names):
        """Check that all names are loopable"""
        truly_loopy = self.get_final_cats(loop_names)
        if len(truly_loopy)<len(loop_names):  #some are bad
            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
            return {"result":False,"bad_items":not_looped}
        return {"result":True}


    def validate_loop_membership(self,loop_names):
        final_cat = self.get_final_cats(loop_names)
        bad_items =  [a for a in final_cat if a != final_cat[0]]
        if len(bad_items)>0:
            return {"result":False,"bad_items":bad_items}
        else: return {"result":True}

    def get_final_cats(self,loop_names):
        """Return a list of the uppermost parent categories for the loop_names. Names
        that are not from loopable categories are ignored."""
        try:
            categories = [self[a][self.cat_spec].lower() for a in loop_names]
        except KeyError:       #category is mandatory
            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
        return [self.parent_lookup[a] for a in truly_looped]

    def validate_loop_key(self,loop_names):
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present 
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            if isinstance(names_to_check,unicode):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue          #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    for alt_names in alternates:
                        alt = [a for a in alt_names if a in loop_names]
                        if len(alt) == 0:
                            return {"result":False,"bad_items":loop_key}  # no alternates
        return {"result":True}

    def validate_loop_key_ddlm(self,loop_names):
        """Make sure at least one of the necessary keys are available"""
        final_cats = self.get_final_cats(loop_names)
        if len(final_cats)>0:
            poss_keys = self.cat_key_table[final_cats[0]]
            found_keys = [a for a in poss_keys if a in loop_names]
            if len(found_keys)>0:
                return {"result":True}
            else:
                return {"result":False,"bad_items":poss_keys}
        else:
            return {"result":True}

    def validate_loop_references(self,loop_names):
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a != None]
        # build a flat list.  For efficiency we don't remove duplicates, as
        # we expect no more than the order of 10 or 20 looped names.
        def flat_func(a,b):
            if isinstance(b,unicode):
               a.append(b)       #single name
            else:
               a.extend(b)       #list of names
            return a
        flat_mh = []
        [flat_func(flat_mh,a) for a in must_haves]
        group_mh = filter(lambda a:a[-1]=="_",flat_mh)
        single_mh = filter(lambda a:a[-1]!="_",flat_mh)
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            nl = map(lambda a:a[:len(s_item)],name_list)
            if s_item in nl: return True
            return False
        res_g = [a for a in group_mh if check_gr(a,loop_names)]
        if len(res) == 0 and len(res_g) == 0: return {"result":True}
        # construct alternate list
        alternates = list(map(lambda a: (a,self.get_alternates(a)),res))
        # record names with no alternates at all, purely for error reporting
        missing_alts = [a[0] for a in alternates if a[1] == []]
        alternates = [a for a in alternates if a[1] != []]
        if len(alternates) != len(res):
           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                              #doesn't have an alternate
        #loop over alternates
        for orig_name,alt_names in alternates:
             alt = [a for a in alt_names if a in loop_names]
             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
        return {"result":True}        #found alternates

    def get_alternates(self,main_name,exclusive_only=False):
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates != None:
            alt_names =  self[main_name].get(self.related_item,None)
            if isinstance(alt_names,unicode):
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                                             or a[1]=="replace"]
            else:
                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
            alt_names = list([a[0] for a in alt_names])
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names
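    # Illustrative usage (results depend entirely on the loaded dictionary):
    #   cdic.get_alternates('_cell.volume')   # e.g. ['_cell_volume'] if that alias is listed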


    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
       alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
       item_name_list = [a.lower() for a in whole_block.keys()]
       item_name_list.extend([a.lower() for a in provisional_items.keys()])
       bad = [a for a in alternates if a in item_name_list]
       if len(bad)>0:
           print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
           return {"result":False,"bad_items":bad}
       else: return {"result":True}

    # validate that parent exists and contains matching values
    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        parent_item = self[item_name].get(self.parent_spec)
        if not parent_item: return {"result":None}   #no parent specified
        if isinstance(parent_item,list):
            parent_item = parent_item[0]
        if self.optimize:
            if parent_item in self.done_parents:
                return {"result":None}
            else:
                self.done_parents.append(parent_item)
                print("Done parents %s" % repr(self.done_parents))
        # initialise parent/child values
        if isinstance(item_value,unicode):
            child_values = [item_value]
        else: child_values = item_value[:]    #copy for safety
        # track down the parent
        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
        # we have collected all parent values into the global block - so no need to search
        # for them elsewhere.
        # print("Looking for {!r}".format(parent_item))
        parent_values = globals.get(parent_item)
        if not parent_values:
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
        if not parent_values:
            # go for alternates
            namespace = list(whole_block.keys())   #list() for Python 3 views
            namespace.extend(provisional_items.keys())
            namespace.extend(globals.keys())
            alt_names = filter_present(self.get_alternates(parent_item),namespace)
            if len(alt_names) == 0:
                if len([a for a in child_values if a != "." and a != "?"])>0:
                    return {"result":False,"parent":parent_item}#no parent available -> error
                else:
                    return {"result":None}       #maybe True is more appropriate??
            parent_item = alt_names[0]           #should never be more than one??
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
            if not parent_values:   # check global block
                parent_values = globals.get(parent_item)
        if isinstance(parent_values,unicode):
            parent_values = [parent_values]
        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
        #                                          item_name, parent_values, child_values))
        missing = self.check_parent_child(parent_values,child_values)
        if len(missing) > 0:
            return {"result":False,"bad_values":missing,"parent":parent_item}
        return {"result":True}

    def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        try:
            child_items = self[item_name][self.child_spec][:]  #copy
        except KeyError:
            return {"result":None}    #not relevant
        # special case for dictionaries  -> we check parents of children only
        if item_name in globals:  #dictionary so skip
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        if isinstance(item_value,unicode): # single value
            parent_values = [item_value]
        else: parent_values = item_value[:]
        # expand child list with list of alternates
        for child_item in child_items[:]:
            child_items.extend(self.get_alternates(child_item))
        # now loop over the children
        for child_item in child_items:
            if self.optimize:
                if child_item in self.done_children:
                    return {"result":None}
                else:
                    self.done_children.append(child_item)
                    print("Done children %s" % repr(self.done_children))
            if child_item in provisional_items:
                child_values = provisional_items[child_item][:]
            elif child_item in whole_block:
                child_values = whole_block[child_item][:]
            else:  continue
            if isinstance(child_values,unicode):
                child_values = [child_values]
                # print("Checking child %s against %s, values %r/%r" % (child_item,
                #       item_name, child_values, parent_values))
            missing = self.check_parent_child(parent_values,child_values)
            if len(missing)>0:
                return {"result":False,"bad_values":missing,"child":child_item}
        return {"result":True}       #could mean that no child items present

    #a generic checker: all child vals should appear in parent_vals
    def check_parent_child(self,parent_vals,child_vals):
        # shield ourselves from dots and question marks
        pv = parent_vals[:]
        pv.extend([".","?"])
        res =  [a for a in child_vals if a not in pv]
        #print("Missing: %s" % res)
        return res

    def validate_remove_parent_child(self,item_name,whole_block):
        try:
            child_items = self[item_name][self.child_spec]
        except KeyError:
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        for child_item in child_items:
            if child_item in whole_block:
                return {"result":False,"child":child_item}
        return {"result":True}

    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
        try:
            dep_items = self[item_name][self.dep_spec][:]
        except KeyError:
            return {"result":None}    #not relevant
        if isinstance(dep_items,unicode):
            dep_items = [dep_items]
        actual_names = list(whole_block.keys())   #list() for Python 3 views
        actual_names.extend(prov.keys())
        actual_names.extend(globals.keys())
        missing = [a for a in dep_items if a not in actual_names]
        if len(missing) > 0:
            alternates = map(lambda a:[self.get_alternates(a),a],missing)
            # compact way to get a list of alternative items which are
            # present
            have_check = [(filter_present(b[0],actual_names),
                                       b[1]) for b in alternates]
            have_check = list([a for a in have_check if len(a[0])==0])
            if len(have_check) > 0:
                have_check = [a[1] for a in have_check]
                return {"result":False,"bad_items":have_check}
        return {"result":True}

    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                                                                  globals={}):
        category = self[item_name].get(self.cat_spec)
        if category == None:
            print("No category found for %s" % item_name)
            return {"result":None}
        # print("Category {!r} for item {}".format(category, item_name))
        # we make a copy in the following as we will be removing stuff later!
        unique_i = self[category].get("_category_key.name",[])[:]
        if isinstance(unique_i,unicode):
            unique_i = [unique_i]
        if item_name not in unique_i:       #no need to verify
            return {"result":None}
        if isinstance(item_value,unicode):  #not looped
            return {"result":None}
        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
        # check that we can't optimize by not doing this check
        if self.optimize:
            if unique_i in self.done_keys:
                return {"result":None}
            else:
                self.done_keys.append(unique_i)
        val_list = []
        # get the matching data from any other data items
        unique_i.remove(item_name)
        other_data = []
        if len(unique_i) > 0:            # i.e. do have others to think about
           for other_name in unique_i:
           # we look for the value first in the provisional dict, then the main block
           # the logic being that anything in the provisional dict overrides the
           # main block
               if other_name in provisional_items:
                   other_data.append(provisional_items[other_name])
               elif other_name in whole_block:
                   other_data.append(whole_block[other_name])
               elif self[other_name].get(self.must_exist_spec)=="implicit":
                   other_data.append([item_name]*len(item_value))  #placeholder
               else:
                   return {"result":False,"bad_items":other_name}#missing data name
        # ok, so we go through all of our values
        # this works by comparing lists of strings to one other, and
        # so could be fooled if you think that '1.' and '1' are
        # identical
        for i in range(len(item_value)):
            #print("Value no. %d" % i, end=" ")
            this_entry = item_value[i]
            for j in range(len(other_data)):
                this_entry = " ".join([this_entry,other_data[j][i]])
            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
            if this_entry in val_list:
                return {"result":False,"bad_values":this_entry}
            val_list.append(this_entry)
        return {"result":True}


    def validate_mandatory_category(self,whole_block):
        mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
        if len(mand_cats) == 0:
            return {"result":True}
        # print("Mandatory categories - {!r}".format(mand_cats)
        # find which categories each of our datanames belongs to
        all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
        missing = set(mand_cats) - set(all_cats)
        if len(missing) > 0:
            return {"result":False,"bad_items":repr(missing)}
        return {"result":True}

    def check_mandatory_items(self,whole_block,default_scope='Item'):
        """Return an error if any mandatory items are missing"""
        if len(self.scopes_mandatory)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
        if len(missing)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":missing}

    def check_prohibited_items(self,whole_block,default_scope='Item'):
        """Return an error if any prohibited items are present"""
        if len(self.scopes_naughty)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
        if len(present)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":present}


    def run_item_validation(self,item_name,item_value):
        return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}

    def run_loop_validation(self,loop_names):
        return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}

    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
        results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
        return {item_name:results}

    def run_block_validation(self,whole_block,block_scope='Item'):
        results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs])
        # fix up the return values
        return {"whole_block":results}

    def optimize_on(self):
        self.optimize = True
        self.done_keys = []
        self.done_children = []
        self.done_parents = []

    def optimize_off(self):
        self.optimize = False
        self.done_keys = []
        self.done_children = []
        self.done_parents = []



class ValidCifBlock(CifBlock):
    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
    `CifBlock`.

    ## Initialisation

    * `dic` is a `CifDic` object to be used for validation.
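
    ## Example

    A sketch of typical use (the dictionary file name is hypothetical):

        cd = CifDic("cif_core.dic")
        vb = ValidCifBlock(dic=cd)
        vb['_cell.volume'] = '501.2'   # additions are checked against `cd`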

    """
    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
        CifBlock.__init__(self,*args,**kwords)
        if dic is None and len(diclist)==0:
            raise ValidCifError( "At least one dictionary must be specified")
        if dic and diclist:
            print("Warning: diclist argument ignored when initialising ValidCifBlock")
        if dic is not None:
            if not isinstance(dic,CifDic):
                raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
            self.fulldic = dic
        else:
            self.fulldic = merge_dic(diclist,mergemode)
        if not self.run_data_checks()[0]:
            raise ValidCifError( self.report())
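
    # A construction sketch (comments only; the dictionary file name is
    # illustrative):
    #
    #    vb = ValidCifBlock(dic=CifDic("cif_core.dic"), data=plain_block)
    #    # a ValidCifError is raised if plain_block fails any dictionary check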

    def run_data_checks(self,verbose=False):
        self.v_result = {}
        self.fulldic.optimize_on()
        for dataname in self.keys():
            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
        for loop_names in self.loops.values():
            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
        # now run block-level checks
        update_value(self.v_result,self.fulldic.run_block_validation(self))
        # return false and list of baddies if anything didn't match
        self.fulldic.optimize_off()
        all_keys = list(self.v_result.keys()) #dictionary will change
        for test_key in all_keys:
            #print("%s: %r" % (test_key, self.v_result[test_key]))
            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
            if len(self.v_result[test_key]) == 0:
                del self.v_result[test_key]
        isvalid = len(self.v_result)==0
        #if not isvalid:
        #    print("Baddies: {!r}".format(self.v_result))
        return isvalid,self.v_result

    def single_item_check(self,item_name,item_value):
        #self.match_single_item(item_name)
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_item_validation(item_name,item_value)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def loop_item_check(self,loop_names):
        in_dic_names = list([a for a in loop_names if a in self.fulldic])
        if len(in_dic_names)==0:     #nothing to check against the dictionary
            return True,[]
        result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
        return isvalid,baddies

    def global_item_check(self,item_name,item_value,provisional_items={}):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_global_validation(item_name,
               item_value,self,provisional_items = provisional_items)
        baddies = list([a for a in result[item_name] if a[1]["result"] is False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def remove_global_item_check(self,item_name):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_remove_global_validation(item_name,self,False)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def AddToLoop(self,dataname,loopdata):
        # single item checks
        paired_data = loopdata.items()
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        # loop item checks; merge with current loop
        loop_no = self.FindLoop(dataname)
        if loop_no >= 0:
            loopnames = list(self.loops[loop_no])
            for new_name in loopdata.keys():
                if new_name.lower() not in loopnames: loopnames.append(new_name)
            valid,problems = self.loop_item_check(loopnames)
            self.report_if_invalid(valid,problems,dataname)
        prov_dict = loopdata.copy()
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
        CifBlock.AddToLoop(self,dataname,loopdata)

    def AddCifItem(self,data):
        if isinstance(data[0],(unicode,str)):   # single item
            valid,problems = self.single_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
            valid,problems = self.global_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
        elif isinstance(data[0],tuple) or isinstance(data[0],list):
            paired_data = list(zip(data[0],data[1]))
            for name,value in paired_data:
                valid,problems = self.single_item_check(name,value)
                self.report_if_invalid(valid,problems,name)
            valid,problems = self.loop_item_check(data[0])
            self.report_if_invalid(valid,problems,data[0])
            prov_dict = {}            # for storing temporary items
            for name,value in paired_data: prov_dict[name]=value
            for name,value in paired_data:
                del prov_dict[name]   # remove temporarily
                valid,problems = self.global_item_check(name,value,prov_dict)
                prov_dict[name] = value  # add back in
                self.report_if_invalid(valid,problems,name)
        else:
            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
        super(ValidCifBlock,self).AddCifItem(data)

    def AddItem(self,key,value,**kwargs):
        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
        valid,problems = self.single_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        valid,problems = self.global_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        super(ValidCifBlock,self).AddItem(key,value,**kwargs)

    # utility function
    def report_if_invalid(self,valid,bad_list,data_name):
        if not valid:
            bad_tests = [a[0] for a in bad_list]
            error_string = ",".join(bad_tests)
            error_string = repr(data_name) + " fails following validity checks: "  + error_string
            raise ValidCifError( error_string)

    def __delitem__(self,key):
        # we don't need to run single item checks; we do need to run loop and
        # global checks.
        if key in self:
            try:
                loop_items = self.GetLoop(key)
            except TypeError:
                loop_items = []
            if loop_items:             #need to check loop conformance
                loop_names = [a[0] for a in loop_items if a[0] != key]
                valid,problems = self.loop_item_check(loop_names)
                self.report_if_invalid(valid,problems,key)
            valid,problems = self.remove_global_item_check(key)
            self.report_if_invalid(valid,problems,key)
        self.RemoveCifItem(key)


    def report(self):
       outstr = StringIO()
       outstr.write( "Validation results\n")
       outstr.write( "------------------\n")
       outstr.write( "%d invalid items found\n\n" % len(self.v_result))
       for item_name,val_func_list in self.v_result.items():
           outstr.write("%s fails following tests:\n" % item_name)
           for val_func in val_func_list:
               outstr.write("\t%s\n" % val_func[0])
       return outstr.getvalue()


class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
        if not diclist and not dic and not hasattr(self,'bigdic'):
            raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
        if dic:                      #dic takes precedence over diclist
            self.bigdic = dic
        elif diclist:                #merge here for speed
            self.bigdic = merge_dic(diclist,mergemode)
        CifFile.__init__(self,*args,**kwargs)
        for blockname in self.keys():
            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

    def NewBlock(self,blockname,blockcontents,**kwargs):
        CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
        # dictionary[blockname] is now a CifBlock object.  We
        # turn it into a ValidCifBlock object
        self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                         data=self.dictionary[blockname])


class ValidationResult:
    """Represents validation result. It is initialised with """
    def __init__(self,results):
        """results is return value of validate function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for a valid CIF file, otherwise False"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = list(self.valid_result.keys())
        valid = True
        for block_name in block_names:
            if not self.valid_result[block_name] == (True,{}):
                valid = False
                break
        return valid

    def has_no_match_items(self,block_name=None):
        """Return True if some items were not found in the dictionary"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = list(self.no_matches.keys())
        has_no_match_items = False
        for block_name in block_names:
            if self.no_matches[block_name]:
                has_no_match_items = True
                break
        return has_no_match_items



def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = check_file.keys()[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
           block_scope = 'Dictionary'
        elif isdic:
           block_scope = 'Item'
        else:
           block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
             del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches
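
# A usage sketch for the validation machinery (comments only; file names
# are illustrative):
#
#   result = Validate("mydata.cif", dic=CifDic("cif_core.dic"))
#   vr = ValidationResult(result)
#   if not vr.is_valid():
#       print(vr.report(use_html=False))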

def validate_report(val_result,use_html=False):
    valid_result,no_matches = val_result
    outstr = StringIO()
    if use_html:
        outstr.write("<h2>Validation results</h2>")
    else:
        outstr.write( "Validation results\n")
        outstr.write( "------------------\n")
    if len(valid_result) > 10:
        suppress_valid = True         #don't clutter with valid messages
        if use_html:
           outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>")
    else:
        suppress_valid = False
    for block in valid_result.keys():
        block_result = valid_result[block]
        if block_result[0]:
            out_line = "Block '%s' is VALID" % block
        else:
            out_line = "Block '%s' is INVALID" % block
        if use_html:
            if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]:
                outstr.write( "<h3>%s</h3><p>" % out_line)
        else:
            outstr.write( "\n %s\n" % out_line)
        if len(no_matches[block])!= 0:
            if use_html:
                outstr.write( "<p>The following items were not found in the dictionary")
                outstr.write(" (note that this does not invalidate the data block):</p>")
                outstr.write("<p><table>\n")
                [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]]
                outstr.write("</table>\n")
            else:
                outstr.write( "\n The following items were not found in the dictionary:\n")
                outstr.write("Note that this does not invalidate the data block\n")
                [outstr.write("%s\n" % it) for it in no_matches[block]]
        # now organise our results by type of error, not data item...
        error_type_dic = {}
        for error_item, error_list in block_result[1].items():
            for func_name,bad_result in error_list:
                bad_result.update({"item_name":error_item})
                try:
                    error_type_dic[func_name].append(bad_result)
                except KeyError:
                    error_type_dic[func_name] = [bad_result]
        # make a table of test name, test message
        info_table = {\
        'validate_item_type':\
            "The following data items had badly formed values",
        'validate_item_esd':\
            "The following data items should not have esds appended",
        'validate_enum_range':\
            "The following data items have values outside permitted range",
        'validate_item_enum':\
            "The following data items have values outside permitted set",
        'validate_looping':\
            "The following data items violate looping constraints",
        'validate_loop_membership':\
            "The following looped data names are of different categories to the first looped data name",
        'validate_loop_key':\
            "A required dataname for this category is missing from the loop\n containing the dataname",
        'validate_loop_key_ddlm':\
            "A loop key is missing for the category containing the dataname",
        'validate_loop_references':\
            "A dataname required by the item is missing from the loop",
        'validate_parent':\
            "A parent dataname is missing or contains different values",
        'validate_child':\
            "A child dataname contains different values to the parent",
        'validate_uniqueness':\
            "One or more data items do not take unique values",
        'validate_dependents':\
            "A dataname required by the item is missing from the data block",
        'validate_exclusion': \
            "Both dataname and exclusive alternates or aliases are present in data block",
        'validate_mandatory_category':\
            "A required category is missing from this block",
        'check_mandatory_items':\
            "A required data attribute is missing from this block",
        'check_prohibited_items':\
            "A prohibited data attribute is present in this block"}

        for test_name,test_results in error_type_dic.items():
           if use_html:
               outstr.write(html_error_report(test_name,info_table[test_name],test_results))
           else:
               outstr.write(error_report(test_name,info_table[test_name],test_results))
               outstr.write("\n\n")
    return outstr.getvalue()

# A function to lay out a single error report.  We are passed
# the name of the error (one of our validation functions), the
# explanation to print out, and a list of dictionaries holding the
# error information.  Reported values are truncated to 50 characters.

def error_report(error_name,error_explanation,error_dics):
   retstring = "\n\n " + error_explanation + ":\n\n"
   headstring = "%-32s" % "Item name"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "%-20s" % "Bad value(s)"
   if "bad_items" in error_dics[0]:
      headstring += "%-20s" % "Bad dataname(s)"
   if "child" in error_dics[0]:
      headstring += "%-20s" % "Child"
   if "parent" in error_dics[0]:
      headstring += "%-20s" % "Parent"
   headstring +="\n"
   for error in error_dics:
      bodystring += "\n%-32s" % error["item_name"]
      if "bad_values" in error:
          out_vals = [repr(a)[:50] for a in error["bad_values"]]
          bodystring += "%-20s" % out_vals
      if "bad_items" in error:
          bodystring += "%-20s" % repr(error["bad_items"])
      if "child" in error:
          bodystring += "%-20s" % repr(error["child"])
      if "parent" in error:
          bodystring += "%-20s" % repr(error["parent"])
   return retstring + headstring + bodystring

#  This lays out an HTML error report

def html_error_report(error_name,error_explanation,error_dics,annotate=[]):
   retstring = "<h4>" + error_explanation + ":</h4>"
   retstring = retstring + "<table cellpadding=5><tr>"
   headstring = "<th>Item name</th>"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "<th>Bad value(s)</th>"
   if "bad_items" in error_dics[0]:
      headstring += "<th>Bad dataname(s)</th>"
   if "child" in error_dics[0]:
      headstring += "<th>Child</th>"
   if "parent" in error_dics[0]:
      headstring += "<th>Parent</th>"
   headstring +="</tr>\n"
   for error in error_dics:
      bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"]
      if "bad_values" in error:
          bodystring += "<td>%s</td>" % error["bad_values"]
      if "bad_items" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["bad_items"]
      if "child" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["child"]
      if "parent" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["parent"]
      bodystring += "</tr>\n"
   return retstring + headstring + bodystring + "</table>\n"

def run_data_checks(check_block,fulldic,block_scope='Item'):
    v_result = {}
    for key in check_block.keys():
        update_value(v_result, fulldic.run_item_validation(key,check_block[key]))
        update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block))
    for loopnames in check_block.loops.values():
        update_value(v_result, fulldic.run_loop_validation(loopnames))
    update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope))
    # return false and list of baddies if anything didn't match
    all_keys = list(v_result.keys())
    for test_key in all_keys:
        v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False]
        if len(v_result[test_key]) == 0:
            del v_result[test_key]
    # if even one false one is found, this should trigger
    # print("Baddies: {!r}".format(v_result))
    isvalid = len(v_result)==0
    return isvalid,v_result


def get_number_with_esd(numstring):
    numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    our_match = re.match(numb_re,numstring)
    if our_match:
        a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
        # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    else:
        return None,None
    if dot or q: return None,None     #a dot or question mark
    if exp:          #has exponent
       exp = exp.replace("d","e")     # mop up old fashioned numbers
       exp = exp.replace("D","e")
       base_num = base_num + exp
    # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    base_num = float(base_num)
    # work out esd, if present.
    if esd:
        esd = float(esd[1:-1])    # no brackets
        if dad:                   # decimal point + digits
            esd = esd * (10 ** (-1* len(dad)))
        if exp:
            esd = esd * (10 ** (float(exp[1:])))
    return base_num,esd
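
# Illustrative behaviour (comments only; results follow from the regex above):
#   get_number_with_esd("12.45(6)")  ->  (12.45, 0.06)
#   get_number_with_esd("1.2e3")     ->  (1200.0, None)
#   get_number_with_esd("?")         ->  (None, None)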

def float_with_esd(inval):
    if isinstance(inval,unicode):
        j = inval.find("(")
        if j>=0:  return float(inval[:j])
    return float(inval)



def convert_type(definition):
    """Return a function that converts a value to the type given by `definition`"""
    #extract the actual required type information
    container = definition['_type.container']
    dimension = definition.get('_type.dimension',StarFile.StarList([]))
    structure = interpret_structure(definition['_type.contents'])
    if container == 'Single':   #a single value to convert
        return convert_single_value(structure)
    elif container == 'List':   #lots of the same value
        return convert_list_values(structure,dimension)
    elif container == 'Multiple': #no idea
        return None
    elif container in ('Array','Matrix'): #numpy array
        return convert_matrix_values(structure)
    return lambda a:a    #unable to convert

def convert_single_value(type_spec):
    """Convert a single item according to type_spec"""
    if type_spec == 'Real':
        return float_with_esd
    if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
        return int
    if type_spec == 'Complex':
        return complex
    if type_spec == 'Imag':
        return lambda a:complex(0,a)
    if type_spec in ('Code','Name','Tag'):  #case-insensitive -> lowercase
        return lambda a:a.lower()
    return lambda a:a   #can't do anything numeric
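
# Illustrative behaviour (comments only):
#   convert_single_value('Real')('1.5(3)')  -> 1.5    (esd is dropped)
#   convert_single_value('Count')('42')     -> 42
#   convert_single_value('Code')('Cu')      -> 'cu'   (case-insensitive types)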

def convert_list_values(structure,dimension):
    """Convert the values according to the element
       structure given in [[structure]]"""
    if isinstance(structure,(unicode,str)):   #simple repetition
        func_def =  "element_convert = convert_single_value('%s')" % structure
    else:
        func_def =       "def element_convert(element):\n"
        func_def +=      "   final_val = []\n"
        for pos_no in range(len(structure)):
            func_def +=  "   final_val.append("
            type_spec = structure[pos_no]
            if type_spec == 'Real':
                cf = "float_with_esd("
            elif type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
                cf = 'int('
            elif type_spec == 'Complex':
                cf = 'complex('
            elif type_spec == 'Imag':
                cf = 'complex(0,'
            elif type_spec in ('Code','Name','Tag'):
                cf = '('
            else: cf = ''
            func_def += cf
            func_def += "element[%d]" % pos_no
            if "(" in cf: func_def +=")"
            if type_spec in ('Code','Name','Tag'):
                func_def +=".lower()"
            func_def +=")\n"  # close append
        func_def +=      "   return final_val\n"
    # print(func_def)    # debug: show the generated converter
    exec(func_def, globals()) #(re)defines element_convert in global namespace
    if len(dimension)> 0 and int(dimension[0]) != 1:
        return lambda a: list(map(element_convert,a))
    else: return element_convert
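
# Illustrative behaviour (comments only):
#   convert_list_values(['Real','Integer'], [])(['1.5', '2'])  -> [1.5, 2]
#   convert_list_values('Integer', ['3'])(['1', '2', '3'])     -> [1, 2, 3]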

def convert_matrix_values(valtype):
    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
    # first convert to numpy array, then let numpy do the work
    try: import numpy
    except ImportError:
        return lambda a:a   #cannot do it
    func_def =     "def matrix_convert(a):\n"
    func_def +=    "    import numpy\n"
    func_def +=    "    p = numpy.array(a)\n"
    if valtype == 'Real':
        func_def+= "    return p.astype('float')\n"
    elif valtype == 'Integer':
        func_def +="    return p.astype('int')\n"
    elif valtype == 'Complex':
        func_def +="    return p.astype('complex')\n"
    else:
        raise ValueError('Unknown matrix value type')
    exec(func_def,globals())  #matrix convert is defined
    return matrix_convert

def interpret_structure(struc_spec):
    """Interpret a DDLm structure specification"""
    from . import TypeContentsParser as t
    p = t.TypeParser(t.TypeParserScanner(struc_spec))
    return getattr(p,"input")()


# A utility function to append to item values rather than replace them
def update_value(base_dict,new_items):
    for new_key in new_items.keys():
        if new_key in base_dict:
            base_dict[new_key].extend(new_items[new_key])
        else:
            base_dict[new_key] = new_items[new_key]
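
# Illustrative behaviour (comments only):
#   base = {'a': [1]}
#   update_value(base, {'a': [2], 'b': [3]})
#   # base is now {'a': [1, 2], 'b': [3]}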

#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = []
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl
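
# Illustrative behaviour (comments only):
#   transpose([[1, 2], [3, 4], [5, 6]])  ->  [[1, 3, 5], [2, 4, 6]]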

# listify strings - used surprisingly often
def listify(item):
    if isinstance(item,(unicode,str)): return [item]
    else: return item

# given a list of search items, return a list of items
# actually contained in the given data block
def filter_present(namelist,datablocknames):
    return [a for a in namelist if a in datablocknames]

# Make an item immutable, used if we want a list to be a key
def make_immutable(values):
    """Turn list of StarList values into a list of immutable items"""
    if not isinstance(values[0],StarList):
        return values
    else:
        return [tuple(a) for a in values]

# merge ddl dictionaries.  We should be passed filenames or CifFile
# objects
def merge_dic(diclist,mergemode="replace",ddlspec=None):
    dic_as_cif_list = []
    for dic in diclist:
        if not isinstance(dic,CifFile) and \
           not isinstance(dic,(unicode,str)):
               raise TypeError("Require list of CifFile names/objects for dictionary merging")
        if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic))
        else: dic_as_cif_list.append(dic)
    # we now merge left to right
    basedic = dic_as_cif_list[0]
    if "on_this_dictionary" in basedic:   #DDL1 style only
        for dic in dic_as_cif_list[1:]:
           basedic.merge(dic,mode=mergemode,match_att=["_name"])
    elif len(basedic.keys()) == 1:                     #One block: DDL2/m style
        old_block = basedic[basedic.keys()[0]]
        for dic in dic_as_cif_list[1:]:
           new_block = dic[dic.keys()[0]]
           basedic.merge(dic,mode=mergemode,
                         single_block=[basedic.keys()[0],dic.keys()[0]],
                         match_att=["_item.name"],match_function=find_parent)
    return CifDic(basedic)

def find_parent(ddl2_def):
    if "_item.name" not in ddl2_def:
       return None
    if isinstance(ddl2_def["_item.name"],unicode):
        return ddl2_def["_item.name"]
    if "_item_linked.child_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no child_names")
    if "_item_linked.parent_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no parent_names")
    result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]])
    if len(result)>1 or len(result)==0:
        raise CifError("Unable to find single unique parent data item")
    return result[0]


def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF'):
    """ Read in a CIF file, returning a `CifFile` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
    Jython uses native Java regular expressions
    to provide a speedup regardless of this argument (and does not yet support CIF2).

    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
    `instance` scoping makes nested save frames
    invisible outside their hierarchy, allowing duplicate save frame names in separate
    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
    other, thereby restricting all save frames to have unique names.

    * `standard`: currently the only recognised value is `CIF`, which when set enforces a
    maximum length of 75 characters for datanames and has no other effect. """

    finalcif = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype)
    #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs)

class CifLoopBlock(StarFile.LoopBlock):
    def __init__(self,data=(),**kwargs):
        super(CifLoopBlock,self).__init__(data,**kwargs)

#No documentation flags

Functions

def ReadCif(filename, grammar=u'auto', scantype=u'standard', scoping=u'instance', standard=u'CIF')

Read in a CIF file, returning a CifFile object.

  • filename may be a URL, a file path on the local system, or any object with a read method.

  • grammar chooses the CIF grammar variant. 1.0 is the original 1992 grammar and 1.1 is identical except for the exclusion of square brackets as the first characters in undelimited datanames. 2.0 will read files in the CIF2.0 standard, and STAR2 will read files according to the STAR2 publication. If grammar is None, autodetection will be attempted in the order 2.0, 1.1 and 1.0. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

  • scantype can be standard or flex. standard provides pure Python parsing at the cost of a factor of 10 or so in speed. flex will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2).

  • scoping is only relevant where nested save frames are expected (STAR2 only). instance scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies. dictionary scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names.

  • standard: currently the only recognised value is CIF, which when set enforces a maximum length of 75 characters for datanames and has no other effect.

def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF'):
    """ Read in a CIF file, returning a `CifFile` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
    Jython uses native Java regular expressions
    to provide a speedup regardless of this argument (and does not yet support CIF2).

    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
    `instance` scoping makes nested save frames
    invisible outside their hierarchy, allowing duplicate save frame names in separate
    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
    other, thereby restricting all save frames to have unique names.

    * `standard`: currently the only recognised value is `CIF`, which when set enforces a
    maximum length of 75 characters for datanames and has no other effect. """

    finalcif = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype)
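
A minimal usage sketch (the file name and dataname are illustrative, not part of the package):

    from CifFile import ReadCif
    cf = ReadCif("mydata.cif")          # a URL or open file also works
    block = cf[list(cf.keys())[0]]      # first data block
    print(block["_cell_length_a"])      # access a single dataname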

def Validate(ciffile, dic=u'', diclist=[], mergemode=u'replace', isdic=False)

Validate that ciffile conforms to the definitions in CifDic object dic, or if dic is missing, to the results of merging the CifDic objects in diclist according to mergemode. Flag isdic indicates that ciffile is a CIF dictionary meaning that save frames should be accessed for validation and that mandatory_category should be interpreted differently for DDL2.

def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = check_file.keys()[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
           block_scope = 'Dictionary'
        elif isdic:
           block_scope = 'Item'
        else:
           block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
             del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches

Classes

class CifBlock

A class to hold a single block of a CIF file. A CifBlock object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. b['_a_dataname']. All other Python dictionary methods are also available (e.g. keys(), values()). Looped datanames will return a list of values.

Initialisation

When provided, data should be another CifBlock whose contents will be copied to this block.

  • if strict is set, maximum name lengths will be enforced

  • maxoutlength is the maximum length for output lines

  • wraplength is the ideal length to make output lines

  • When set, overwrite allows the values of datanames to be changed (otherwise an error is raised).

  • compat_mode will allow deprecated behaviour of creating single-dataname loops using the syntax a[_dataname] = [1,2,3,4]. This should now be done by calling CreateLoop after setting the dataitem value.

class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
        """When provided, `data` should be another CifBlock whose contents will be copied to
        this block.

        * if `strict` is set, maximum name lengths will be enforced

        * `maxoutlength` is the maximum length for output lines

        * `wraplength` is the ideal length to make output lines

        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
        is raised).

        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
        after setting the dataitem value.
        """
        if strict: maxnamelength=75
        else:
           maxnamelength=-1
        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
        self.dictionary = None   #DDL dictionary referring to this block
        self.compat_mode = compat_mode   #old-style behaviour of setitem

    def RemoveCifItem(self,itemname):
        """Remove `itemname` from the CifBlock"""
        self.RemoveItem(itemname)

    def __setitem__(self,key,value):
        self.AddItem(key,value)
        # for backwards compatibility make a single-element loop
        if self.compat_mode:
            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
                 # single element loop
                 self.CreateLoop([key])

    def copy(self):
        newblock = super(CifBlock,self).copy()
        return type(self)(newblock)   #catch inheritance (im_class is Python 2 only)

    def AddCifItem(self,data):
        """ *DEPRECATED*. Use `AddItem` instead."""
        # we accept only tuples, strings and lists!!
        if not (isinstance(data[0],(unicode,tuple,list,str))):
                  raise TypeError('Cif datanames are either a string, tuple or list')
        # we catch single item loops as well...
        if isinstance(data[0],(unicode,str)):
            self.AddSingleCifItem(data[0],list(data[1]))
            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
                self.CreateLoop([data[0]])
            return
        # otherwise, we loop over the datanames
        keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
        [self.AddSingleCifItem(a,b) for a,b in keyvals]
        # and create the loop
        self.CreateLoop(data[0][0])

    def AddSingleCifItem(self,key,value):
        """*Deprecated*. Use `AddItem` instead"""
        """Add a single data item. If it is part of a loop, a separate call should be made"""
        self.AddItem(key,value)

    def loopnames(self):
        return [self.loops[a] for a in self.loops]
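
A minimal usage sketch (datanames and values are illustrative):

    from CifFile import CifBlock, CifFile
    b = CifBlock()
    b['_demo_name'] = 'example'        # single (unlooped) item
    b['_demo_x'] = ['1', '2', '3']     # set the values, then loop them
    b.CreateLoop(['_demo_x'])
    cf = CifFile()
    cf.NewBlock('my_block', b)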

Ancestors (in MRO)

  • CifBlock
  • CifFile.StarFile.StarBlock
  • __builtin__.object

Methods

def AddCifItem(self, data)

DEPRECATED. Use AddItem instead.

def AddCifItem(self,data):
    """ *DEPRECATED*. Use `AddItem` instead."""
    # we accept only tuples, strings and lists!!
    if not (isinstance(data[0],(unicode,tuple,list,str))):
              raise TypeError('Cif datanames are either a string, tuple or list')
    # we catch single item loops as well...
    if isinstance(data[0],(unicode,str)):
        self.AddSingleCifItem(data[0],list(data[1]))
        if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
            self.CreateLoop([data[0]])
        return
    # otherwise, we loop over the datanames
    keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
    [self.AddSingleCifItem(a,b) for a,b in keyvals]
    # and create the loop
    self.CreateLoop(data[0][0])

def AddItem(self, key, value, precheck=False)

Add dataname key to block with value value. value may be a single value, a list or a tuple. If precheck is False (the default), all values will be checked and converted to unicode strings as necessary. If precheck is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.

def AddItem(self,key,value,precheck=False):
    """Add dataname `key` to block with value `value`.  `value` may be
    a single value, a list or a tuple. If `precheck` is False (the default),
    all values will be checked and converted to unicode strings as necessary. If
    `precheck` is True, this checking is bypassed.  No checking is necessary
    when values are read from a CIF file as they are already in correct form."""
    if not isinstance(key,(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
    key = unicode(key)    #everything is unicode internally
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
    # check for overwriting
    if key in self:
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    if not precheck:   #need to sanitise
        regval,empty_val = self.regularise_data(value)
        pure_string = check_stringiness(regval)
        self.check_item_value(regval)
    else:
        regval,empty_val = value,None
        pure_string = True
    # update ancillary information first
    lower_key = key.lower()
    if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    try:
        del self.true_case[lower_key]
    except KeyError:
        pass
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})
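
A short sketch (dataname and values are illustrative):

    b = CifBlock()
    b.AddItem('_demo_temperature', '293(2)')
    b.AddItem('_demo_values', ['1.0', '2.0'])   # a list, ready for CreateLoop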

def AddLoopItem(self, incomingdata, precheck=False, maxlength=-1)

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(self, oldname, newname)

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass
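
A short sketch (datanames are illustrative; _demo_y must hold the same number of values as the existing loop):

    b['_demo_x'] = ['1', '2', '3']
    b.CreateLoop(['_demo_x'])
    b['_demo_y'] = ['4', '5', '6']
    b.AddLoopName('_demo_x', '_demo_y')   # _demo_y joins the loop of _demo_x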

def AddSingleCifItem(self, key, value)

Deprecated. Use AddItem instead

def AddSingleCifItem(self,key,value):
    """*Deprecated*. Use `AddItem` instead"""
    """Add a single data item. If it is part of a loop, a separate call should be made"""
    self.AddItem(key,value)

def AddToLoop(self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(self, itemname, newpos)

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

def CreateLoop(self, datanames, order=-1, length_check=True)

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]
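
A short sketch (datanames are illustrative):

    b['_demo_id'] = ['a', 'b']
    b['_demo_val'] = ['1.0', '2.0']
    b.CreateLoop(['_demo_id', '_demo_val'])   # columns must be of equal length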

def FindLoop(self, keyname)

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1

def GetCompoundKeyedPacket(self, keydict)

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]

def GetFullItemValue(self, itemname)

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True

def GetItemOrder(self)

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetItemValue(self, itemname)

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(self, keyname, keyvalue, no_case=False)

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]
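
A short sketch, assuming the block contains an _atom_site loop keyed on _atom_site_label:

    pack = cb.GetKeyedPacket('_atom_site_label', 'C1', no_case=True)
    print(getattr(pack, '_atom_site_fract_x'))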

def GetKeyedSemanticPacket(self, keyvalue, cat_id)

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p
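
A minimal sketch, assuming a DDLm core dictionary has first been attached to the block (filenames and key value hypothetical):

    cdic = CifDic("cif_core_ddlm.dic", grammar="2.0")
    cb.assign_dictionary(cdic)
    pack = cb.GetKeyedSemanticPacket('C1', 'atom_site')
    # pack also carries any _atom_site_aniso datanames present in the file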

def GetLoop(self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)
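
The returned object is iterable, yielding one StarPacket per row, with the looped datanames available as packet attributes (loop contents hypothetical):

    for pack in cb.GetLoop('_atom_site_label'):
        print(getattr(pack, '_atom_site_label'))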

def GetLoopNames(self, keyname)

Return all datanames appearing together with keyname.

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetMultiKeyedSemanticPacket(self, keydict, cat_id)

Return a complete packet for category cat_id where the key values are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs.
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p
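
A sketch of the keydict format, which maps each key dataname to a (value, caseless) tuple (key and value hypothetical):

    keydict = {'_atom_site_label': ('C1', True)}
    pack = cb.GetMultiKeyedSemanticPacket(keydict, 'atom_site')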

def RemoveCifItem(self, itemname)

Remove itemname from the CifBlock.

def RemoveCifItem(self,itemname):
    """Remove `itemname` from the CifBlock"""
    self.RemoveItem(itemname)

def RemoveItem(self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(self, keyname, keyvalue)

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]
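
For example, to delete the whole row labelled 'C1' from a hypothetical _atom_site loop:

    cb.RemoveKeyedPacket('_atom_site_label', 'C1')

Every dataname in that loop loses the value at the matching position.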

def RemoveLoopItem(self, itemname)

Deprecated. Use RemoveItem instead.

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(self, wraplength=80, maxoutlength=2048)

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength
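
For example, to request wrapping at 100 characters while keeping the default hard limit:

    cb.SetOutputLength(wraplength=100)
    # subsequent output of this block will aim for 100-character lines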

class CifDic

Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only):

  • do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports='No' and do_dREL=False

  • do_imports = No/Full/Contents/All: If not 'No', replace _import.get statements with the imported contents for Full mode/Contents mode/Both respectively.

  • do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All
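
A typical construction, as a minimal sketch (dictionary filename hypothetical):

    from CifFile import CifDic
    cdic = CifDic("cif_core_ddlm.dic", grammar="2.0")     # full dREL setup
    quick = CifDic("cif_core_ddlm.dic", do_minimum=True)  # syntax only: no imports, no dREL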

class CifDic(StarFile.StarFile):
    """Create a Cif Dictionary object from the provided source, which can
    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
    only):

    * do_minimum (Boolean):
         Do not set up the dREL system for auto-calculation or perform
         imports.  This implies do_imports='No' and do_dREL=False

    * do_imports = No/Full/Contents/All:
         If not 'No', replace _import.get statements with the imported contents for
         Full mode/Contents mode/Both respectively.

    * do_dREL = True/False:
         Parse and convert all dREL methods to Python. Implies do_imports=All

    """
    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
                                                             grammar='auto',**kwargs):
        self.do_minimum = do_minimum
        if do_minimum:
            do_imports = 'No'
            do_dREL = False
        if do_dREL: do_imports = 'All'
        self.template_cache = {}    #for DDLm imports
        self.ddlm_functions = {}    #for DDLm functions
        self.switch_numpy(False)    #no Numpy arrays returned
        super(CifDic,self).__init__(datasource=dic,grammar=grammar,**kwargs)
        self.standard = 'Dic'    #for correct output order
        self.scoping = 'dictionary'
        (self.dicname,self.diclang) = self.dic_determine()
        print('%s is a %s dictionary' % (self.dicname,self.diclang))
        self.scopes_mandatory = {}
        self.scopes_naughty = {}
        # rename and expand out definitions using "_name" in DDL dictionaries
        if self.diclang == "DDL1":
            self.DDL1_normalise()   #this removes any non-definition entries
        self.create_def_block_table() #From now on, [] uses definition_id
        if self.diclang == "DDL1":
            self.ddl1_cat_load()
        elif self.diclang == "DDL2":
            self.DDL2_normalise()   #iron out some DDL2 tricky bits
        elif self.diclang == "DDLm":
            self.scoping = 'dictionary'   #expose all save frames
            if do_imports != 'No':
               self.ddlm_import(import_mode=do_imports)#recursively calls this routine
            self.create_alias_table()
            self.create_cat_obj_table()
            self.create_cat_key_table()
            if do_dREL:
                print('Doing full dictionary initialisation')
                self.initialise_drel()
        self.add_category_info(full=do_dREL)
        # initialise type information
        self.typedic={}
        self.primdic = {}   #typecode<->primitive type translation
        self.add_type_info()
        self.install_validation_functions()

    def dic_determine(self):
        if "on_this_dictionary" in self: 
            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
            self.def_id_spec = "_name"
            self.cat_id_spec = "_category.id"   #we add this ourselves 
            self.type_spec = "_type"
            self.enum_spec = "_enumeration"
            self.cat_spec = "_category"
            self.esd_spec = "_type_conditions"
            self.must_loop_spec = "_list"
            self.must_exist_spec = "_list_mandatory"
            self.list_ref_spec = "_list_reference"
            self.key_spec = "_list_mandatory"
            self.unique_spec = "_list_uniqueness"
            self.child_spec = "_list_link_child"
            self.parent_spec = "_list_link_parent"
            self.related_func = "_related_function"
            self.related_item = "_related_item"
            self.primitive_type = "_type"
            self.dep_spec = "xxx"
            self.cat_list = []   #to save searching all the time
            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
            return (name+version,"DDL1")
        elif len(self.get_roots()) == 1:              # DDL2/DDLm
            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])      
            # now change to dictionary scoping
            self.scoping = 'dictionary'
            name = self.master_block["_dictionary.title"]
            version = self.master_block["_dictionary.version"]
            if self.master_block.has_key("_dictionary.class"):   #DDLm
                self.enum_spec = '_enumeration_set.state'
                self.key_spec = '_category.key_id'
                self.must_exist_spec = None
                self.cat_spec = '_name.category_id'
                self.primitive_type = '_type.contents'
                self.cat_id_spec = "_definition.id"
                self.def_id_spec = "_definition.id"
                return(name+version,"DDLm") 
            else:   #DDL2
                self.cat_id_spec = "_category.id"
                self.def_id_spec = "_item.name"
                self.key_spec = "_category_mandatory.name"
                self.type_spec = "_item_type.code"
                self.enum_spec = "_item_enumeration.value"
                self.esd_spec = "_item_type_conditions.code"
                self.cat_spec = "_item.category_id"
                self.loop_spec = "there_is_no_loop_spec!"
                self.must_loop_spec = "xxx"
                self.must_exist_spec = "_item.mandatory_code"
                self.child_spec = "_item_linked.child_name"
                self.parent_spec = "_item_linked.parent_name"
                self.related_func = "_item_related.function_code"
                self.related_item = "_item_related.related_name"
                self.unique_spec = "_category_key.name"
                self.list_ref_spec = "xxx"
                self.primitive_type = "_type"
                self.dep_spec = "_item_dependent.dependent_name"
                return (name+version,"DDL2")
        else:
            raise CifError("Unable to determine dictionary DDL version")

    def DDL1_normalise(self):
        # switch off block name collision checks
        self.standard = None
        # add default type information in DDL2 style
        # initial types and constructs
        base_types = ["char","numb","null"]
        prim_types = base_types[:]
        base_constructs = [".*",
            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
            "\"\" "]
        for key,value in self.items():
           newnames = [key]  #keep by default
           if "_name" in value:
               real_name = value["_name"]
               if isinstance(real_name,list):        #looped values
                   for looped_name in real_name:
                      new_value = value.copy()
                      new_value["_name"] = looped_name  #only looped name
                      self[looped_name] = new_value
                   newnames = real_name
               else:
                   self[real_name] = value
                   newnames = [real_name]
           # delete the old one
           if key not in newnames:
              del self[key]
        # loop again to normalise the contents of each definition
        for key,value in self.items():
           #unlock the block
           save_overwrite = value.overwrite
           value.overwrite = True
           # deal with a missing _list, _type_conditions
           if "_list" not in value: value["_list"] = 'no'
           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
           # deal with enumeration ranges
           if "_enumeration_range" in value:
               max,min = self.getmaxmin(value["_enumeration_range"])
               if min == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
               elif max == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
               else:
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
           #add any type construct information
           if "_type_construct" in value:
               base_types.append(value["_name"]+"_type")   #ie dataname_type
               base_constructs.append(value["_type_construct"]+"$")
               prim_types.append(value["_type"])     #keep a record
               value["_type"] = base_types[-1]   #the new type name

           #make categories conform with ddl2
           #note that we must remove everything from the last underscore
           if value.get("_category",None) == "category_overview":
                last_under = value["_name"].rindex("_")
                catid = value["_name"][1:last_under]
                value["_category.id"] = catid  #remove square bracks
                if catid not in self.cat_list: self.cat_list.append(catid)
           value.overwrite = save_overwrite
        # we now add any missing categories before filling in the rest of the
        # information
        for key,value in self.items():
            #print('processing ddl1 definition %s' % key)
            if "_category" in self[key]:
                if self[key]["_category"] not in self.cat_list:
                    # rogue category, add it in
                    newcat = self[key]["_category"]
                    fake_name = "_" + newcat + "_[]"
                    newcatdata = CifBlock()
                    newcatdata["_category"] = "category_overview"
                    newcatdata["_category.id"] = newcat
                    newcatdata["_type"] = "null"
                    self[fake_name] = newcatdata
                    self.cat_list.append(newcat)
        # write out the type information in DDL2 style
        self.master_block.AddLoopItem((
            ("_item_type_list.code","_item_type_list.construct",
              "_item_type_list.primitive_code"),
            (base_types,base_constructs,prim_types)
            ))

    def ddl1_cat_load(self):
        deflist = self.keys()       #slight optimization
        cat_mand_dic = {}
        cat_unique_dic = {}
        # a function to extract any necessary information from each definition
        def get_cat_info(single_def):
            if self[single_def].get(self.must_exist_spec)=='yes':
                thiscat = self[single_def]["_category"]
                curval = cat_mand_dic.get(thiscat,[])
                curval.append(single_def)
                cat_mand_dic[thiscat] = curval
            # now the unique items...
            # cif_core.dic throws us a curly one: the value of list_uniqueness is
            # not the same as the defined item for publ_body_label, so we have
            # to collect both together.  We assume a non-listed entry, which
            # is true for all current (May 2005) ddl1 dictionaries.
            if self[single_def].get(self.unique_spec,None)!=None:
                thiscat = self[single_def]["_category"]
                new_unique = self[single_def][self.unique_spec]
                uis = cat_unique_dic.get(thiscat,[])
                if single_def not in uis: uis.append(single_def)
                if new_unique not in uis: uis.append(new_unique)
                cat_unique_dic[thiscat] = uis

        [get_cat_info(a) for a in deflist] # apply the above function
        for cat in cat_mand_dic.keys():
            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
        for cat in cat_unique_dic.keys():
            self[cat]["_category_key.name"] = cat_unique_dic[cat]

    def create_pcloop(self,definition):
        old_children = self[definition].get('_item_linked.child_name',[])
        old_parents = self[definition].get('_item_linked.parent_name',[])
        if isinstance(old_children,unicode):
             old_children = [old_children]
        if isinstance(old_parents,unicode):
             old_parents = [old_parents]
        if (len(old_children)==0 and len(old_parents)==0) or \
           (len(old_children) > 1 and len(old_parents)>1):
             return
        if len(old_children)==0:
             old_children = [definition]*len(old_parents)
        if len(old_parents)==0:
             old_parents = [definition]*len(old_children)
        newloop = CifLoopBlock(dimension=1)
        newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
        newloop.AddLoopItem(('_item_linked.child_name',old_children))
        try:
            del self[definition]['_item_linked.parent_name']
            del self[definition]['_item_linked.child_name']
        except KeyError:
            pass
        self[definition].insert_loop(newloop)



    def DDL2_normalise(self):
       # wrap in list() so these can be traversed more than once (a filter
       # object is a single-use iterator under Python 3)
       listed_defs = list(filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()))
       # now filter out all the single element lists!
       dodgy_defs = list(filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs))
       for item_def in dodgy_defs:
                # print("DDL2 norm: processing %s" % item_def)
                thisdef = self[item_def]
                packet_no = thisdef['_item.name'].index(item_def)
                realcat = thisdef['_item.category_id'][packet_no]
                realmand = thisdef['_item.mandatory_code'][packet_no]
                # first add in all the missing categories
                # we don't replace the entry in the list corresponding to the
                # current item, as that would wipe out the information we want
                for child_no in range(len(thisdef['_item.name'])):
                    if child_no == packet_no: continue
                    child_name = thisdef['_item.name'][child_no]
                    child_cat = thisdef['_item.category_id'][child_no]
                    child_mand = thisdef['_item.mandatory_code'][child_no]
                    if child_name not in self:
                        self[child_name] = CifBlock()
                        self[child_name]['_item.name'] = child_name
                    self[child_name]['_item.category_id'] = child_cat
                    self[child_name]['_item.mandatory_code'] = child_mand
                self[item_def]['_item.name'] = item_def
                self[item_def]['_item.category_id'] = realcat
                self[item_def]['_item.mandatory_code'] = realmand

       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                     '_item_linked.parent_name' in self[a]]
       # now dodgy_defs contains all definition blocks with more than one child/parent link
       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
       for item_def in dodgy_defs:
             print('Processing %s' % item_def)
             thisdef = self[item_def]
             child_list = thisdef['_item_linked.child_name']
             parents = thisdef['_item_linked.parent_name']
             # for each parent, find the list of children.
             family = list(zip(parents,child_list))
             notmychildren = family         #We aim to remove non-children
             # Loop over the parents, relocating as necessary
             while len(notmychildren):
                # get all children of first entry
                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
                for parent,child in mychildren:   #parent is the same for all
                         # Make sure that we simply add in the new entry for the child, not replace it,
                         # otherwise we might spoil the child entry loop structure
                         try:
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                         except KeyError:
                             print('Creating new parent entry %s for definition %s' % (parent,child))
                             self[child]['_item_linked.parent_name'] = [parent]
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
                             continue
                         else:
                             # A parent loop already exists and so will a child loop due to the
                             # call to create_pcloop above
                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                             if len(goodpars)>0:   #no need to add it
                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                                 continue
                             print('Adding %s to %s entry' % (parent,child))
                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                             setattr(newpacket,'_item_linked.child_name',child)
                             setattr(newpacket,'_item_linked.parent_name',parent)
                             childloop.AddPacket(newpacket)
                #
                # Make sure the parent also points to the children.  We get
                # the current entry, then add our
                # new values if they are not there already
                #
                parent_name = mychildren[0][0]
                old_children = self[parent_name].get('_item_linked.child_name',[])
                old_parents = self[parent_name].get('_item_linked.parent_name',[])
                oldfamily = list(zip(old_parents,old_children))  #list, as it is traversed repeatedly
                newfamily = []
                print('Old parents -> %s' % repr(old_parents))
                for jj, childname in mychildren:
                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                    if len(alreadythere)>0: continue
                    print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                    old_children.append(childname)
                    old_parents.append(parent_name)
                # Now output the loop, blowing away previous definitions.  If there is something
                # else in this category, we are destroying it.
                newloop = CifLoopBlock(dimension=1)
                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
                newloop.AddLoopItem(('_item_linked.child_name',old_children))
                del self[parent_name]['_item_linked.parent_name']
                del self[parent_name]['_item_linked.child_name']
                self[parent_name].insert_loop(newloop)
                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
                # now make a new,smaller list
                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]

       # now flatten any single element lists
       single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs)
       for flat_def in single_defs:
           flat_keys = self[flat_def].GetLoop('_item.name').keys()
           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
       # now deal with the multiple lists
       # next we do aliases
       all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
       for aliased in all_aliases:
          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
          for alias in my_aliases:
              self[alias] = self[aliased].copy()   #we are going to delete stuff...
              del self[alias]["_item_aliases.alias_name"]
 
    def ddlm_parse_valid(self):
        if "_dictionary_valid.application" not in self.master_block:
            return
        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
            scope = getattr(scope_pack,"_dictionary_valid.application")
            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
            if scope[1] == "Mandatory":
                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
            elif scope[1] == "Prohibited":
                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)
                
    def ddlm_import(self,import_mode='All'):
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print ('Import mode %s applied to following frames' % import_mode)
        print (str([a[0] for a in import_frames]))
        if import_mode != 'All':
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents') == import_mode])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                dic_as_cif = CifFile(urlopen(full_uri),grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                child_frames = import_from.ddlm_all_children(from_cat_head)
                 # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
                print ('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # it will never happen again...
            del self[parent_block]["_import.get"]

    def resolve_path(self,file_loc):
        url_comps = urlparse(file_loc)
        if url_comps[0]: return file_loc    #already full URI
        new_url = urljoin(self.my_uri,file_loc)
        #print("Transformed %s to %s for import " % (file_loc,new_url))
        return new_url
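
    # A sketch of the resolution behaviour (URIs hypothetical): a relative
    # import location is resolved against this dictionary's own URI, so with
    # self.my_uri = 'file:/dics/cif_core.dic', resolve_path('templ_enum.cif')
    # returns 'file:/dics/templ_enum.cif'; an absolute URI is returned unchanged.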



    def create_def_block_table(self):
        """ Create an internal table matching definition to block id """
        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
        # now get the actual ids instead of blocks
        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
        # remove non-definitions
        if self.diclang != "DDL1":
            top_blocks = list([a[0].lower() for a in self.get_roots()])
        else:
            top_blocks = ["on_this_dictionary"]
        # catch dodgy duplicates
        uniques = set([a[0] for a in proto_table])
        if len(uniques)<len(proto_table):
            def_names = list([a[0] for a in proto_table])
            dodgy = [a for a in def_names if def_names.count(a)>1]
            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])
        
    def __getitem__(self,key):
        """Access a datablock by definition id, after the lookup has been created"""
        try:
            return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
        except AttributeError:   #block_id_table not present yet
            return super(CifDic,self).__getitem__(key)
        except KeyError: # key is missing
            # print('Definition for %s not found, reverting to CifFile' % key)
            return super(CifDic,self).__getitem__(key)

    def __setitem__(self,key,value):
        """Add a new definition block"""
        super(CifDic,self).__setitem__(key,value)
        try:
            self.block_id_table[value['_definition.id']]=key
        except AttributeError:   #does not exist yet
            pass

    def __delitem__(self,key):
        """Remove a definition"""
        try:
            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
            del self.block_id_table[key.lower()]
        except (AttributeError,KeyError):   #block_id_table not present yet
            super(CifDic,self).__delitem__(key)
            return
        # fix other datastructures
        # cat_obj table
        
    def keys(self):
        """Return all definitions"""
        try:
            return self.block_id_table.keys()
        except AttributeError:
            return super(CifDic,self).keys()

    def has_key(self,key):
        return key in self

    def __contains__(self,key):
        try:
            return key.lower() in self.block_id_table
        except AttributeError:
            return super(CifDic,self).__contains__(key)
            
    def items(self):
        """Return (key,value) pairs"""
        return list([(a,self[a]) for a in self.keys()])

    def unlock(self):
        """Allow overwriting of all definitions in this collection"""
        for a in self.keys():
            self[a].overwrite=True

    def lock(self):
        """Disallow changes in definitions"""
        for a in self.keys():
            self[a].overwrite=False

    def rename(self,oldname,newname,blockname_as_well=True):
        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
        change the underlying blockname too."""
        if blockname_as_well:
            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)        
            self.block_id_table[newname.lower()]=newname
            if oldname.lower() in self.block_id_table: #not removed
               del self.block_id_table[oldname.lower()]
        else:
            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
            del self.block_id_table[oldname.lower()]
            return
                                                 
    def get_root_category(self):
        """Get the single 'Head' category of this dictionary"""
        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
        if len(root_cats)>1 or len(root_cats)==0:
            raise CifError("Cannot determine a unique Head category, got" % repr(root_cats))
        return root_cats[0]

    def ddlm_immediate_children(self,catname):
        """Return a list of datanames for the immediate children of catname.  These are
        semantic children (i.e. based on _name.category_id), not structural children as
        in the case of StarFile.get_immediate_children"""
                                                 
        straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
        return list(straight_children)

    def ddlm_all_children(self,catname):
        """Return a list of all children, including the `catname`"""
        all_children = self.ddlm_immediate_children(catname)
        cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
        for c in cat_children:
            all_children.remove(c)
            all_children += self.ddlm_all_children(c)
        return all_children + [catname]

    def is_semantic_child(self,parent,maybe_child):
        """Return true if `maybe_child` is a child of `parent`"""
        all_children = self.ddlm_all_children(parent)
        return maybe_child in all_children

    def ddlm_danglers(self):
        """Return a list of definitions that do not have a category defined
        for them, or are children of an unattached category"""
        top_block = self.get_root_category()
        connected = set(self.ddlm_all_children(top_block))
        all_keys = set(self.keys())
        unconnected = all_keys - connected
        return list(unconnected)

    def get_ddlm_parent(self,itemname):
        """Get the parent category of itemname"""
        parent = self[itemname].get('_name.category_id','')
        if parent == '':  # no parent category defined
            raise CifError("%s has no parent" % itemname)
        return parent

    def expand_category_opt(self,name_list):
        """Return a list of all non-category items in a category or return the name
           if the name is not a category"""
        new_list = []
        for name in name_list:
          if self.get(name,{}).get('_definition.scope','Item') == 'Category':
            new_list += self.expand_category_opt([a for a in self.keys() if \
                     self[a].get('_name.category_id','').lower() == name.lower()])
          else:
            new_list.append(name)
        return new_list

    def get_categories(self):
        """Return a list of category names"""
        return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

    def names_in_cat(self,cat,names_only=False):
        names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
        if not names_only:
            return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
        else:
            return list([self[a]["_name.object_id"] for a in names])

                           

    def create_alias_table(self):
        """Populate an alias table that we can look up when searching for a dataname"""
        all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
        self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])

    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname"""
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats() 
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
              nn = []
              if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
              # add all child names to the table
              child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                             for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
              child_names += extra_names
              extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,name) not in extra_table]))
            # and the repeated ones get appended instead
            repeats = [a for a in child_names if a in extra_table]
            for obj,name in repeats:
                extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list

    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories. This
        is preferred to a fixed attribute as that fixed attribute would need to be
        updated after any edits"""
        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category. A key is
        a compound key, that is, it is a list"""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
                kk = []
                for child_cat in child_cats:
                    if child_cat in self.loop_expand_list:
                        kk += collect_keys(child_cat,self.loop_expand_list[child_cat])  #recurse into nested category
                    # add these keys to our list
                    kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
                self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
                return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories' + repr(self.cat_key_table))

    def add_type_info(self):
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]  #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None:
                    fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None:
                    fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype

    def add_category_info(self,full=True):
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                        # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys

        else:
            self.parent_lookup = {}
            self.key_equivs = {}

    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]]"""
        self.unlock()
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])  #no underscore at the beginning
        self.lock()

    def create_catobj_name(self,cat,obj):
        """Combine category and object in approved fashion to create id"""
        return ('_'+cat+'.'+obj)

    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        defid = self[itemname]
        if defid['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:    #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        objid = defid['_name.object_id']
        defid['_name.category_id'] = catname
        newid = itemname # stays the same for categories
        if defid.get('_definition.scope','Item') == 'Item':
            newid = self.create_catobj_name(catname,objid)
            defid['_definition.id']= newid
            self.rename(itemname,newid)
        self.set_parent(catname,newid)
        self.lock()
        return newid
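
    # Example sketch (names hypothetical): move a definition into another category
    #     new_handle = cdic.change_category('_old_cat.value', 'new_cat')
    #     # new_handle == '_new_cat.value'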

    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj. This is not used for
        categories, but can be used for dictionaries"""
        if '_dictionary.title' not in self[one_def]:  #an ordinary definition
            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,newid)
            self[newid]['_definition.id']=newid
            self[newid]['_name.object_id']=newobj
        else:
            self.unlock()
            newid = newobj
            self.rename(one_def,newobj)
            self[newid]['_dictionary.title'] = newid
        self.lock()
        return newid

    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
           is true, the parent category does not have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is None:
            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(semantic_root)>0:
                semantic_root = semantic_root[0]
            else:
                semantic_root = syntactic_root
        else:
            semantic_root = catparent
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if not allow_dangler or catparent is None:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        else:
            self[catname]['_name.category_id'] = catparent
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        if is_loop:
            self[catname]['_definition.class'] = 'Loop'
        else:
            self[catname]['_definition.class'] = 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname

    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made."""
        self.unlock()
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
        # update our dictionary structures
        self.block_id_table[fullname]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
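
    # Example sketch (names hypothetical): create a category, then a definition
    # within it; the dataname is assembled as '_' + category + '.' + object:
    #     cdic.add_category('new_cat')
    #     cdic.add_definition('my_item', 'new_cat')   # defines '_new_cat.my_item'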
        
    def remove_definition(self,defname):
        """Remove a definition from the dictionary."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            children = self.ddlm_immediate_children(defname)
            [self.remove_definition(a) for a in children]
        del self[defname]

    def get_cat_obj(self,name):
        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
        cat,obj = name.split('.')
        return (cat.strip('_'),obj)

    def get_name_by_cat_obj(self,category,object,give_default=False):
        """Return the dataname corresponding to the given category and object"""
        if category[0] == '_':    #accidentally left in
           true_cat = category[1:].lower()
        else:
           true_cat = category.lower()
        try:
            return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
        except KeyError:
            if give_default:
               return '_'+true_cat+'.'+object
        raise KeyError('No such category, object in the dictionary: %s %s' % (true_cat,object))
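
    # Editor's example (hypothetical dictionary content):
    #
    #   cdic.get_name_by_cat_obj('cell', 'volume')   # -> '_cell.volume'
    #
    # With give_default=True an unknown (category,object) pair returns the
    # constructed name instead of raising KeyError.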


    def WriteOut(self,**kwargs):
        myblockorder = self.get_full_child_list()
        self.set_grammar(self.grammar)
        self.standard = 'Dic'
        return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

    def get_full_child_list(self):
        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
        top_block = self.get_roots()[0][0]
        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
        if len(root_cat) == 1:
            all_names = [top_block] + self.recurse_child_list(root_cat[0])
            unrooted = self.ddlm_danglers()
            double_names =  set(unrooted).intersection(set(all_names))
            if len(double_names)>0:
                raise CifError('Names are children of internal and external categories: %s' % repr(double_names))
            remaining = unrooted[:]
            for no_root in unrooted:
                if self[no_root].get('_definition.scope','Item')=='Category':
                    all_names += [no_root]
                    remaining.remove(no_root)
                    these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                    all_names += these_children
                    [remaining.remove(n) for n in these_children]
            # now sort by category
            ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
            for e in ext_cats:
                cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
                [remaining.remove(n) for n in cat_items]
                all_names += cat_items
            if len(remaining)>0:
                print('WARNING: following items do not seem to belong to a category??')
                print(repr(remaining))
                all_names += remaining
            print('Final block order: ' + repr(all_names))
            return all_names
        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')

    def cat_from_name(self,one_name):
        """Guess the category from the name. This should be used only when this is not important semantic information,
        for example, when printing out"""
        (cat,obj) = one_name.split(".")
        if cat[0] == "_": cat = cat[1:]
        return cat

    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        final_list = [parentname]
        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
        child_blocks.sort()    #we love alphabetical order
        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
        final_list += child_items
        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
        for child_cat in child_cats:
            final_list += self.recurse_child_list(child_cat)
        return final_list



    def get_key_pack(self,category,value,data):
        keyname = self[category][self.unique_spec]
        onepack = data.GetPackKey(keyname,value)
        return onepack

    def get_number_with_esd(numstring):
        # plain helper (note: no self argument): parse a number with an
        # optional bracketed esd, e.g. '1.234(5)'
        numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
        our_match = re.match(numb_re,numstring)
        if our_match:
            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
        else:
            return None,None
        if dot or q: return None,None     #a dot or question mark
        if exp:          #has exponent
           exp = exp.replace("d","e")     # mop up old fashioned numbers
           exp = exp.replace("D","e")
           base_num = base_num + exp
        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
        base_num = float(base_num)
        # work out esd, if present.
        if esd:
            esd = float(esd[1:-1])    # no brackets
            if dad:                   # decimal point + digits
                esd = esd * (10 ** (-1* len(dad)))
            if exp:
                esd = esd * (10 ** (float(exp[1:])))
        return base_num,esd
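
    # Editor's worked examples for the parser above (called as a plain
    # function, as it takes no self argument):
    #
    #   get_number_with_esd('1.234(5)')   # -> (1.234, 0.005): the esd of 5
    #                                     #    is scaled by the 3 decimals
    #   get_number_with_esd('12(3)')      # -> (12.0, 3.0)
    #   get_number_with_esd('4.5d3')      # -> (4500.0, None): d/D exponents
    #                                     #    are normalised to 'e'
    #   get_number_with_esd('?')          # -> (None, None): unknown value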

    def getmaxmin(self,rangeexp):
        regexp = r'(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        regexp = regexp + ":" + regexp
        regexp = re.match(regexp,rangeexp)
        try:
            minimum = regexp.group(1)
            maximum = regexp.group(7)
        except AttributeError:
            print("Can't match %s" % rangeexp)
            minimum = maximum = None    #treat as an open range
        if minimum is None: minimum = "."
        else: minimum = float(minimum)
        if maximum is None: maximum = "."
        else: maximum = float(maximum)
        return maximum,minimum
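
    # Editor's examples: note the (maximum, minimum) return order, and that
    # a missing bound is returned as '.':
    #
    #   cdic.getmaxmin('0.0:10.0')   # -> (10.0, 0.0)
    #   cdic.getmaxmin('5:')         # -> ('.', 5.0)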

    def initialise_drel(self):
        """Parse drel functions and prepare data structures in dictionary"""
        self.ddlm_parse_valid() #extract validity information from data block
        self.transform_drel()   #parse the drel functions
        self.add_drel_funcs()   #put the drel functions into the namespace

    def transform_drel(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        import traceback
        parser = drel_ast_yacc.parser
        lexer = drel_ast_yacc.lexer
        my_namespace = self.keys()
        my_namespace = dict(zip(my_namespace,my_namespace))
        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                              and self[a].get("_name.category_id","")!= "function"]
        for derivable in derivable_list:
            target_id = derivable
            # reset the list of visible names for parser
            special_ids = [dict(zip(self.keys(),self.keys()))]
            print("Target id: %s" % derivable)
            drel_exprs = self[derivable]["_method.expression"]
            drel_purposes = self[derivable]["_method.purpose"]
            all_methods = []
            if not isinstance(drel_exprs,list):
                drel_exprs = [drel_exprs]
                drel_purposes = [drel_purposes]
            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
                if drel_purpose != 'Evaluation':
                    continue
                drel_expr = "\n".join(drel_expr.splitlines())
                # print("Transforming %s" % drel_expr)
                # List categories are treated differently...
                try:
                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
                except Exception:
                    print('Syntax error in method for %s; leaving as is' % derivable)
                    a,b = sys.exc_info()[:2]
                    print((repr(a),repr(b)))
                    print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                    # reset the lexer
                    lexer.begin('INITIAL')
                    continue
                # Construct the python method
                cat_meth = False
                if self[derivable].get('_definition.scope','Item') == 'Category':
                    cat_meth = True
                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                                           loopable=loop_info,
                                                             cif_dic = self,cat_meth=cat_meth)
                all_methods.append(pyth_meth)
            if len(all_methods)>0:
                save_overwrite = self[derivable].overwrite
                self[derivable].overwrite = True
                self[derivable]["_method.py_expression"] = all_methods
                self[derivable].overwrite = save_overwrite
            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))

    def add_drel_funcs(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
        funcnames = [(self[a]["_name.object_id"],
                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
        # create executable python code...
        parser = drel_ast_yacc.parser
        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        for funcname,funcbody in funcnames:
            newline_body = "\n".join(funcbody.splitlines())
            parser.target_id = funcname
            res_ast = parser.parse(newline_body)
            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
            #print('dREL library function ->\n' + py_function)
            global_table = globals()
            exec(py_function, global_table)    #add to namespace
        #print('Globals after dREL functions added:' + repr(globals()))
        self.ddlm_functions = globals()  #for outside access

    @track_recursion
    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
        key = start_key   #starting value
        result = None     #success is a non-None value
        default_result = False #we have not used a default value
        # check for aliases
        # check for an older form of a new value
        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
        if len(found_it)>0:
            corrected_type = self.change_type(key,cifdata[found_it[0]])
            return corrected_type
        # now do the reverse check - any alternative form
        alias_name = [a for a in self.alias_table.items() if key in a[1]]
        print('Aliases for %s: %s' % (key,repr(alias_name)))
        if len(alias_name)==1:
            key = alias_name[0][0]   #actual definition name
            if key in cifdata: return self.change_type(key,cifdata[key])
            found_it = [k for k in alias_name[0][1] if k in cifdata]
            if len(found_it)>0:
                return self.change_type(key,cifdata[found_it[0]])
        elif len(alias_name)>1:
            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

        the_category = self[key]["_name.category_id"]
        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        # store any default value in case we have a problem
        def_val = self[key].get("_enumeration.default","")
        def_index_val = self[key].get("_enumeration.def_index_id","")
        if len(has_cat_names)==0: # try category method
            cat_result = {}
            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
            pulled_from_cats = [(k,[
                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
                               ) for k in pulled_from_cats]
            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
            if '_category_construct_local.type' in self[the_category]:
                print("**Now constructing category %s using DDLm attributes**" % the_category)
                try:
                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s (error)' % the_category)
            # Trying a pull-back when the category is partially populated
            # will not work, hence we test that cat_result has no keys
            if len(pulled_to_cats)>0 and len(cat_result)==0:
                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
                try:
                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s from pullback information (error)' % the_category)
            if '_method.py_expression' in self[the_category] and key not in cat_result:
                print("**Now applying category method for %s in search of %s**" % (the_category,key))
                cat_result = self.derive_item(the_category,cifdata,store_value=True)
            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
            # do we now have our value?
            if key in cat_result:
                return cat_result[key]

        # Recalculate in case it actually worked
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        the_funcs = self[key].get('_method.py_expression',"")
        if the_funcs:   #attempt to calculate it
            #global_table = globals()
            #global_table.update(self.ddlm_functions)
            for one_func in the_funcs:
                print('Executing function for %s:' % key)
                #print(one_func)
                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
                # print('in following global environment: ' + repr(global_table))
                stored_setting = cifdata.provide_value
                cifdata.provide_value = True
                try:
                    result = pyfunc(cifdata)
                except CifRecursionError as s:
                    print(s)
                    result = None
                except StarFile.StarDerivationError as s:
                    print(s)
                    result = None
                finally:
                    cifdata.provide_value = stored_setting
                if result is not None:
                    break
                #print("Function returned {!r}".format(result))

        if result is None and allow_defaults:   # try defaults
            if def_val:
                result = self.change_type(key,def_val)
                default_result = True
            elif def_index_val:            #derive a default value
                index_vals = self[key]["_enumeration_default.index"]
                val_to_index = cifdata[def_index_val]     #what we are keying on
                lcase_comp = self[def_index_val]['_type.contents'] in ['Code','Name','Tag']
                if lcase_comp:
                    index_vals = [a.lower() for a in index_vals]
                # Handle loops
                if isinstance(val_to_index,list):
                    if lcase_comp:
                        val_to_index = [a.lower() for a in val_to_index]
                    keypos = [index_vals.index(a) for a in val_to_index]
                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
                else:
                    if lcase_comp:
                        val_to_index = val_to_index.lower()
                    keypos = index_vals.index(val_to_index)   #value error if no such value available
                    result = self[key]["_enumeration_default.value"][keypos]
                    default_result = True   #flag that it must be extended
                result = self.change_type(key,result)
                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

        # read it in
        if result is None:   #can't do anything else
            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
            raise StarFile.StarDerivationError(start_key)
        is_looped = False
        if self[the_category].get('_definition.class','Set')=='Loop':
            is_looped = True
            if len(has_cat_names)>0:   #this category already exists
                if result is None or default_result: #need to create a list of values
                    loop_len = len(cifdata[has_cat_names[0]])
                    out_result = [result]*loop_len
                    result = out_result
            else:   #nothing exists in this category, we can't store this at all
                print('Resetting result %s for %s to null list as category is empty' % (repr(result),key))
                result = []

        # now try to insert the new information into the right place
        # find if items of this category already appear...
        # Never cache empty values
        if not (isinstance(result,list) and len(result)==0) and\
          store_value:
            if self[key].get("_definition.scope","Item")=='Item':
                if is_looped:
                    result = self.store_new_looped_value(key,cifdata,result,default_result)
                else:
                    result = self.store_new_unlooped_value(key,cifdata,result)
            else:
                self.store_new_cat_values(cifdata,result,the_category)
        return result
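
    # Editor's usage sketch (hypothetical file and dictionary names): derive
    # a dataname missing from a data block via its dREL method; store_value
    # controls whether the derived value is cached in the block.
    #
    #   cdic = CifDic('cif_core.dic')    # DDLm dictionary with methods
    #   cdic.initialise_drel()           # parse the dREL methods first
    #   blk = CifFile('data.cif').first_block()
    #   val = cdic.derive_item('_cell.volume', blk, store_value=False)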

    def store_new_looped_value(self,key,cifdata,result,default_result):
          """Store a looped value from the dREL system into a CifFile"""
          # try to change any matrices etc. to lists
          the_category = self[key]["_name.category_id"]
          out_result = result
          if result is not None and not default_result:
                  # find any numpy arrays
                  def conv_from_numpy(one_elem):
                      if not hasattr(one_elem,'dtype'):
                         if isinstance(one_elem,(list,tuple)):
                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
                         return one_elem
                      if one_elem.size > 1:   #so is not a float
                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
                      else:
                          try:
                            return one_elem.item(0)
                          except:
                            return one_elem
                  out_result = [conv_from_numpy(a) for a in result]
          # so out_result now contains a value suitable for storage
          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
          has_cat_names = [a for a in cat_names if a in cifdata]
          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
          if len(has_cat_names)>0:   #this category already exists
              cifdata[key] = out_result      #lengths must match or else!!
              cifdata.AddLoopName(has_cat_names[0],key)
          else:
              cifdata[key] = out_result
              cifdata.CreateLoop([key])
          print('Loop info:' + repr(cifdata.loops))
          return out_result

    def store_new_unlooped_value(self,key,cifdata,result):
          """Store a single value from the dREL system"""
          if result is not None and hasattr(result,'dtype'):
              if result.size > 1:
                  out_result = StarFile.StarList(result.tolist())
                  cifdata[key] = out_result
              else:
                  cifdata[key] = result.item(0)
          else:
              cifdata[key] = result
          return result

    def construct_category(self,category,cifdata,store_value=True):
        """Construct a category using DDLm attributes"""
        con_type = self[category].get('_category_construct_local.type',None)
        if con_type is None:
            return {}
        if con_type == 'Pullback' or con_type == 'Filter':
            morphisms  = self[category]['_category_construct_local.components']
            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
            cats = [self[a]['_name.category_id'] for a in morphisms]
            cat_keys = [self[a]['_category.key_id'] for a in cats]
            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
            if con_type == 'Filter':
                int_filter = self[category].get('_category_construct_local.integer_filter',None)
                text_filter = self[category].get('_category_construct_local.text_filter',None)
                if int_filter is not None:
                    morph_values.append([int(a) for a in int_filter])
                if text_filter is not None:
                    morph_values.append(text_filter)
                cat_values.append(range(len(morph_values[-1])))
            # create the mathematical product filtered by equality of dataname values
            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
            # now prepare for return
            if len(pullback_ids)==0:
                return {}
            newids = self[category]['_category_construct_local.new_ids']
            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
            if con_type == 'Pullback':
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
            elif con_type == 'Filter':   #simple filter
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
            if store_value:
                self.store_new_cat_values(cifdata,final_results,category)
            return final_results
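
    # Editor's sketch of the 'Pullback' pairing used above, with plain lists
    # standing in for the category data: rows of the two component
    # categories are paired whenever their mapped dataname values agree.
    #
    #   cat_values   = [['a','b'], [1, 2, 3]]       # key values per category
    #   morph_values = [['x','y'], ['y','x','x']]   # mapped value per row
    #   pairs = [(x,y) for x in cat_values[0] for y in cat_values[1]
    #            if morph_values[0][cat_values[0].index(x)] ==
    #               morph_values[1][cat_values[1].index(y)]]
    #   # -> [('a', 2), ('a', 3), ('b', 1)]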

    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
        """Each of the categories in source_categories are pullbacks that include
        the target_category"""
        target_key = self[target_category]['_category.key_id']
        result = {target_key:[]}
        first_time = True
        # for each source category, determine which element goes to the target
        for sc in source_categories:
            components = self[sc]['_category_construct_local.components']
            comp_cats = [self[c]['_name.category_id'] for c in components]
            new_ids = self[sc]['_category_construct_local.new_ids']
            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
            if len(components) == 2:  # not a filter
                element_pos = comp_cats.index(target_category)
                old_id = source_ids[element_pos]
                print('Using %s to populate %s' % (old_id,target_key))
                result[target_key].extend(cifdata[old_id])
                # project through all identical names
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
                # we only include keys that are common to all categories
                if first_time:
                    result.update(extra_result)
                else:
                    for k in extra_result.keys():
                        if k in result:
                            print('Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
            else:
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                    result[target_key].extend(cifdata[source_ids[0]])
                    for k in extra_result.keys():
                        if k in result:
                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
                        else:
                            result[k]=extra_result[k]
                    # Bonus derivation if there is a singleton filter
                    if self[sc]['_category_construct_local.type'] == 'Filter':
                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
                        if int_filter is not None:
                            filter_values = int_filter
                        else:
                            filter_values = text_filter
                        if len(filter_values)==1:    #a singleton
                            extra_dataname = self[sc]['_category_construct_local.components'][0]
                            if int_filter is not None:
                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                            else:
                                new_value = filter_values * len(cifdata[source_ids[0]])
                            if extra_dataname not in result:
                                result[extra_dataname] = new_value
                            else:
                                result[extra_dataname].extend(new_value)
                    else:
                        raise ValueError('Unexpected category construct type ' + self[sc]['_category_construct_local.type'])
            first_time = False
        # check for sanity - all dataname lengths must be identical
        datalen = len(set([len(a) for a in result.values()]))
        if datalen != 1:
            raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
        if store_value:
            print('Now storing ' + repr(result))
            self.store_new_cat_values(cifdata,result,target_category)
        return result

    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
        """Copy across datanames for which the from_category key equals [[key_vals]]"""
        result = {}
        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
        can_project = s_names_in_cat & t_names_in_cat
        can_project -= set(skip_names)  #already dealt with
        source_key = self[from_category]['_category.key_id']
        print('Source dataname set: ' + repr(s_names_in_cat))
        print('Target dataname set: ' + repr(t_names_in_cat))
        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
        for project_name in can_project:
            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
            if key_vals is None:
                try:
                    result[full_to_name] = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    pass
            else:
                all_key_vals = cifdata[source_key]
                filter_pos = [all_key_vals.index(a) for a in key_vals]
                try:
                    all_data_vals = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    continue    #cannot derive the source values, skip this name
                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
        return result

    def store_new_cat_values(self,cifdata,result,the_category):
        """Store the values in [[result]] into [[cifdata]]"""
        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
        double_names = [a for a in result.keys() if a in cifdata]
        if len(double_names)>0:
            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
            if set(already_present) != set(result.keys()):
                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
                return
            #check key values
            old_keys = set(cifdata[the_key[0]])
            common_keys = old_keys & set(result[the_key[0]])
            if len(common_keys)>0:
                print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
                return
            #extend result values with old values
            for one_name,one_value in result.items():
                result[one_name].extend(cifdata[one_name])
        for one_name, one_value in result.items():
            try:
                self.store_new_looped_value(one_name,cifdata,one_value,False)
            except StarFile.StarError:
                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
        #put the key as the first item
        print('Fixing item order for {}'.format(repr(the_key)))
        for one_key in the_key:  #should only be one
            cifdata.ChangeItemOrder(one_key,0)


    def generate_default_packet(self,catname,catkey,keyvalue):
        """Return a StarPacket with items from ``catname`` and a key value
        of ``keyvalue``"""
        newpack = StarPacket()
        for na in self.names_in_cat(catname):
            def_val = self[na].get("_enumeration.default","")
            if def_val:
                final_val = self.change_type(na,def_val)
                newpack.extend(final_val)
                setattr(newpack,na,final_val)
        if len(newpack)>0:
            newpack.extend(keyvalue)
            setattr(newpack,catkey,keyvalue)
        return newpack


    def switch_numpy(self,to_val):
        pass

    def change_type(self,itemname,inval):
        if inval == "?": return inval
        change_function = convert_type(self[itemname])
        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
            newval = list([change_function(a) for a in inval])
        else:
            newval = change_function(inval)
        return newval
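
    # Editor's examples (hypothetical datanames): values are converted
    # according to the dictionary type, element by element for looped data.
    #
    #   cdic.change_type('_cell.volume', '501.2')         # -> 501.2 (float)
    #   cdic.change_type('_cell.volume', ['1.0','2.0'])   # -> [1.0, 2.0]
    #   cdic.change_type('_cell.volume', '?')             # -> '?' (unchanged)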

    def install_validation_functions(self):
        """Install the DDL-appropriate validation checks"""
        if self.diclang != 'DDLm':
          self.item_validation_funs = [
            self.validate_item_type,
            self.validate_item_esd,
            self.validate_item_enum,   # functions which check conformance
            self.validate_enum_range,
            self.validate_looping]
          self.loop_validation_funs = [
            self.validate_loop_membership,
            self.validate_loop_key,
            self.validate_loop_references]    # functions checking loop values
          self.global_validation_funs = [
            self.validate_exclusion,
            self.validate_parent,
            self.validate_child,
            self.validate_dependents,
            self.validate_uniqueness] # where we need to look at other values
          self.block_validation_funs = [  # where only a full block will do
            self.validate_mandatory_category]
          self.global_remove_validation_funs = [
            self.validate_remove_parent_child] # removal is quicker with special checks
        elif self.diclang == 'DDLm':
            self.item_validation_funs = [
                self.validate_item_enum,
                self.validate_item_esd_ddlm,
                ]
            self.loop_validation_funs = [
                self.validate_looping_ddlm,
                self.validate_loop_key_ddlm,
                self.validate_loop_membership
                ]
            self.global_validation_funs = []
            self.block_validation_funs = [
                self.check_mandatory_items,
                self.check_prohibited_items
                ]
            self.global_remove_validation_funs = []
        self.optimize = False        # default value
        self.done_parents = []
        self.done_children = []
        self.done_keys = []

    def validate_item_type(self,item_name,item_value):
        def mymatch(m,a):
            res = m.match(a)
            if res is not None: return res.group()
            else: return ""
        target_type = self[item_name].get(self.type_spec)
        if target_type is None:          # e.g. a category definition
            return {"result":True}                  # not restricted in any way
        matchexpr = self.typedic[target_type]
        item_values = listify(item_value)
        #for item in item_values:
            #print("Type match " + item_name + " " + item + ":",)
        #skip dots and question marks
        check_all = [a for a in item_values if a !="." and a != "?"]
        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def decide(self,result_list):
        """Construct the return list"""
        if len(result_list)==0:
               return {"result":True}
        else:
               return {"result":False,"bad_values":result_list}

    def validate_item_container(self, item_name,item_value):
        container_type = self[item_name]['_type.container']
        item_values = listify(item_value)
        if container_type == 'Single':
           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
           return self.decide(okcheck)
        if container_type in ('Multiple','List'):
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           return self.decide(okcheck)
        if container_type == 'Array':    #A list with numerical values
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           first_check = self.decide(okcheck)
           #note: a further check that all elements are numeric is not implemented
           return first_check

    def validate_item_esd(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) != 'numb':
            return {"result":None}
        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
        if can_esd: return {"result":True}         #must be OK!
        item_values = listify(item_value)
        check_all = list([a for a in item_values if get_number_with_esd(a)[1] is not None])
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_item_esd_ddlm(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) not in \
        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
            return {"result":None}
        can_esd = True
        if self[item_name].get('_type.purpose') != 'Measurand':
            can_esd = False
        item_values = listify(item_value)
        check_all = [get_number_with_esd(a)[1] for a in item_values]
        check_all = [v for v in check_all if (can_esd and v is None) or \
                 (not can_esd and v is not None)]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_enum_range(self,item_name,item_value):
        if "_item_range.minimum" not in self[item_name] and \
           "_item_range.maximum" not in self[item_name]:
            return {"result":None}
        minvals = self[item_name].get("_item_range.minimum",default = ["."])
        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
        def makefloat(a):
            if a == ".": return a
            else: return float(a)
        maxvals = map(makefloat, maxvals)
        minvals = map(makefloat, minvals)
        rangelist = list(zip(minvals,maxvals))
        item_values = listify(item_value)
        def map_check(rangelist,item_value):
            if item_value == "?" or item_value == ".": return True
            iv,esd = get_number_with_esd(item_value)
            if iv is None: return None  #shouldn't happen as is numb type
            for lower,upper in rangelist:
                #check the minima
                if lower == ".": lower = iv - 1
                if upper == ".": upper = iv + 1
                if iv > lower and iv < upper: return True
                if upper == lower and iv == upper: return True
            # debug
            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
            return False
        check_all = [a for a in item_values if map_check(rangelist,a) != True]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_item_enum(self,item_name,item_value):
        try:
            enum_list = self[item_name][self.enum_spec][:]
        except KeyError:
            return {"result":None}
        enum_list.append(".")   #default value
        enum_list.append("?")   #unknown
        item_values = listify(item_value)
        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
        check_all = [a for a in item_values if a not in enum_list]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_looping(self,item_name,item_value):
        try:
            must_loop = self[item_name][self.must_loop_spec]
        except KeyError:
            return {"result":None}
        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
            return {"result":False}      #this could be triggered
        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
            return {"result":False}
        return {"result":True}

    def validate_looping_ddlm(self,loop_names):
        """Check that all names are loopable"""
        truly_loopy = self.get_final_cats(loop_names)
        if len(truly_loopy)<len(loop_names):  #some are bad
            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
            return {"result":False,"bad_items":not_looped}
        return {"result":True}


    def validate_loop_membership(self,loop_names):
        final_cat = self.get_final_cats(loop_names)
        bad_items =  [a for a in final_cat if a != final_cat[0]]
        if len(bad_items)>0:
            return {"result":False,"bad_items":bad_items}
        else: return {"result":True}

    def get_final_cats(self,loop_names):
        """Return a list of the uppermost parent categories for the loop_names. Names
        that are not from loopable categories are ignored."""
        try:
            categories = [self[a][self.cat_spec].lower() for a in loop_names]
        except KeyError:       #category is mandatory
            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
        return [self.parent_lookup[a] for a in truly_looped]

    def validate_loop_key(self,loop_names):
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present 
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            if isinstance(names_to_check,unicode):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue          #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    for alt_names in alternates:
                        alt = [a for a in alt_names if a in loop_names]
                        if len(alt) == 0:
                            return {"result":False,"bad_items":loop_key}  # no alternates
        return {"result":True}

    def validate_loop_key_ddlm(self,loop_names):
        """Make sure at least one of the necessary keys are available"""
        final_cats = self.get_final_cats(loop_names)
        if len(final_cats)>0:
            poss_keys = self.cat_key_table[final_cats[0]]
            found_keys = [a for a in poss_keys if a in loop_names]
            if len(found_keys)>0:
                return {"result":True}
            else:
                return {"result":False,"bad_items":poss_keys}
        else:
            return {"result":True}

    def validate_loop_references(self,loop_names):
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a is not None]
        # build a flat list.  For efficiency we don't remove duplicates,as
        # we expect no more than the order of 10 or 20 looped names.
        def flat_func(a,b):
            if isinstance(b,unicode):
               a.append(b)       #single name
            else:
               a.extend(b)       #list of names
            return a
        flat_mh = []
        [flat_func(flat_mh,a) for a in must_haves]
        group_mh = filter(lambda a:a[-1]=="_",flat_mh)
        single_mh = filter(lambda a:a[-1]!="_",flat_mh)
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            nl = map(lambda a:a[:len(s_item)],name_list)
            if s_item in nl: return True
            return False
        res_g = [a for a in group_mh if check_gr(a,loop_names)]
        if len(res) == 0 and len(res_g) == 0: return {"result":True}
        # construct alternate list
        alternates = [(a,self.get_alternates(a)) for a in res]
        missing_alts = [a[0] for a in alternates if a[1] == []]  #for error reporting
        alternates = [a for a in alternates if a[1] != []]
        if len(alternates) != len(res):
           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                       #doesn't have an altern
        #loop over alternates
        for orig_name,alt_names in alternates:
             alt = [a for a in alt_names if a in loop_names]
             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
        return {"result":True}        #found alternates

    def get_alternates(self,main_name,exclusive_only=False):
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates is not None:
            alt_names =  self[main_name].get(self.related_item,None)
            if isinstance(alt_names,unicode):
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                                             or a[1]=="replace"]
            else:
                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
            alt_names = list([a[0] for a in alt_names])
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names


    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
       alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
       item_name_list = [a.lower() for a in whole_block.keys()]
       item_name_list.extend([a.lower() for a in provisional_items.keys()])
       bad = [a for a in alternates if a in item_name_list]
       if len(bad)>0:
           print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
           return {"result":False,"bad_items":bad}
       else: return {"result":True}

    # validate that parent exists and contains matching values
    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        parent_item = self[item_name].get(self.parent_spec)
        if not parent_item: return {"result":None}   #no parent specified
        if isinstance(parent_item,list):
            parent_item = parent_item[0]
        if self.optimize:
            if parent_item in self.done_parents:
                return {"result":None}
            else:
                self.done_parents.append(parent_item)
                print("Done parents %s" % repr(self.done_parents))
        # initialise parent/child values
        if isinstance(item_value,unicode):
            child_values = [item_value]
        else: child_values = item_value[:]    #copy for safety
        # track down the parent
        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
        # we have collected all parent values into the global block - so no need to search
        # for them elsewhere.
        # print("Looking for {!r}".format(parent_item))
        parent_values = globals.get(parent_item)
        if not parent_values:
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
        if not parent_values:
            # go for alternates
            namespace = list(whole_block.keys())
            namespace.extend(provisional_items.keys())
            namespace.extend(globals.keys())
            alt_names = filter_present(self.get_alternates(parent_item),namespace)
            if len(alt_names) == 0:
                if len([a for a in child_values if a != "." and a != "?"])>0:
                    return {"result":False,"parent":parent_item}#no parent available -> error
                else:
                    return {"result":None}       #maybe True is more appropriate??
            parent_item = alt_names[0]           #should never be more than one??
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
            if not parent_values:   # check global block
                parent_values = globals.get(parent_item)
        if isinstance(parent_values,unicode):
            parent_values = [parent_values]
        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
        #                                          item_name, parent_values, child_values))
        missing = self.check_parent_child(parent_values,child_values)
        if len(missing) > 0:
            return {"result":False,"bad_values":missing,"parent":parent_item}
        return {"result":True}

    def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        try:
            child_items = self[item_name][self.child_spec][:]  #copy
        except KeyError:
            return {"result":None}    #not relevant
        # special case for dictionaries  -> we check parents of children only
        if item_name in globals:  #dictionary so skip
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        if isinstance(item_value,unicode): # single value
            parent_values = [item_value]
        else: parent_values = item_value[:]
        # expand child list with list of alternates
        for child_item in child_items[:]:
            child_items.extend(self.get_alternates(child_item))
        # now loop over the children
        for child_item in child_items:
            if self.optimize:
                if child_item in self.done_children:
                    return {"result":None}
                else:
                    self.done_children.append(child_item)
                    print("Done children %s" % repr(self.done_children))
            if child_item in provisional_items:
                child_values = provisional_items[child_item][:]
            elif child_item in whole_block:
                child_values = whole_block[child_item][:]
            else:  continue
            if isinstance(child_values,unicode):
                child_values = [child_values]
                # print("Checking child %s against %s, values %r/%r" % (child_item,
                #       item_name, child_values, parent_values))
            missing = self.check_parent_child(parent_values,child_values)
            if len(missing)>0:
                return {"result":False,"bad_values":missing,"child":child_item}
        return {"result":True}       #could mean that no child items present

    #a generic checker: all child vals should appear in parent_vals
    def check_parent_child(self,parent_vals,child_vals):
        # shield ourselves from dots and question marks
        pv = parent_vals[:]
        pv.extend([".","?"])
        res =  [a for a in child_vals if a not in pv]
        #print("Missing: %s" % res)
        return res
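
    # Editor's example: unknown ('?') and inapplicable ('.') child values
    # never count as missing.
    #
    #   cdic.check_parent_child(['C1','C2'], ['C1','?','C3'])   # -> ['C3']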

    def validate_remove_parent_child(self,item_name,whole_block):
        try:
            child_items = self[item_name][self.child_spec]
        except KeyError:
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        for child_item in child_items:
            if child_item in whole_block:
                return {"result":False,"child":child_item}
        return {"result":True}

    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
        try:
            dep_items = self[item_name][self.dep_spec][:]
        except KeyError:
            return {"result":None}    #not relevant
        if isinstance(dep_items,unicode):
            dep_items = [dep_items]
        actual_names = list(whole_block.keys())
        actual_names.extend(prov.keys())
        actual_names.extend(globals.keys())
        missing = [a for a in dep_items if a not in actual_names]
        if len(missing) > 0:
            alternates = map(lambda a:[self.get_alternates(a),a],missing)
            # compact way to get a list of alternative items which are
            # present
            have_check = [(filter_present(b[0],actual_names),
                                       b[1]) for b in alternates]
            have_check = list([a for a in have_check if len(a[0])==0])
            if len(have_check) > 0:
                have_check = [a[1] for a in have_check]
                return {"result":False,"bad_items":have_check}
        return {"result":True}

    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                                                                  globals={}):
        category = self[item_name].get(self.cat_spec)
        if category is None:
            print("No category found for %s" % item_name)
            return {"result":None}
        # print("Category {!r} for item {}".format(category, item_name))
        # we make a copy in the following as we will be removing stuff later!
        unique_i = self[category].get("_category_key.name",[])[:]
        if isinstance(unique_i,unicode):
            unique_i = [unique_i]
        if item_name not in unique_i:       #no need to verify
            return {"result":None}
        if isinstance(item_value,unicode):  #not looped
            return {"result":None}
        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
        # check that we can't optimize by not doing this check
        if self.optimize:
            if unique_i in self.done_keys:
                return {"result":None}
            else:
                self.done_keys.append(unique_i)
        val_list = []
        # get the matching data from any other data items
        unique_i.remove(item_name)
        other_data = []
        if len(unique_i) > 0:            # i.e. do have others to think about
           for other_name in unique_i:
               # we look for the value first in the provisional dict, then the main block
               # the logic being that anything in the provisional dict overrides the
               # main block
               if other_name in provisional_items:
                   other_data.append(provisional_items[other_name])
               elif other_name in whole_block:
                   other_data.append(whole_block[other_name])
               elif self[other_name].get(self.must_exist_spec)=="implicit":
                   other_data.append([item_name]*len(item_value))  #placeholder
               else:
                   return {"result":False,"bad_items":other_name}#missing data name
        # ok, so we go through all of our values
        # this works by comparing lists of strings to one other, and
        # so could be fooled if you think that '1.' and '1' are
        # identical
        for i in range(len(item_value)):
            #print("Value no. %d" % i, end=" ")
            this_entry = item_value[i]
            for j in range(len(other_data)):
                this_entry = " ".join([this_entry,other_data[j][i]])
            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
            if this_entry in val_list:
                return {"result":False,"bad_values":this_entry}
            val_list.append(this_entry)
        return {"result":True}


    def validate_mandatory_category(self,whole_block):
        mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
        if len(mand_cats) == 0:
            return {"result":True}
        # print("Mandatory categories - {!r}".format(mand_cats)
        # find which categories each of our datanames belongs to
        all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
        missing = set(mand_cats) - set(all_cats)
        if len(missing) > 0:
            return {"result":False,"bad_items":repr(missing)}
        return {"result":True}

    def check_mandatory_items(self,whole_block,default_scope='Item'):
        """Return an error if any mandatory items are missing"""
        if len(self.scopes_mandatory)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
        if len(missing)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":missing}

    def check_prohibited_items(self,whole_block,default_scope='Item'):
        """Return an error if any prohibited items are present"""
        if len(self.scopes_naughty)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
        if len(present)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":present}


    def run_item_validation(self,item_name,item_value):
        return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}

    def run_loop_validation(self,loop_names):
        return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}

    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
        results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
        return {item_name:results}

    def run_block_validation(self,whole_block,block_scope='Item'):
        results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs])
        # fix up the return values
        return {"whole_block":results}

    def optimize_on(self):
        self.optimize = True
        self.done_keys = []
        self.done_children = []
        self.done_parents = []

    def optimize_off(self):
        self.optimize = False
        self.done_keys = []
        self.done_children = []
        self.done_parents = []
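
The run_* methods above are the entry points that ValidCifBlock (documented
below) drives during validation, but they can also be called directly. A
minimal sketch, where 'cif_core.dic' and 'example.cif' stand in for any
available dictionary and data file:

    from CifFile import CifDic, ReadCif

    cdic = CifDic('cif_core.dic')               # placeholder dictionary file
    blk = ReadCif('example.cif').first_block()  # placeholder data file
    cdic.optimize_on()                          # cache keys already checked
    res = cdic.run_item_validation('_cell_length_a', blk['_cell_length_a'])
    # res maps the dataname to a list of (check_name, {"result":...}) pairs
    failures = [c for c in res['_cell_length_a'] if c[1]["result"] is False]
    cdic.optimize_off()                         # clear the optimisation caches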

Ancestors (in MRO)

  • CifDic
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def DDL1_normalise(

self)

def DDL1_normalise(self):
    # switch off block name collision checks
    self.standard = None
    # add default type information in DDL2 style
    # initial types and constructs
    base_types = ["char","numb","null"]
    prim_types = base_types[:]
    base_constructs = [".*",
        '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
        "\"\" "]
    for key,value in self.items():
       newnames = [key]  #keep by default
       if "_name" in value:
           real_name = value["_name"]
           if isinstance(real_name,list):        #looped values
               for looped_name in real_name:
                  new_value = value.copy()
                  new_value["_name"] = looped_name  #only looped name
                  self[looped_name] = new_value
               newnames = real_name
           else:
                  self[real_name] = value
                  newnames = [real_name]
       # delete the old one
       if key not in newnames:
          del self[key]
    # loop again to normalise the contents of each definition
    for key,value in self.items():
       #unlock the block
       save_overwrite = value.overwrite
       value.overwrite = True
       # deal with a missing _list, _type_conditions
       if "_list" not in value: value["_list"] = 'no'
       if "_type_conditions" not in value: value["_type_conditions"] = 'none'
       # deal with enumeration ranges
       if "_enumeration_range" in value:
           max,min = self.getmaxmin(value["_enumeration_range"])
           if min == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
           elif max == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
           else:
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
       #add any type construct information
       if "_type_construct" in value:
           base_types.append(value["_name"]+"_type")   #ie dataname_type
           base_constructs.append(value["_type_construct"]+"$")
           prim_types.append(value["_type"])     #keep a record
           value["_type"] = base_types[-1]   #the new type name
       # make categories conform with DDL2
       # note that we must remove everything from the last underscore
       if value.get("_category",None) == "category_overview":
            last_under = value["_name"].rindex("_")
            catid = value["_name"][1:last_under]
            value["_category.id"] = catid  #remove square brackets
            if catid not in self.cat_list: self.cat_list.append(catid)
       value.overwrite = save_overwrite
    # we now add any missing categories before filling in the rest of the
    # information
    for key,value in self.items():
        #print('processing ddl1 definition %s' % key)
        if "_category" in self[key]:
            if self[key]["_category"] not in self.cat_list:
                # rogue category, add it in
                newcat = self[key]["_category"]
                fake_name = "_" + newcat + "_[]"
                newcatdata = CifBlock()
                newcatdata["_category"] = "category_overview"
                newcatdata["_category.id"] = newcat
                newcatdata["_type"] = "null"
                self[fake_name] = newcatdata
                self.cat_list.append(newcat)
    # write out the type information in DDL2 style
    self.master_block.AddLoopItem((
        ("_item_type_list.code","_item_type_list.construct",
          "_item_type_list.primitive_code"),
        (base_types,base_constructs,prim_types)
        ))

def DDL2_normalise(

self)

def DDL2_normalise(self):
   listed_defs = list(filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()))
   # now filter out all the single element lists!
   dodgy_defs = list(filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs))
   for item_def in dodgy_defs:
            # print("DDL2 norm: processing %s" % item_def)
            thisdef = self[item_def]
            packet_no = thisdef['_item.name'].index(item_def)
            realcat = thisdef['_item.category_id'][packet_no]
            realmand = thisdef['_item.mandatory_code'][packet_no]
            # first add in all the missing categories
            # we don't replace the entry in the list corresponding to the
            # current item, as that would wipe out the information we want
            for child_no in range(len(thisdef['_item.name'])):
                if child_no == packet_no: continue
                child_name = thisdef['_item.name'][child_no]
                child_cat = thisdef['_item.category_id'][child_no]
                child_mand = thisdef['_item.mandatory_code'][child_no]
                if child_name not in self:
                    self[child_name] = CifBlock()
                    self[child_name]['_item.name'] = child_name
                self[child_name]['_item.category_id'] = child_cat
                self[child_name]['_item.mandatory_code'] = child_mand
            self[item_def]['_item.name'] = item_def
            self[item_def]['_item.category_id'] = realcat
            self[item_def]['_item.mandatory_code'] = realmand
   target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                 '_item_linked.parent_name' in self[a]]
   # now dodgy_defs contains all definition blocks with more than one child/parent link
   for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
   for item_def in dodgy_defs:
         print('Processing %s' % item_def)
         thisdef = self[item_def]
         child_list = thisdef['_item_linked.child_name']
         parents = thisdef['_item_linked.parent_name']
         # for each parent, find the list of children.
         family = list(zip(parents,child_list))
         notmychildren = family         #We aim to remove non-children
         # Loop over the parents, relocating as necessary
         while len(notmychildren):
            # get all children of first entry
            mychildren = [a for a in family if a[0]==notmychildren[0][0]]
            print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
            for parent,child in mychildren:   #parent is the same for all
                     # Make sure that we simply add in the new entry for the child, not replace it,
                     # otherwise we might spoil the child entry loop structure
                     try:
                         childloop = self[child].GetLoop('_item_linked.parent_name')
                     except KeyError:
                         print('Creating new parent entry %s for definition %s' % (parent,child))
                         self[child]['_item_linked.parent_name'] = [parent]
                         childloop = self[child].GetLoop('_item_linked.parent_name')
                         childloop.AddLoopItem(('_item_linked.child_name',[child]))
                         continue
                     else:
                         # A parent loop already exists and so will a child loop due to the
                         # call to create_pcloop above
                         pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                         goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                         if len(goodpars)>0:   #no need to add it
                             print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                             continue
                         print('Adding %s to %s entry' % (parent,child))
                         newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                         setattr(newpacket,'_item_linked.child_name',child)
                         setattr(newpacket,'_item_linked.parent_name',parent)
                         childloop.AddPacket(newpacket)
            #
            # Make sure the parent also points to the children.  We get
            # the current entry, then add our
            # new values if they are not there already
            #
            parent_name = mychildren[0][0]
            old_children = self[parent_name].get('_item_linked.child_name',[])
            old_parents = self[parent_name].get('_item_linked.parent_name',[])
            oldfamily = list(zip(old_parents,old_children))
            newfamily = []
            print('Old parents -> %s' % repr(old_parents))
            for jj, childname in mychildren:
                alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                if len(alreadythere)>0: continue
                print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                old_children.append(childname)
                old_parents.append(parent_name)
            # Now output the loop, blowing away previous definitions.  If there is something
            # else in this category, we are destroying it.
            newloop = CifLoopBlock(dimension=1)
            newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
            newloop.AddLoopItem(('_item_linked.child_name',old_children))
            del self[parent_name]['_item_linked.parent_name']
            del self[parent_name]['_item_linked.child_name']
            self[parent_name].insert_loop(newloop)
            print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
            # now make a new,smaller list
            notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]
   # now flatten any single element lists
   single_defs = list(filter(lambda a:len(self[a]['_item.name'])==1,listed_defs))
   for flat_def in single_defs:
       flat_keys = self[flat_def].GetLoop('_item.name').keys()
       for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
   # now deal with the multiple lists
   # next we do aliases
   all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
   for aliased in all_aliases:
      my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
      for alias in my_aliases:
          self[alias] = self[aliased].copy()   #we are going to delete stuff...
          del self[alias]["_item_aliases.alias_name"]

def NewBlock(

self, blockname, blockcontents=None, fix=True, parent=None)

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know

def SetTemplate(

self, template_file)

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(

self, **kwargs)

def WriteOut(self,**kwargs):
    myblockorder = self.get_full_child_list()
    self.set_grammar(self.grammar)
    self.standard = 'Dic'
    return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

class CifError

class CifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Format error: '+ self.value

Ancestors (in MRO)

  • CifError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class CifFile

class CifFile(StarFile.StarFile):
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""

Ancestors (in MRO)

  • CifFile
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def NewBlock(

self, blockname, blockcontents=None, fix=True, parent=None)

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know
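
A short sketch of the parent/child behaviour described above; block names and
datanames here are invented for illustration:

    from CifFile import CifFile, CifBlock

    cf = CifFile()
    cf.NewBlock('instrument', CifBlock())           # ordinary data block
    child = CifBlock()
    child['_detector.id'] = 'det1'                  # hypothetical dataname
    key = cf.NewBlock('frame_1', child, parent='instrument')
    # 'key' is the lower-cased name used for storage; whether the child is
    # written as a save frame depends on the grammar chosen at output time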

def SetTemplate(

self, template_file)

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(

self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring
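
A minimal sketch of writing a file to disk (the file name and dataname are
placeholders):

    from CifFile import CifFile, CifBlock

    cf = CifFile()
    cf.NewBlock('demo', CifBlock())
    cf['demo']['_cell_length_a'] = '5.959'
    cf.set_grammar('2.0')              # optional: choose the output grammar
    with open('demo.cif', 'w') as out:
        out.write(cf.WriteOut(wraplength=80))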

class CifLoopBlock

class CifLoopBlock(StarFile.LoopBlock):
    def __init__(self,data=(),**kwargs):
        super(CifLoopBlock,self).__init__(data,**kwargs)

Ancestors (in MRO)

  • CifLoopBlock
  • CifFile.StarFile.LoopBlock
  • __builtin__.object

Methods

def AddPacket(

self, packet)

def AddPacket(self,packet):
    for myitem in self.parent_block.loops[self.loop_no]:
        old_values = self.parent_block[myitem]
        old_values.append(packet.__getattribute__(myitem))
        self.parent_block[myitem] = old_values

def AddToLoop(

self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()
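
The recommended replacement builds the loop with the non-deprecated calls
documented in this section; the _atom_site datanames below are illustrative:

    from CifFile import CifBlock

    cb = CifBlock()
    cb.AddItem('_atom_site_label', ['C1', 'O1'])
    cb.AddItem('_atom_site_occupancy', ['1.0', '1.0'])
    cb.CreateLoop(['_atom_site_label', '_atom_site_occupancy'])
    # instead of AddToLoop('_atom_site_label', {'_atom_site_u_iso': [...]}):
    cb.AddItem('_atom_site_u_iso', ['0.01', '0.02'])
    cb.AddLoopName('_atom_site_label', '_atom_site_u_iso')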

def ChangeItemOrder(

self, itemname, newpos)

Change the position at which itemname appears when printing out to newpos.

def ChangeItemOrder(self,itemname,newpos):
    """Change the position at which `itemname` appears when printing out to `newpos`."""
    self.parent_block.loops[self.loop_no].remove(itemname.lower())
    self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

def GetItemOrder(

self)

Return a list of datanames in this LoopBlock in the order that they will be printed

def GetItemOrder(self):
    """Return a list of datanames in this `LoopBlock` in the order that they will be
    printed"""
    return self.parent_block.loops[self.loop_no][:]

def GetItemPosition(

self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetLoop(

self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)
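
Iterating over the returned LoopBlock yields one packet per row, with each
dataname available as an attribute. Continuing the _atom_site sketch above:

    for packet in cb.GetLoop('_atom_site_label'):
        print(getattr(packet, '_atom_site_label'),
              getattr(packet, '_atom_site_occupancy'))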

def GetLoopNames(

self, keyname)

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetPacket(

self, index)

def GetPacket(self,index):
    thispack = StarPacket([])
    for myitem in self.parent_block.loops[self.loop_no]:
        thispack.append(self[myitem][index])
        setattr(thispack,myitem,thispack[-1])
    return thispack

def RemoveItem(

self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveLoopItem(

self, itemname)

Deprecated. Use RemoveItem instead

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

class CifRecursionError

class CifRecursionError(Exception):
    def __init__(self,key_value,call_stack):
        self.key_value = key_value
        self.call_stack = call_stack
    def __str__(self):
        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))

Ancestors (in MRO)

  • CifRecursionError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class ValidCifBlock

A CifBlock that is valid with respect to a given CIF dictionary. Methods of CifBlock are overridden where necessary to disallow addition of invalid items to the CifBlock.

Initialisation

  • dic is a CifDic object to be used for validation.
class ValidCifBlock(CifBlock):
    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
    `CifBlock`.

    ## Initialisation

    * `dic` is a `CifDic` object to be used for validation.

    """
    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
        CifBlock.__init__(self,*args,**kwords)
        if dic and diclist:
            print("Warning: diclist argument ignored when initialising ValidCifBlock")
        if dic is not None:
            if not isinstance(dic,CifDic):
                raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
            self.fulldic = dic
        elif len(diclist)==0:
            raise ValidCifError( "At least one dictionary must be specified")
        else:
            self.fulldic = merge_dic(diclist,mergemode)
        if not self.run_data_checks()[0]:
            raise ValidCifError( self.report())

    def run_data_checks(self,verbose=False):
        self.v_result = {}
        self.fulldic.optimize_on()
        for dataname in self.keys():
            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
        for loop_names in self.loops.values():
            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
        # now run block-level checks
        update_value(self.v_result,self.fulldic.run_block_validation(self))
        # return false and list of baddies if anything didn't match
        self.fulldic.optimize_off()
        all_keys = list(self.v_result.keys()) #dictionary will change
        for test_key in all_keys:
            #print("%s: %r" % (test_key, self.v_result[test_key]))
            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
            if len(self.v_result[test_key]) == 0:
                del self.v_result[test_key]
        isvalid = len(self.v_result)==0
        #if not isvalid:
        #    print("Baddies: {!r}".format(self.v_result))
        return isvalid,self.v_result

    def single_item_check(self,item_name,item_value):
        #self.match_single_item(item_name)
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_item_validation(item_name,item_value)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def loop_item_check(self,loop_names):
        in_dic_names = list([a for a in loop_names if a in self.fulldic])
        if len(in_dic_names)==0:   #nothing defined in the dictionary to check
            return True,[]
        result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
        return isvalid,baddies

    def global_item_check(self,item_name,item_value,provisional_items={}):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_global_validation(item_name,
               item_value,self,provisional_items = provisional_items)
        baddies = list([a for a in result[item_name] if a[1]["result"] is False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def remove_global_item_check(self,item_name):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_remove_global_validation(item_name,self,False)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def AddToLoop(self,dataname,loopdata):
        # single item checks
        paired_data = loopdata.items()
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        # loop item checks; merge with current loop
        found = 0
        for aloop in self.block["loops"]:
            if dataname in aloop:
                loopnames = aloop.keys()
                for new_name in loopdata.keys():
                    if new_name not in loopnames: loopnames.append(new_name)
                valid,problems = self.loop_item_check(loopnames)
                self.report_if_invalid(valid,problems,dataname)
        prov_dict = loopdata.copy()
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
        CifBlock.AddToLoop(self,dataname,loopdata)

    def AddCifItem(self,data):
        if isinstance(data[0],(unicode,str)):   # single item
            valid,problems = self.single_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
            valid,problems = self.global_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
        elif isinstance(data[0],tuple) or isinstance(data[0],list):
            paired_data = list(zip(data[0],data[1]))
            for name,value in paired_data:
                valid,problems = self.single_item_check(name,value)
                self.report_if_invalid(valid,problems,name)
            valid,problems = self.loop_item_check(data[0])
            self.report_if_invalid(valid,problems,data[0])
            prov_dict = {}            # for storing temporary items
            for name,value in paired_data: prov_dict[name]=value
            for name,value in paired_data:
                del prov_dict[name]   # remove temporarily
                valid,problems = self.global_item_check(name,value,prov_dict)
                prov_dict[name] = value  # add back in
                self.report_if_invalid(valid,problems,name)
        else:
            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
        super(ValidCifBlock,self).AddCifItem(data)

    def AddItem(self,key,value,**kwargs):
        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
        valid,problems = self.single_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        valid,problems = self.global_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        super(ValidCifBlock,self).AddItem(key,value,**kwargs)

    # utility function
    def report_if_invalid(self,valid,bad_list,data_name):
        if not valid:
            bad_tests = [a[0] for a in bad_list]
            error_string = ",".join(bad_tests)
            error_string = repr(data_name) + " fails following validity checks: "  + error_string
            raise ValidCifError( error_string)

    def __delitem__(self,key):
        # we don't need to run single item checks; we do need to run loop and
        # global checks.
        if key in self:
            try:
                loop_items = self.GetLoop(key)
            except TypeError:
                loop_items = []
            if loop_items:             #need to check loop conformance
                loop_names = [a[0] for a in loop_items if a[0] != key]
                valid,problems = self.loop_item_check(loop_names)
                self.report_if_invalid(valid,problems,key)
            valid,problems = self.remove_global_item_check(key)
            self.report_if_invalid(valid,problems,key)
        self.RemoveCifItem(key)


    def report(self):
       outstr = StringIO()
       outstr.write( "Validation results\n")
       outstr.write( "------------------\n")
       print("%d invalid items found\n" % len(self.v_result))
       for item_name,val_func_list in self.v_result.items():
           outstr.write("%s fails following tests:\n" % item_name)
           for val_func in val_func_list:
               outstr.write("\t%s\n")
       return outstr.getvalue()
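
A minimal sketch of use, assuming a dictionary file 'cif_core.dic' is
available and that ValidCifError is importable from the same package:

    from CifFile import CifDic, ValidCifBlock, ValidCifError

    cdic = CifDic('cif_core.dic')            # placeholder dictionary file
    vb = ValidCifBlock(dic=cdic)
    vb.AddItem('_cell_length_a', '5.959')    # checked on the way in
    try:
        vb.AddItem('_cell_length_a', 'not a number')  # should fail type check
    except ValidCifError as e:
        print(e)                             # names the failed validity checks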

Ancestors (in MRO)

Methods

def AddCifItem(

self, data)

Inheritance: CifBlock.AddCifItem

DEPRECATED. Use AddItem instead.

def AddCifItem(self,data):
    if isinstance(data[0],(unicode,str)):   # single item
        valid,problems = self.single_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
        valid,problems = self.global_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
    elif isinstance(data[0],tuple) or isinstance(data[0],list):
        paired_data = list(zip(data[0],data[1]))
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        valid,problems = self.loop_item_check(data[0])
        self.report_if_invalid(valid,problems,data[0])
        prov_dict = {}            # for storing temporary items
        for name,value in paired_data: prov_dict[name]=value
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
    else:
        raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
    super(ValidCifBlock,self).AddCifItem(data)

def AddItem(

self, key, value, **kwargs)

Inheritance: CifBlock.AddItem

Set value of dataname key to value after checking for conformance with CIF dictionary

def AddItem(self,key,value,**kwargs):
    """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
    valid,problems = self.single_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    valid,problems = self.global_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    super(ValidCifBlock,self).AddItem(key,value,**kwargs)

def AddLoopItem(

self, incomingdata, precheck=False, maxlength=-1)

Inheritance: CifBlock.AddLoopItem

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(

self, oldname, newname)

Inheritance: CifBlock.AddLoopName

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass

def AddSingleCifItem(

self, key, value)

Inheritance: CifBlock.AddSingleCifItem

Deprecated. Use AddItem instead

def AddSingleCifItem(self,key,value):
    """*Deprecated*. Use `AddItem` instead"""
    """Add a single data item. If it is part of a loop, a separate call should be made"""
    self.AddItem(key,value)

def AddToLoop(

self, dataname, loopdata)

Inheritance: CifBlock.AddToLoop

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    # single item checks
    paired_data = loopdata.items()
    for name,value in paired_data:
        valid,problems = self.single_item_check(name,value)
        self.report_if_invalid(valid,problems,name)
    # loop item checks; merge with current loop
    found = 0
    for aloop in self.block["loops"]:
        if dataname in aloop:
            loopnames = aloop.keys()
            for new_name in loopdata.keys():
                if new_name not in loopnames: loopnames.append(new_name)
            valid,problems = self.loop_item_check(loopnames)
            self.report_if_invalid(valid,problems,dataname)
    prov_dict = loopdata.copy()
    for name,value in paired_data:
        del prov_dict[name]   # remove temporarily
        valid,problems = self.global_item_check(name,value,prov_dict)
        prov_dict[name] = value  # add back in
        self.report_if_invalid(valid,problems,name)
    CifBlock.AddToLoop(self,dataname,loopdata)

def ChangeItemOrder(

self, itemname, newpos)

Inheritance: CifBlock.ChangeItemOrder

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

def CreateLoop(

self, datanames, order=-1, length_check=True)

Inheritance: CifBlock.CreateLoop

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]
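
The length check means that all named columns must already hold the same
number of values; with the illustrative datanames below, the call fails:

    cb.AddItem('_symmetry_equiv_id', ['1', '2', '3'])
    cb.AddItem('_symmetry_equiv_pos_as_xyz', ['x,y,z', '-x,-y,-z'])
    cb.CreateLoop(['_symmetry_equiv_id', '_symmetry_equiv_pos_as_xyz'])
    # raises ValueError: the columns hold 3 and 2 values respectively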

def FindLoop(

self, keyname)

Inheritance: CifBlock.FindLoop

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1
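
The returned index can be used wherever a loop reference is accepted, for
example (continuing the _atom_site sketch earlier):

    loop_no = cb.FindLoop('_atom_site_label')
    if loop_no >= 0:
        print(cb.loops[loop_no])   # all (lower-cased) datanames in that loop
    else:
        print('not looped')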

def GetCompoundKeyedPacket(

self, keydict)

Inheritance: CifBlock.GetCompoundKeyedPacket

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]
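
For example, keying on a single column of the _atom_site sketch earlier (the
keydict generalises to any number of datanames):

    pkt = cb.GetCompoundKeyedPacket({'_atom_site_label': ('c1', True)})
    print(getattr(pkt, '_atom_site_occupancy'))   # other columns of that row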

def GetFullItemValue(

self, itemname)

Inheritance: CifBlock.GetFullItemValue

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True
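
For example:

    cb.AddItem('_cell_length_a', '5.959')
    val, ready = cb.GetFullItemValue('_cell_length_a')
    # val == '5.959'; ready is False, since string values are never
    # flagged as suitable for calculation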

def GetItemOrder(

self)

Inheritance: CifBlock.GetItemOrder

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(

self, itemname)

Inheritance: CifBlock.GetItemPosition

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetItemValue(

self, itemname)

Inheritance: CifBlock.GetItemValue

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(

self, keyname, keyvalue, no_case=False)

Inheritance: CifBlock.GetKeyedPacket

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]
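
For example, to pull out the _atom_site row labelled 'C1' from the sketch
earlier:

    pkt = cb.GetKeyedPacket('_atom_site_label', 'C1', no_case=True)
    print(getattr(pkt, '_atom_site_occupancy'))   # other columns of that row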

def GetKeyedSemanticPacket(

self, keyvalue, cat_id)

Inheritance: CifBlock.GetKeyedSemanticPacket

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p
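
A hedged sketch, assuming a DDLm dictionary has already been attached to the
block (for example via assign_dictionary) so that self.dictionary and the
category key table are available; the dictionary file name is hypothetical:

    from CifFile import CifDic
    dic = CifDic("cif_core_ddlm.dic", do_dREL=False)
    block.assign_dictionary(dic)
    # One packet spanning _atom_site and any joined child categories
    pkt = block.GetKeyedSemanticPacket('C1', 'atom_site')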

def GetLoop(self, keyname)

Inheritance: CifBlock.GetLoop

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)
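
For example, to iterate over the packets of a loop (dataname hypothetical):

    loop = block.GetLoop('_atom_site_label')
    for packet in loop:
        print(getattr(packet, '_atom_site_label'))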

def GetLoopNames(self, keyname)

Inheritance: CifBlock.GetLoopNames

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)
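
For example (dataname hypothetical):

    co_looped = block.GetLoopNames('_atom_site_label')
    # all datanames in the same loop, e.g. ['_atom_site_label', ...]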

def GetMultiKeyedSemanticPacket(self, keydict, cat_id)

Inheritance: CifBlock.GetMultiKeyedSemanticPacket

Return a complete packet for category cat_id where the key values are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs.
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p
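
A sketch of the keydict format, which maps each key dataname to a
(value, caseless) tuple (datanames are hypothetical):

    keydict = {'_atom_site.label': ('C1', True)}  # True: match caselessly
    pkt = block.GetMultiKeyedSemanticPacket(keydict, 'atom_site')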

def RemoveCifItem(self, itemname)

Inheritance: CifBlock.RemoveCifItem

Remove itemname from the CifBlock

def RemoveCifItem(self,itemname):
    """Remove `itemname` from the CifBlock"""
    self.RemoveItem(itemname)

def RemoveItem(self, itemname)

Inheritance: CifBlock.RemoveItem

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(self, keyname, keyvalue)

Inheritance: CifBlock.RemoveKeyedPacket

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]
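
For example, the following removes the matching row from every co-looped
dataname (datanames hypothetical):

    block.RemoveKeyedPacket('_atom_site_label', 'C1')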

def RemoveLoopItem(self, itemname)

Inheritance: CifBlock.RemoveLoopItem

Deprecated. Use RemoveItem instead

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(self, wraplength=80, maxoutlength=2048)

Inheritance: CifBlock.SetOutputLength

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength
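
For example:

    # Wrap at 70 columns where possible, never exceed 120
    block.SetOutputLength(wraplength=70, maxoutlength=120)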

class ValidCifError

class ValidCifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Validity error: ' + self.value

Ancestors (in MRO)

  • ValidCifError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class ValidCifFile

A CIF file for which all datablocks are valid. Argument dic to initialisation specifies a CifDic object to use for validation.

class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
        if not diclist and not dic and not hasattr(self,'bigdic'):
            raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
        if not dic and diclist:     #merge here for speed
            self.bigdic = merge_dic(diclist,mergemode)
        elif dic and not diclist:
            self.bigdic = dic
        CifFile.__init__(self,*args,**kwargs)
        for blockname in self.keys():
            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

    def NewBlock(self,blockname,blockcontents,**kwargs):
        CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
        # dictionary[blockname] is now a CifBlock object.  We
        # turn it into a ValidCifBlock object
        self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                         data=self.dictionary[blockname])
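
A hedged construction sketch (file names are hypothetical):

    from CifFile import CifDic, ValidCifFile
    dic = CifDic("cif_core.dic")
    # each datablock of example.cif is validated against dic on input
    vcf = ValidCifFile(dic=dic, datasource="example.cif")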

Ancestors (in MRO)

  • ValidCifFile
  • CifFile
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def NewBlock(self, blockname, blockcontents, **kwargs)

Inheritance: CifFile.NewBlock

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents,**kwargs):
    CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
    # dictionary[blockname] is now a CifBlock object.  We
    # turn it into a ValidCifBlock object
    self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                     data=self.dictionary[blockname])

def SetTemplate(self, template_file)

Inheritance: CifFile.SetTemplate

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Inheritance: CifFile.WriteOut

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring
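
For example (output file name hypothetical):

    outstring = cf.WriteOut(wraplength=70)
    with open("formatted.cif", "w") as f:
        f.write(outstring)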

class ValidationResult

Represents a validation result. It is initialised with the return value of the validate function.

class ValidationResult:
    """Represents validation result. It is initialised with """
    def __init__(self,results):
        """results is return value of validate function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for valid CIF file, otherwise False"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.valid_result.keys()
        for block_name in block_names:
            if not self.valid_result[block_name] == (True,{}):
                valid = False
                break
            else:
                valid = True
        return valid

    def has_no_match_items(self,block_name=None):
        """Return true if some items are not found in dictionary"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.no_matches.keys()
        for block_name in block_names:
            if self.no_matches[block_name]:
                has_no_match_items = True
                break
            else:
                has_no_match_items = False
        return has_no_match_items
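
A hedged sketch using the module-level Validate function, assumed here to
return the (valid_result, no_matches) pair that this class expects (the file
name is hypothetical):

    results = Validate("example.cif", dic=dic)
    vr = ValidationResult(results)
    print(vr.report(use_html=False))
    if vr.is_valid():
        print("CIF file is valid")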

Ancestors (in MRO)

  • ValidationResult

CifFile.CifFile_module module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

# Python 2,3 compatibility
try:
    from urllib import urlopen         # for arbitrary opening
    from urlparse import urlparse, urljoin
except ImportError:
    from urllib.request import urlopen
    from urllib.parse import urlparse, urljoin

# The unicode type does not exist in Python3 as the str type
# encompasses unicode.  PyCIFRW tests for 'unicode' would fail
# Suggestions for a better approach welcome.

if isinstance(u"abc",str):   #Python3
    unicode = str

__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""


import re,sys
from . import StarFile
from .StarFile import StarList  #put in global scope for exec statement
try:
    import numpy                   #put in global scope for exec statement
    from .drel import drel_runtime  #put in global scope for exec statement
except ImportError:
    pass                       #will fail when using dictionaries for calcs
from copy import copy          #must be in global scope for exec statement

def track_recursion(in_this_func):
    """Keep an eye on a function call to make sure that the key argument hasn't been
    seen before"""
    def wrapper(*args,**kwargs):
        key_arg = args[1]
        if key_arg in wrapper.called_list:
            print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg)))
            raise CifRecursionError( key_arg,wrapper.called_list[:])    #failure
        if len(wrapper.called_list) == 0:   #first time
            wrapper.stored_use_defaults = kwargs.get("allow_defaults",False)
            print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults))
        else:
            kwargs["allow_defaults"] = wrapper.stored_use_defaults
        wrapper.called_list.append(key_arg)
        print('Recursion watch: call stack: ' + repr(wrapper.called_list))
        try:
            result = in_this_func(*args,**kwargs)
        except StarFile.StarDerivationError as s:
            if len(wrapper.called_list) == 1: #no more
                raise StarFile.StarDerivationFailure(wrapper.called_list[0])
            else:
                raise
        finally:
            wrapper.called_list.pop()
            if len(wrapper.called_list) == 0:
                wrapper.stored_use_defaults = 'error'
        return result
    wrapper.called_list = []
    return wrapper

class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
        """When provided, `data` should be another CifBlock whose contents will be copied to
        this block.

        * if `strict` is set, maximum name lengths will be enforced

        * `maxoutlength` is the maximum length for output lines

        * `wraplength` is the ideal length to make output lines

        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
        is raised).

        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
        after setting the dataitem value.
        """
        if strict: maxnamelength=75
        else:
           maxnamelength=-1
        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
        self.dictionary = None   #DDL dictionary referring to this block
        self.compat_mode = compat_mode   #old-style behaviour of setitem

    def RemoveCifItem(self,itemname):
        """Remove `itemname` from the CifBlock"""
        self.RemoveItem(itemname)

    def __setitem__(self,key,value):
        self.AddItem(key,value)
        # for backwards compatibility make a single-element loop
        if self.compat_mode:
            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
                 # single element loop
                 self.CreateLoop([key])

    def copy(self):
        newblock = super(CifBlock,self).copy()
        return type(self)(newblock)   #catch inheritance (im_class is Python 2 only)

    def AddCifItem(self,data):
        """ *DEPRECATED*. Use `AddItem` instead."""
        # we accept only tuples, strings and lists!!
        if not (isinstance(data[0],(unicode,tuple,list,str))):
                  raise TypeError('Cif datanames are either a string, tuple or list')
        # we catch single item loops as well...
        if isinstance(data[0],(unicode,str)):
            self.AddSingleCifItem(data[0],list(data[1]))
            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
                self.CreateLoop([data[0]])
            return
        # otherwise, we loop over the datanames
        keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
        [self.AddSingleCifItem(a,b) for a,b in keyvals]
        # and create the loop
        self.CreateLoop(data[0][0])

    def AddSingleCifItem(self,key,value):
        """*Deprecated*. Use `AddItem` instead"""
        """Add a single data item. If it is part of a loop, a separate call should be made"""
        self.AddItem(key,value)

    def loopnames(self):
        return [self.loops[a] for a in self.loops]
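
# Usage sketch for CifBlock (comments only, keeping the module importable;
# datanames and values are hypothetical):
#   cb = CifBlock()
#   cb['_cell_length_a'] = '5.959(1)'        # plain dataname assignment
#   cb['_atom_site_label'] = ['C1', 'O1']    # set values, then group them:
#   cb.CreateLoop(['_atom_site_label'])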


class CifFile(StarFile.StarFile):
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""


class CifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Format error: '+ self.value

class ValidCifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Validity error: ' + self.value

class CifRecursionError(Exception):
    def __init__(self,key_value,call_stack):
        self.key_value = key_value
        self.call_stack = call_stack
    def __str__(self):
        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))


class DicBlock(StarFile.StarBlock):
    """A definition block within a dictionary, which allows imports
    to be transparently followed"""

    def __init__(self,*args,**kwargs):
        super(DicBlock,self).__init__(*args,**kwargs)
        self._import_cache = {}
        
    def __getitem__(self,dataname):
        value = None
        if super(DicBlock,self).has_key("_import.get") and self._import_cache:
            value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) 
        try:
            final_value = super(DicBlock,self).__getitem__(dataname)
        except KeyError:    #not there
            final_value = value
        if final_value is None:
            raise KeyError("%s not found" % dataname)
        return final_value

    def has_key(self,key):
        try:
            self[key]
        except KeyError:
            return False
        return True
    
    def add_dict_cache(self,name,cached):
        """Add a loaded dictionary to this block's cache"""
        self._import_cache[name]=cached
        
    def follow_import(self,import_info,dataname):
        """Find the dataname values from the imported dictionary. `import_info`
        is a list of import locations"""
        latest_value = None
        for import_ref in import_info:
            file_loc = import_ref["file"]
            if file_loc not in self._import_cache:
                raise ValueError("Dictionary for import %s not found" % file_loc)
            import_from = self._import_cache[file_loc]
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                    raise CifError('Import frame %s not found in %s' % (target_key,file_loc))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if mode == "contents":   #only this is used at this level
                latest_value = import_target.get(dataname,latest_value)
        return latest_value
    
class CifDic(StarFile.StarFile):
    """Create a Cif Dictionary object from the provided source, which can
    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
    only):

    * do_minimum (Boolean):
         Do not set up the dREL system for auto-calculation or perform
         imports.  This implies do_imports=False and do_dREL=False

    * do_imports = No/Full/Contents/All:
         If not 'No', intepret _import.get statements for
         Full mode/Contents mode/Both respectively. See also option 'heavy'

    * do_dREL = True/False:
         Parse and convert all dREL methods to Python. Implies do_imports=All

    * heavy = True/False:
         (Experimental). If True, importation overwrites definitions. If False,
         attributes are resolved dynamically.
    """
    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
                 grammar='auto',heavy=True,**kwargs):
        self.do_minimum = do_minimum
        if do_minimum:
            do_imports = 'No'
            do_dREL = False
        if do_dREL: do_imports = 'All'
        if heavy == 'Light' and do_imports not in ('contents','No'):
            raise ValueError("Light imports only available for mode 'contents'")
        self.template_cache = {}    #for DDLm imports
        self.ddlm_functions = {}    #for DDLm functions
        self.switch_numpy(False)    #no Numpy arrays returned
        super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs)
        self.standard = 'Dic'    #for correct output order
        self.scoping = 'dictionary'
        (self.dicname,self.diclang) = self.dic_determine()
        print('%s is a %s dictionary' % (self.dicname,self.diclang))
        self.scopes_mandatory = {}
        self.scopes_naughty = {}
        # rename and expand out definitions using "_name" in DDL dictionaries
        if self.diclang == "DDL1":
            self.DDL1_normalise()   #this removes any non-definition entries
        self.create_def_block_table() #From now on, [] uses definition_id
        if self.diclang == "DDL1":
            self.ddl1_cat_load()
        elif self.diclang == "DDL2":
            self.DDL2_normalise()   #iron out some DDL2 tricky bits
        elif self.diclang == "DDLm":
            self.scoping = 'dictionary'   #expose all save frames
            if do_imports != 'No':
                self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine
            self.create_alias_table()
            self.create_cat_obj_table()
            self.create_cat_key_table()
            if do_dREL:
                print('Doing full dictionary initialisation')
                self.initialise_drel()
        self.add_category_info(full=do_dREL)
        # initialise type information
        self.typedic={}
        self.primdic = {}   #typecode<->primitive type translation
        self.add_type_info()
        self.install_validation_functions()
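
    # Construction sketch (comments only; the dictionary file is hypothetical):
    #   dic = CifDic("cif_core_ddlm.dic", do_dREL=False)   # skip dREL setup
    #   dic = CifDic("cif_core_ddlm.dic", do_imports='Contents', heavy=False)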

    def dic_determine(self):
        if "on_this_dictionary" in self:
            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
            self.def_id_spec = "_name"
            self.cat_id_spec = "_category.id"   #we add this ourselves
            self.type_spec = "_type"
            self.enum_spec = "_enumeration"
            self.cat_spec = "_category"
            self.esd_spec = "_type_conditions"
            self.must_loop_spec = "_list"
            self.must_exist_spec = "_list_mandatory"
            self.list_ref_spec = "_list_reference"
            self.key_spec = "_list_mandatory"
            self.unique_spec = "_list_uniqueness"
            self.child_spec = "_list_link_child"
            self.parent_spec = "_list_link_parent"
            self.related_func = "_related_function"
            self.related_item = "_related_item"
            self.primitive_type = "_type"
            self.dep_spec = "xxx"
            self.cat_list = []   #to save searching all the time
            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
            return (name+version,"DDL1")
        elif len(self.get_roots()) == 1:              # DDL2/DDLm
            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])
            # now change to dictionary scoping
            self.scoping = 'dictionary'
            name = self.master_block["_dictionary.title"]
            version = self.master_block["_dictionary.version"]
            if self.master_block.has_key("_dictionary.class"):   #DDLm
                self.enum_spec = '_enumeration_set.state'
                self.key_spec = '_category.key_id'
                self.must_exist_spec = None
                self.cat_spec = '_name.category_id'
                self.primitive_type = '_type.contents'
                self.cat_id_spec = "_definition.id"
                self.def_id_spec = "_definition.id"
                return(name+version,"DDLm")
            else:   #DDL2
                self.cat_id_spec = "_category.id"
                self.def_id_spec = "_item.name"
                self.key_spec = "_category_mandatory.name"
                self.type_spec = "_item_type.code"
                self.enum_spec = "_item_enumeration.value"
                self.esd_spec = "_item_type_conditions.code"
                self.cat_spec = "_item.category_id"
                self.loop_spec = "there_is_no_loop_spec!"
                self.must_loop_spec = "xxx"
                self.must_exist_spec = "_item.mandatory_code"
                self.child_spec = "_item_linked.child_name"
                self.parent_spec = "_item_linked.parent_name"
                self.related_func = "_item_related.function_code"
                self.related_item = "_item_related.related_name"
                self.unique_spec = "_category_key.name"
                self.list_ref_spec = "xxx"
                self.primitive_type = "_type"
                self.dep_spec = "_item_dependent.dependent_name"
                return (name+version,"DDL2")
        else:
            raise CifError("Unable to determine dictionary DDL version")

    def DDL1_normalise(self):
        # switch off block name collision checks
        self.standard = None
        # add default type information in DDL2 style
        # initial types and constructs
        base_types = ["char","numb","null"]
        prim_types = base_types[:]
        base_constructs = [".*",
            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
            "\"\" "]
        for key,value in self.items():
           newnames = [key]  #keep by default
           if "_name" in value:
               real_name = value["_name"]
               if isinstance(real_name,list):        #looped values
                   for looped_name in real_name:
                      new_value = value.copy()
                      new_value["_name"] = looped_name  #only looped name
                      self[looped_name] = new_value
                   newnames = real_name
               else:
                      self[real_name] = value
                      newnames = [real_name]
           # delete the old one
           if key not in newnames:
              del self[key]
        # loop again to normalise the contents of each definition
        for key,value in self.items():
           #unlock the block
           save_overwrite = value.overwrite
           value.overwrite = True
           # deal with a missing _list, _type_conditions
           if "_list" not in value: value["_list"] = 'no'
           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
           # deal with enumeration ranges
           if "_enumeration_range" in value:
               max,min = self.getmaxmin(value["_enumeration_range"])
               if min == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
               elif max == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
               else:
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
           #add any type construct information
           if "_type_construct" in value:
               base_types.append(value["_name"]+"_type")   #ie dataname_type
               base_constructs.append(value["_type_construct"]+"$")
               prim_types.append(value["_type"])     #keep a record
               value["_type"] = base_types[-1]   #the new type name

        #make categories conform with ddl2
        #note that we must remove everything from the last underscore
           if value.get("_category",None) == "category_overview":
                last_under = value["_name"].rindex("_")
                catid = value["_name"][1:last_under]
                value["_category.id"] = catid  #remove square bracks
                if catid not in self.cat_list: self.cat_list.append(catid)
           value.overwrite = save_overwrite
        # we now add any missing categories before filling in the rest of the
        # information
        for key,value in self.items():
            #print('processing ddl1 definition %s' % key)
            if "_category" in self[key]:
                if self[key]["_category"] not in self.cat_list:
                    # rogue category, add it in
                    newcat = self[key]["_category"]
                    fake_name = "_" + newcat + "_[]"
                    newcatdata = CifBlock()
                    newcatdata["_category"] = "category_overview"
                    newcatdata["_category.id"] = newcat
                    newcatdata["_type"] = "null"
                    self[fake_name] = newcatdata
                    self.cat_list.append(newcat)
        # write out the type information in DDL2 style
        self.master_block.AddLoopItem((
            ("_item_type_list.code","_item_type_list.construct",
              "_item_type_list.primitive_code"),
            (base_types,base_constructs,prim_types)
            ))

    def ddl1_cat_load(self):
        deflist = self.keys()       #slight optimization
        cat_mand_dic = {}
        cat_unique_dic = {}
        # a function to extract any necessary information from each definition
        def get_cat_info(single_def):
            if self[single_def].get(self.must_exist_spec)=='yes':
                thiscat = self[single_def]["_category"]
                curval = cat_mand_dic.get(thiscat,[])
                curval.append(single_def)
                cat_mand_dic[thiscat] = curval
            # now the unique items...
            # cif_core.dic throws us a curly one: the value of list_uniqueness is
            # not the same as the defined item for publ_body_label, so we have
            # to collect both together.  We assume a non-listed entry, which
            # is true for all current (May 2005) ddl1 dictionaries.
            if self[single_def].get(self.unique_spec,None)!=None:
                thiscat = self[single_def]["_category"]
                new_unique = self[single_def][self.unique_spec]
                uis = cat_unique_dic.get(thiscat,[])
                if single_def not in uis: uis.append(single_def)
                if new_unique not in uis: uis.append(new_unique)
                cat_unique_dic[thiscat] = uis

        [get_cat_info(a) for a in deflist] # apply the above function
        for cat in cat_mand_dic.keys():
            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
        for cat in cat_unique_dic.keys():
            self[cat]["_category_key.name"] = cat_unique_dic[cat]

    def create_pcloop(self,definition):
        old_children = self[definition].get('_item_linked.child_name',[])
        old_parents = self[definition].get('_item_linked.parent_name',[])
        if isinstance(old_children,unicode):
             old_children = [old_children]
        if isinstance(old_parents,unicode):
             old_parents = [old_parents]
        if (len(old_children)==0 and len(old_parents)==0) or \
           (len(old_children) > 1 and len(old_parents)>1):
             return
        if len(old_children)==0:
             old_children = [definition]*len(old_parents)
        if len(old_parents)==0:
             old_parents = [definition]*len(old_children)
        newloop = CifLoopBlock(dimension=1)
        newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
        newloop.AddLoopItem(('_item_linked.child_name',old_children))
        try:
            del self[definition]['_item_linked.parent_name']
            del self[definition]['_item_linked.child_name']
        except KeyError:
            pass
        self[definition].insert_loop(newloop)



    def DDL2_normalise(self):
       # use lists rather than filter objects, as these are iterated repeatedly below
       listed_defs = [a for a in self.keys() if isinstance(self[a].get('_item.name'),list)]
       # now filter out all the single element lists!
       dodgy_defs = [a for a in listed_defs if len(self[a]['_item.name']) > 1]
       for item_def in dodgy_defs:
                # print("DDL2 norm: processing %s" % item_def)
                thisdef = self[item_def]
                packet_no = thisdef['_item.name'].index(item_def)
                realcat = thisdef['_item.category_id'][packet_no]
                realmand = thisdef['_item.mandatory_code'][packet_no]
                # first add in all the missing categories
                # we don't replace the entry in the list corresponding to the
                # current item, as that would wipe out the information we want
                for child_no in range(len(thisdef['_item.name'])):
                    if child_no == packet_no: continue
                    child_name = thisdef['_item.name'][child_no]
                    child_cat = thisdef['_item.category_id'][child_no]
                    child_mand = thisdef['_item.mandatory_code'][child_no]
                    if child_name not in self:
                        self[child_name] = CifBlock()
                        self[child_name]['_item.name'] = child_name
                    self[child_name]['_item.category_id'] = child_cat
                    self[child_name]['_item.mandatory_code'] = child_mand
                self[item_def]['_item.name'] = item_def
                self[item_def]['_item.category_id'] = realcat
                self[item_def]['_item.mandatory_code'] = realmand

       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                     '_item_linked.parent_name' in self[a]]
       # now dodgy_defs contains all definition blocks with more than one child/parent link
       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
       for item_def in dodgy_defs:
             print('Processing %s' % item_def)
             thisdef = self[item_def]
             child_list = thisdef['_item_linked.child_name']
             parents = thisdef['_item_linked.parent_name']
             # for each parent, find the list of children.
             family = list(zip(parents,child_list))
             notmychildren = family         #We aim to remove non-children
             # Loop over the parents, relocating as necessary
             while len(notmychildren):
                # get all children of first entry
                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
                for parent,child in mychildren:   #parent is the same for all
                         # Make sure that we simply add in the new entry for the child, not replace it,
                         # otherwise we might spoil the child entry loop structure
                         try:
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                         except KeyError:
                             print('Creating new parent entry %s for definition %s' % (parent,child))
                             self[child]['_item_linked.parent_name'] = [parent]
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
                             continue
                         else:
                             # A parent loop already exists and so will a child loop due to the
                             # call to create_pcloop above
                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                             if len(goodpars)>0:   #no need to add it
                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                                 continue
                             print('Adding %s to %s entry' % (parent,child))
                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                             setattr(newpacket,'_item_linked.child_name',child)
                             setattr(newpacket,'_item_linked.parent_name',parent)
                             childloop.AddPacket(newpacket)
                #
                # Make sure the parent also points to the children.  We get
                # the current entry, then add our
                # new values if they are not there already
                #
                parent_name = mychildren[0][0]
                old_children = self[parent_name].get('_item_linked.child_name',[])
                old_parents = self[parent_name].get('_item_linked.parent_name',[])
                oldfamily = list(zip(old_parents,old_children))  #list: re-scanned per child
                newfamily = []
                print('Old parents -> %s' % repr(old_parents))
                for jj, childname in mychildren:
                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                    if len(alreadythere)>0: continue
                    print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                    old_children.append(childname)
                    old_parents.append(parent_name)
                # Now output the loop, blowing away previous definitions.  If there is something
                # else in this category, we are destroying it.
                newloop = CifLoopBlock(dimension=1)
                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
                newloop.AddLoopItem(('_item_linked.child_name',old_children))
                del self[parent_name]['_item_linked.parent_name']
                del self[parent_name]['_item_linked.child_name']
                self[parent_name].insert_loop(newloop)
                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
                # now make a new,smaller list
                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]

       # now flatten any single element lists
       single_defs = [a for a in listed_defs if len(self[a]['_item.name'])==1]
       for flat_def in single_defs:
           flat_keys = self[flat_def].GetLoop('_item.name').keys()
           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
       # now deal with the multiple lists
       # next we do aliases
       all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
       for aliased in all_aliases:
          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
          for alias in my_aliases:
              self[alias] = self[aliased].copy()   #we are going to delete stuff...
              del self[alias]["_item_aliases.alias_name"]

    def ddlm_parse_valid(self):
        if "_dictionary_valid.application" not in self.master_block:
            return
        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
            scope = getattr(scope_pack,"_dictionary_valid.application")
            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
            if scope[1] == "Mandatory":
                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
            elif scope[1] == "Prohibited":
                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)

    def obtain_imports(self,import_mode,heavy=False):
        """Collate import information"""
        self._import_dics = []
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print('Import mode %s applied to following frames' % import_mode)
        print(str([a[0] for a in import_frames]))
        if import_mode != 'All':
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                dic_as_cif = CifFile(full_uri,grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if heavy:
                self.ddlm_import(parent_block,import_from,import_target,target_key,mode)
            else:
                self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode)
                
    def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'):
            """Import other dictionaries in place"""
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                child_frames = import_from.ddlm_all_children(from_cat_head)
                 # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
                print('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # it will never happen again...
            del self[parent_block]["_import.get"]

    def resolve_path(self,file_loc):
        url_comps = urlparse(file_loc)
        if url_comps[0]: return file_loc    #already full URI
        new_url = urljoin(self.my_uri,file_loc)
        #print("Transformed %s to %s for import " % (file_loc,new_url))
        return new_url

    def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'):
        """Register the imported dictionaries but do not alter any definitions. `parent_block`
        contains the id of the block that is importing. `import_target` is the block that
        should be imported. `import_from` is the CifFile that contains the definitions."""
        if mode == 'contents':   #merge attributes only
            self[parent_block].add_dict_cache(file_loc,import_from)
        elif mode =="full":
             # Check for Head merging Head
            if self[parent_block].get('_definition.class','Datum')=='Head' and \
               import_target.get('_definition.class','Datum')=='Head':
                   head_to_head = True
            else:
                   head_to_head = False
            # Figure out the actual definition ID
            head_id = import_target["_definition.id"]
            # Adjust parent information
            merging_cat = self[parent_block]['_name.object_id']
            from_cat_head = import_target['_name.object_id']
            if not head_to_head:   # imported category is only child
                import_target["_name.category_id"]=merging_cat
            self._import_dics = [(import_from,head_id)]+self._import_dics #prepend

    def lookup_imports(self,key):
        """Check the list of imported dictionaries for this definition"""
        for one_dic,head_def in self._import_dics:
            from_cat_head = one_dic[head_def]['_name.object_id']
            possible_keys = one_dic.ddlm_all_children(from_cat_head)
            if key in possible_keys:
                return one_dic[key]
        raise KeyError("%s not found in import dictionaries" % key)
        


    def create_def_block_table(self):
        """ Create an internal table matching definition to block id """
        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
        # now get the actual ids instead of blocks
        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
        # remove non-definitions
        if self.diclang != "DDL1":
            top_blocks = list([a[0].lower() for a in self.get_roots()])
        else:
            top_blocks = ["on_this_dictionary"]
        # catch dodgy duplicates
        uniques = set([a[0] for a in proto_table])
        if len(uniques)<len(proto_table):
            def_names = list([a[0] for a in proto_table])
            dodgy = [a for a in def_names if def_names.count(a)>1]
            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])

    def __getitem__(self,key):
        """Access a datablock by definition id, after the lookup has been created"""
        try:
            return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
        except AttributeError:   #block_id_table not present yet
            return super(CifDic,self).__getitem__(key)
        except KeyError: # key is missing
            try: # print('Definition for %s not found, reverting to CifFile' % key)
                return super(CifDic,self).__getitem__(key)
            except KeyError: # try imports
                return self.lookup_imports(key)
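
    # Lookup order implemented above: the definition-id table (case-insensitive)
    # first, then the raw block names, and finally any imported dictionaries. For
    # example (hypothetical datanames):
    #
    #   dic['_atom_site.label']    # found via block_id_table
    #   dic['some_block_name']     # falls back to the underlying CifFile lookup
    #   dic['_imported.name']      # resolved through lookup_imports()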

    def __setitem__(self,key,value):
        """Add a new definition block"""
        super(CifDic,self).__setitem__(key,value)
        try:
            self.block_id_table[value['_definition.id']]=key
        except AttributeError:   #does not exist yet
            pass

    def NewBlock(self,*args,**kwargs):
        """Create a new definition block and register its definition id"""
        newname = super(CifDic,self).NewBlock(*args,**kwargs)
        try:
            self.block_id_table[self[newname]['_definition.id']]=newname
        except AttributeError: #no block_id table yet
            pass
        return newname

    def __delitem__(self,key):
        """Remove a definition"""
        try:
            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
            del self.block_id_table[key.lower()]
        except (AttributeError,KeyError):   #block_id_table not present yet
            super(CifDic,self).__delitem__(key)
            return
        # fix other datastructures
        # cat_obj table

    def keys(self):
        """Return all definitions"""
        try:
            return self.block_id_table.keys()
        except AttributeError:
            return super(CifDic,self).keys()

    def has_key(self,key):
        return key in self

    def __contains__(self,key):
        try:
            return key.lower() in self.block_id_table
        except AttributeError:
            return super(CifDic,self).__contains__(key)

    def items(self):
        """Return (key,value) pairs"""
        return list([(a,self[a]) for a in self.keys()])

    def unlock(self):
        """Allow overwriting of all definitions in this collection"""
        for a in self.keys():
            self[a].overwrite=True

    def lock(self):
        """Disallow changes in definitions"""
        for a in self.keys():
            self[a].overwrite=False

    def rename(self,oldname,newname,blockname_as_well=True):
        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
        change the underlying blockname too."""
        if blockname_as_well:
            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)
            self.block_id_table[newname.lower()]=newname
            if oldname.lower() in self.block_id_table: #not removed
               del self.block_id_table[oldname.lower()]
        else:
            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
            del self.block_id_table[oldname.lower()]
            return

    def get_root_category(self):
        """Get the single 'Head' category of this dictionary"""
        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
        if len(root_cats)>1 or len(root_cats)==0:
            raise CifError("Cannot determine a unique Head category, got" % repr(root_cats))
        return root_cats[0]

    def ddlm_immediate_children(self,catname):
        """Return a list of datanames for the immediate children of catname.  These are
        semantic children (i.e. based on _name.category_id), not structural children as
        in the case of StarFile.get_immediate_children"""

        straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
        return list(straight_children)

    def ddlm_all_children(self,catname):
        """Return a list of all children, including the `catname`"""
        all_children = self.ddlm_immediate_children(catname)
        cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
        for c in cat_children:
            all_children.remove(c)
            all_children += self.ddlm_all_children(c)
        return all_children + [catname]
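
    # Sketch of the recursion above, assuming a hypothetical hierarchy in which
    # category 'geom' has a child category 'geom_bond' containing two items
    # (ordering is illustrative only):
    #
    #   dic.ddlm_all_children('geom')
    #   # -> ['_geom_bond.dist', '_geom_bond.atom_site_label_1', 'geom_bond', 'geom']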

    def is_semantic_child(self,parent,maybe_child):
        """Return true if `maybe_child` is a child of `parent`"""
        all_children = self.ddlm_all_children(parent)
        return maybe_child in all_children

    def ddlm_danglers(self):
        """Return a list of definitions that do not have a category defined
        for them, or are children of an unattached category"""
        top_block = self.get_root_category()
        connected = set(self.ddlm_all_children(top_block))
        all_keys = set(self.keys())
        unconnected = all_keys - connected
        return list(unconnected)

    def get_ddlm_parent(self,itemname):
        """Get the parent category of itemname"""
        parent = self[itemname].get('_name.category_id','')
        if parent == '':
            raise CifError("%s has no parent category" % itemname)
        return parent

    def expand_category_opt(self,name_list):
        """Return a list of all non-category items in a category or return the name
           if the name is not a category"""
        new_list = []
        for name in name_list:
          if self.get(name,{}).get('_definition.scope','Item') == 'Category':
            new_list += self.expand_category_opt([a for a in self.keys() if \
                     self[a].get('_name.category_id','').lower() == name.lower()])
          else:
            new_list.append(name)
        return new_list
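
    # Example of the expansion above (hypothetical dictionary content): categories
    # in the input list are replaced by their items, recursing through any
    # sub-categories, while plain item names pass through untouched:
    #
    #   dic.expand_category_opt(['cell', '_atom_site.label'])
    #   # -> ['_cell.length_a', '_cell.length_b', ..., '_atom_site.label']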

    def get_categories(self):
        """Return a list of category names"""
        return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

    def names_in_cat(self,cat,names_only=False):
        names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
        if not names_only:
            return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
        else:
            return list([self[a]["_name.object_id"] for a in names])



    def create_alias_table(self):
        """Populate an alias table that we can look up when searching for a dataname"""
        all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
        self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])

    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname"""
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats()
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
              nn = []
              if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
              # add all child names to the table
              child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                             for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
              child_names += extra_names
              # new (cat,obj) pairs are entered directly...
              new_names = [(obj,name) for obj,name in child_names if (parent_cat,obj) not in extra_table]
              # ...and the repeated ones get appended instead
              repeats = [(obj,name) for obj,name in child_names if (parent_cat,obj) in extra_table]
              extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in new_names]))
              for obj,name in repeats:
                  extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list
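
    # The table built above maps a lower-case (category, object) pair to a list of
    # datanames; a list is needed because a child category's dataname may also be
    # addressable through its looped parent category. A hypothetical entry:
    #
    #   dic.cat_obj_lookup_table[('atom_site', 'label')]
    #   # -> ['_atom_site.label']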

    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories. This
        is preferred to a fixed attribute as that fixed attribute would need to be
        updated after any edits"""
        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category. A key is
        a compound key, that is, it is a list"""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
                kk = []
                for child_cat in child_cats:
                    if child_cat in self.loop_expand_list:
                        kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                    # add these keys to our list
                    kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
                self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
                return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories' + repr(self.cat_key_table))

    def add_type_info(self):
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]  #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None:
                    fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None:
                    fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype

    def add_category_info(self,full=True):
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                            # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys

        else:
            self.parent_lookup = {}
            self.key_equivs = {}

    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]]"""
        self.unlock()
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])  #no underscore at the beginning
        self.lock()

    def create_catobj_name(self,cat,obj):
        """Combine category and object in approved fashion to create id"""
        return ('_'+cat+'.'+obj)

    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        defid = self[itemname]
        if defid['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:    #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        objid = defid['_name.object_id']
        defid['_name.category_id'] = catname
        newid = itemname # stays the same for categories
        if defid.get('_definition.scope','Item') == 'Item':
            newid = self.create_catobj_name(catname,objid)
            defid['_definition.id']= newid
            self.rename(itemname,newid)
        self.set_parent(catname,newid)
        self.lock()
        return newid

    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj. This is not used for
        categories, but can be used for dictionaries"""
        if '_dictionary.title' not in self[one_def]:  #not a dictionary block
            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,newid)
            self[newid]['_definition.id']=newid
            self[newid]['_name.object_id']=newobj
        else:
            self.unlock()
            newid = newobj
            self.rename(one_def,newobj)
            self[newid]['_dictionary.title'] = newid
        self.lock()
        return newid

    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
           is true, the parent category does not have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is None:
            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(semantic_root)>0:
                semantic_root = semantic_root[0]
            else:
                semantic_root = syntactic_root
        else:
            semantic_root = catparent
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if not allow_dangler or catparent is None:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        else:
            self[catname]['_name.category_id'] = catparent
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        if is_loop:
            self[catname]['_definition.class'] = 'Loop'
        else:
            self[catname]['_definition.class'] = 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname

    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made."""
        self.unlock()
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
        # update our dictionary structures
        self.block_id_table[fullname]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
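
    # Usage sketch for the two methods above (hypothetical names): create a looped
    # category under the Head category, then add an item definition to it:
    #
    #   dic.add_category('new_cat')
    #   dic.add_definition('_new_cat.value', 'new_cat')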

    def remove_definition(self,defname):
        """Remove a definition from the dictionary."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            children = self.ddlm_immediate_children(defname)
            [self.remove_definition(a) for a in children]
            cat_id = self[defname]['_definition.id'].lower()
        del self[defname]

    def get_cat_obj(self,name):
        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
        cat,obj = name.split('.')
        return (cat.strip('_'),obj)

    def get_name_by_cat_obj(self,category,object,give_default=False):
        """Return the dataname corresponding to the given category and object"""
        if category[0] == '_':    #accidentally left in
           true_cat = category[1:].lower()
        else:
           true_cat = category.lower()
        try:
            return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
        except KeyError:
            if give_default:
               return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
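
    # Usage sketch (hypothetical dictionary): case and any leading underscore are
    # normalised before lookup, and give_default synthesises a name on a miss:
    #
    #   dic.get_name_by_cat_obj('atom_site', 'label')    # '_atom_site.label'
    #   dic.get_name_by_cat_obj('_Atom_Site', 'Label')   # same result
    #   dic.get_name_by_cat_obj('nosuch', 'obj', give_default=True)
    #   # -> '_nosuch.obj'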


    def WriteOut(self,**kwargs):
        myblockorder = self.get_full_child_list()
        self.set_grammar(self.grammar)
        self.standard = 'Dic'
        return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

    def get_full_child_list(self):
        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
        top_block = self.get_roots()[0][0]
        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
        if len(root_cat) == 1:
            all_names = [top_block] + self.recurse_child_list(root_cat[0])
            unrooted = self.ddlm_danglers()
            double_names =  set(unrooted).intersection(set(all_names))
            if len(double_names)>0:
                raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
            remaining = unrooted[:]
            for no_root in unrooted:
                if self[no_root].get('_definition.scope','Item')=='Category':
                    all_names += [no_root]
                    remaining.remove(no_root)
                    these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                    all_names += these_children
                    [remaining.remove(n) for n in these_children]
            # now sort by category
            ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
            for e in ext_cats:
                cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
                [remaining.remove(n) for n in cat_items]
                all_names += cat_items
            if len(remaining)>0:
                print('WARNING: following items do not seem to belong to a category??')
                print(repr(remaining))
                all_names += remaining
            print('Final block order: ' + repr(all_names))
            return all_names
        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')

    def cat_from_name(self,one_name):
        """Guess the category from the name. This should be used only when this is not important semantic information,
        for example, when printing out"""
        (cat,obj) = one_name.split(".")
        if cat[0] == "_": cat = cat[1:]
        return cat

    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        final_list = [parentname]
        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
        child_blocks.sort()    #we love alphabetical order
        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
        final_list += child_items
        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
        for child_cat in child_cats:
            final_list += self.recurse_child_list(child_cat)
        return final_list



    def get_key_pack(self,category,value,data):
        keyname = self[category][self.unique_spec]
        onepack = data.GetPackKey(keyname,value)
        return onepack

    @staticmethod
    def get_number_with_esd(numstring):
        """Parse [[numstring]] into a (value,esd) tuple, returning (None,None) for '?' and '.'"""
        numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
        our_match = re.match(numb_re,numstring)
        if our_match:
            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
        else:
            return None,None
        if dot or q: return None,None     #a dot or question mark
        if exp:          #has exponent
           exp = exp.replace("d","e")     # mop up old fashioned numbers
           exp = exp.replace("D","e")
           base_num = base_num + exp
        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
        base_num = float(base_num)
        # work out esd, if present.
        if esd:
            esd = float(esd[1:-1])    # no brackets
            if dad:                   # decimal point + digits
                esd = esd * (10 ** (-1* len(dad)))
            if exp:
                esd = esd * (10 ** (float(exp[1:])))
        return base_num,esd
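
    # Worked examples for the parse above: the esd in parentheses is scaled by the
    # number of decimal places, and again by any exponent:
    #
    #   get_number_with_esd('1.23(4)')     # -> (1.23, 0.04)
    #   get_number_with_esd('4.5(3)e-2')   # -> (0.045, 0.003)
    #   get_number_with_esd('?')           # -> (None, None)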

    def getmaxmin(self,rangeexp):
        """Parse the range expression [[rangeexp]] of the form 'min:max', returning
        (maximum,minimum) with '.' standing in for an unspecified bound"""
        regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        regexp = regexp + ":" + regexp
        rangematch = re.match(regexp,rangeexp)
        if rangematch is None:
            raise CifError("Can't match range expression %s" % rangeexp)
        minimum = rangematch.group(1)
        maximum = rangematch.group(7)
        if minimum == None: minimum = "."
        else: minimum = float(minimum)
        if maximum == None: maximum = "."
        else: maximum = float(maximum)
        return maximum,minimum
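
    # Example of the range parse above: 'min:max' yields (maximum, minimum), with
    # '.' standing in for an unspecified bound:
    #
    #   dic.getmaxmin('0.0:90.0')   # -> (90.0, 0.0)
    #   dic.getmaxmin('0.0:')       # -> ('.', 0.0)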

    def initialise_drel(self):
        """Parse drel functions and prepare data structures in dictionary"""
        self.ddlm_parse_valid() #extract validity information from data block
        self.transform_drel()   #parse the drel functions
        self.add_drel_funcs()   #put the drel functions into the namespace

    def transform_drel(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        import traceback
        parser = drel_ast_yacc.parser
        lexer = drel_ast_yacc.lexer
        my_namespace = self.keys()
        my_namespace = dict(zip(my_namespace,my_namespace))
        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                              and self[a].get("_name.category_id","")!= "function"]
        for derivable in derivable_list:
            target_id = derivable
            # reset the list of visible names for parser
            special_ids = [dict(zip(self.keys(),self.keys()))]
            print("Target id: %s" % derivable)
            drel_exprs = self[derivable]["_method.expression"]
            drel_purposes = self[derivable]["_method.purpose"]
            all_methods = []
            if not isinstance(drel_exprs,list):
                drel_exprs = [drel_exprs]
                drel_purposes = [drel_purposes]
            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
                if drel_purpose != 'Evaluation':
                    continue
                drel_expr = "\n".join(drel_expr.splitlines())
                # print("Transforming %s" % drel_expr)
                # List categories are treated differently...
                try:
                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
                except Exception:
                    print('Syntax error in method for %s; leaving as is' % derivable)
                    a,b = sys.exc_info()[:2]
                    print((repr(a),repr(b)))
                    traceback.print_tb(sys.exc_info()[-1],None,sys.stdout)
                    # reset the lexer
                    lexer.begin('INITIAL')
                    continue
                # Construct the python method
                cat_meth = False
                if self[derivable].get('_definition.scope','Item') == 'Category':
                    cat_meth = True
                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                                           loopable=loop_info,
                                                             cif_dic = self,cat_meth=cat_meth)
                all_methods.append(pyth_meth)
            if len(all_methods)>0:
                save_overwrite = self[derivable].overwrite
                self[derivable].overwrite = True
                self[derivable]["_method.py_expression"] = all_methods
                self[derivable].overwrite = save_overwrite
            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))

    def add_drel_funcs(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
        funcnames = [(self[a]["_name.object_id"],
                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
        # create executable python code...
        parser = drel_ast_yacc.parser
        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        for funcname,funcbody in funcnames:
            newline_body = "\n".join(funcbody.splitlines())
            parser.target_id = funcname
            res_ast = parser.parse(newline_body)
            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
            #print('dREL library function ->\n' + py_function)
            global_table = globals()
            exec(py_function, global_table)    #add to namespace
        #print('Globals after dREL functions added:' + repr(globals()))
        self.ddlm_functions = globals()  #for outside access

    @track_recursion
    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
        key = start_key   #starting value
        result = None     #success is a non-None value
        default_result = False #we have not used a default value
        # check for aliases
        # check for an older form of a new value
        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
        if len(found_it)>0:
            corrected_type = self.change_type(key,cifdata[found_it[0]])
            return corrected_type
        # now do the reverse check - any alternative form
        alias_name = [a for a in self.alias_table.items() if key in a[1]]
        print('Aliases for %s: %s' % (key,repr(alias_name)))
        if len(alias_name)==1:
            key = alias_name[0][0]   #actual definition name
            if key in cifdata: return self.change_type(key,cifdata[key])
            found_it = [k for k in alias_name[0][1] if k in cifdata]
            if len(found_it)>0:
                return self.change_type(key,cifdata[found_it[0]])
        elif len(alias_name)>1:
            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

        the_category = self[key]["_name.category_id"]
        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        # store any default value in case we have a problem
        def_val = self[key].get("_enumeration.default","")
        def_index_val = self[key].get("_enumeration.def_index_id","")
        if len(has_cat_names)==0: # try category method
            cat_result = {}
            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
            pulled_from_cats = [(k,[
                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
                               ) for k in pulled_from_cats]
            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
            if '_category_construct_local.type' in self[the_category]:
                print("**Now constructing category %s using DDLm attributes**" % the_category)
                try:
                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s (error)' % the_category)
            # Trying a pull-back when the category is partially populated
            # will not work, hence we test that cat_result has no keys
            if len(pulled_to_cats)>0 and len(cat_result)==0:
                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
                try:
                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s from pullback information (error)' % the_category)
            if '_method.py_expression' in self[the_category] and key not in cat_result:
                print("**Now applying category method for %s in search of %s**" % (the_category,key))
                cat_result = self.derive_item(the_category,cifdata,store_value=True)
            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
            # do we now have our value?
            if key in cat_result:
                return cat_result[key]

        # Recalculate in case it actually worked
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        the_funcs = self[key].get('_method.py_expression',"")
        if the_funcs:   #attempt to calculate it
            #global_table = globals()
            #global_table.update(self.ddlm_functions)
            for one_func in the_funcs:
                print('Executing function for %s:' % key)
                #print(one_func)
                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
                # print('in following global environment: ' + repr(global_table))
                stored_setting = cifdata.provide_value
                cifdata.provide_value = True
                try:
                    result = pyfunc(cifdata)
                except CifRecursionError as s:
                    print(s)
                    result = None
                except StarFile.StarDerivationError as s:
                    print(s)
                    result = None
                finally:
                    cifdata.provide_value = stored_setting
                if result is not None:
                    break
                #print("Function returned {!r}".format(result))

        if result is None and allow_defaults:   # try defaults
            if def_val:
                result = self.change_type(key,def_val)
                default_result = True
            elif def_index_val:            #derive a default value
                index_vals = self[key]["_enumeration_default.index"]
                val_to_index = cifdata[def_index_val]     #what we are keying on
                lcase_comp = False            #case-sensitive comparison by default
                if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
                    lcase_comp = True
                    index_vals = [a.lower() for a in index_vals]
                # Handle loops
                if isinstance(val_to_index,list):
                    if lcase_comp:
                        val_to_index = [a.lower() for a in val_to_index]
                    keypos = [index_vals.index(a) for a in val_to_index]
                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
                else:
                    if lcase_comp:
                        val_to_index = val_to_index.lower()
                    keypos = index_vals.index(val_to_index)   #value error if no such value available
                    result = self[key]["_enumeration_default.value"][keypos]
                    default_result = True   #flag that it must be extended
                result = self.change_type(key,result)
                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

        # read it in
        if result is None:   #can't do anything else
            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
            raise StarFile.StarDerivationError(start_key)
        is_looped = False
        if self[the_category].get('_definition.class','Set')=='Loop':
            is_looped = True
            if len(has_cat_names)>0:   #this category already exists
                if result is None or default_result: #need to create a list of values
                    loop_len = len(cifdata[has_cat_names[0]])
                    out_result = [result]*loop_len
                    result = out_result
            else:   #nothing exists in this category, we can't store this at all
                print('Resetting result %s for %s to null list as category is empty' % (repr(result),key))
                result = []

        # now try to insert the new information into the right place
        # find if items of this category already appear...
        # Never cache empty values
        if not (isinstance(result,list) and len(result)==0) and\
          store_value:
            if self[key].get("_definition.scope","Item")=='Item':
                if is_looped:
                    result = self.store_new_looped_value(key,cifdata,result,default_result)
                else:
                    result = self.store_new_unlooped_value(key,cifdata,result)
            else:
                self.store_new_cat_values(cifdata,result,the_category)
        return result
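
    # Usage sketch for the derivation machinery above, assuming `dic` is a DDLm
    # dictionary and `cf` a data block missing '_cell.volume':
    #
    #   dic.initialise_drel()
    #   value = dic.derive_item('_cell.volume', cf, store_value=True)
    #   # aliases, category construction and dREL methods are tried in turn,
    #   # with dictionary defaults as a last resort when allow_defaults is True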

    def store_new_looped_value(self,key,cifdata,result,default_result):
          """Store a looped value from the dREL system into a CifFile"""
          # try to change any matrices etc. to lists
          the_category = self[key]["_name.category_id"]
          out_result = result
          if result is not None and not default_result:
                  # find any numpy arrays
                  def conv_from_numpy(one_elem):
                      if not hasattr(one_elem,'dtype'):
                         if isinstance(one_elem,(list,tuple)):
                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
                         return one_elem
                      if one_elem.size > 1:   #so is not a float
                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
                      else:
                          try:
                            return one_elem.item(0)
                          except Exception:
                            return one_elem
                  out_result = [conv_from_numpy(a) for a in result]
          # so out_result now contains a value suitable for storage
          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
          has_cat_names = [a for a in cat_names if a in cifdata]
          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
          if len(has_cat_names)>0:   #this category already exists
              cifdata[key] = out_result      #lengths must match or else!!
              cifdata.AddLoopName(has_cat_names[0],key)
          else:
              cifdata[key] = out_result
              cifdata.CreateLoop([key])
          print('Loop info:' + repr(cifdata.loops))
          return out_result

    def store_new_unlooped_value(self,key,cifdata,result):
          """Store a single value from the dREL system"""
          if result is not None and hasattr(result,'dtype'):
              if result.size > 1:
                  out_result = StarFile.StarList(result.tolist())
                  cifdata[key] = out_result
              else:
                  cifdata[key] = result.item(0)
          else:
              cifdata[key] = result
          return result

    def construct_category(self,category,cifdata,store_value=True):
        """Construct a category using DDLm attributes"""
        con_type = self[category].get('_category_construct_local.type',None)
        if con_type == None:
            return {}
        if con_type == 'Pullback' or con_type == 'Filter':
            morphisms  = self[category]['_category_construct_local.components']
            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
            cats = [self[a]['_name.category_id'] for a in morphisms]
            cat_keys = [self[a]['_category.key_id'] for a in cats]
            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
            if con_type == 'Filter':
                int_filter = self[category].get('_category_construct_local.integer_filter',None)
                text_filter = self[category].get('_category_construct_local.text_filter',None)
                if int_filter is not None:
                    morph_values.append([int(a) for a in int_filter])
                if text_filter is not None:
                    morph_values.append(text_filter)
                cat_values.append(range(len(morph_values[-1])))
            # create the mathematical product filtered by equality of dataname values
            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
            # now prepare for return
            if len(pullback_ids)==0:
                return {}
            newids = self[category]['_category_construct_local.new_ids']
            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
            if con_type == 'Pullback':
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
            elif con_type == 'Filter':   #simple filter
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
            if store_value:
                self.store_new_cat_values(cifdata,final_results,category)
            return final_results

    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
        """Each of the categories in source_categories are pullbacks that include
        the target_category"""
        target_key = self[target_category]['_category.key_id']
        result = {target_key:[]}
        first_time = True
        # for each source category, determine which element goes to the target
        for sc in source_categories:
            components = self[sc]['_category_construct_local.components']
            comp_cats = [self[c]['_name.category_id'] for c in components]
            new_ids = self[sc]['_category_construct_local.new_ids']
            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
            if len(components) == 2:  # not a filter
                element_pos = comp_cats.index(target_category)
                old_id = source_ids[element_pos]
                print('Using %s to populate %s' % (old_id,target_key))
                result[target_key].extend(cifdata[old_id])
                # project through all identical names
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
                # we only include keys that are common to all categories
                if first_time:
                    result.update(extra_result)
                else:
                    for k in extra_result.keys():
                        if k in result:
                            print('Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
            else:
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                    result[target_key].extend(cifdata[source_ids[0]])
                    for k in extra_result.keys():
                        if k in result:
                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
                        else:
                            result[k]=extra_result[k]
                    # Bonus derivation if there is a singleton filter
                    if self[sc]['_category_construct_local.type'] == 'Filter':
                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
                        if int_filter is not None:
                            filter_values = int_filter
                        else:
                            filter_values = text_filter
                        if len(filter_values)==1:    #a singleton
                            extra_dataname = self[sc]['_category_construct_local.components'][0]
                            if int_filter is not None:
                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                            else:
                                new_value = filter_values * len(cifdata[source_ids[0]])
                            if extra_dataname not in result:
                                result[extra_dataname] = new_value
                            else:
                                result[extra_dataname].extend(new_value)
                    else:
                        raise ValueError('Unexpected category construct type ' + self[sc]['_category_construct_local.type'])
            first_time = False
        # check for sanity - all dataname lengths must be identical
        datalen = len(set([len(a) for a in result.values()]))
        if datalen != 1:
            raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
        if store_value:
            print('Now storing ' + repr(result))
            self.store_new_cat_values(cifdata,result,target_category)
        return result

    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
        """Copy across datanames for which the from_category key equals [[key_vals]]"""
        result = {}
        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
        can_project = s_names_in_cat & t_names_in_cat
        can_project -= set(skip_names)  #already dealt with
        source_key = self[from_category]['_category.key_id']
        print('Source dataname set: ' + repr(s_names_in_cat))
        print('Target dataname set: ' + repr(t_names_in_cat))
        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
        for project_name in can_project:
            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
            if key_vals is None:
                try:
                    result[full_to_name] = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    pass
            else:
                all_key_vals = cifdata[source_key]
                filter_pos = [all_key_vals.index(a) for a in key_vals]
                try:
                    all_data_vals = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    continue    #no values obtainable for this dataname
                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
        return result

    def store_new_cat_values(self,cifdata,result,the_category):
        """Store the values in [[result]] into [[cifdata]]"""
        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
        double_names = [a for a in result.keys() if a in cifdata]
        if len(double_names)>0:
            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
            if set(already_present) != set(result.keys()):
                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
                return
            #check key values
            old_keys = set(cifdata[the_key[0]])
            common_keys = old_keys & set(result[the_key[0]])
            if len(common_keys)>0:
                print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
                return
            #extend result values with old values
            for one_name,one_value in result.items():
                result[one_name].extend(cifdata[one_name])
        for one_name, one_value in result.items():
            try:
                self.store_new_looped_value(one_name,cifdata,one_value,False)
            except StarFile.StarError:
                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
        #put the key as the first item
        print('Fixing item order for {}'.format(repr(the_key)))
        for one_key in the_key:  #should only be one
            cifdata.ChangeItemOrder(one_key,0)


    def generate_default_packet(self,catname,catkey,keyvalue):
        """Return a StarPacket with items from ``catname`` and a key value
        of ``keyvalue``"""
        newpack = StarPacket()
        for na in self.names_in_cat(catname):
            def_val = self[na].get("_enumeration.default","")
            if def_val:
                final_val = self.change_type(na,def_val)
                newpack.extend(final_val)
                setattr(newpack,na,final_val)
        if len(newpack)>0:
            newpack.extend(keyvalue)
            setattr(newpack,catkey,keyvalue)
        return newpack


    def switch_numpy(self,to_val):
        pass

    def change_type(self,itemname,inval):
        if inval == "?": return inval
        change_function = convert_type(self[itemname])
        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
            newval = list([change_function(a) for a in inval])
        else:
            newval = change_function(inval)
        return newval

    def install_validation_functions(self):
        """Install the DDL-appropriate validation checks"""
        if self.diclang != 'DDLm':
            # functions which check conformance
            self.item_validation_funs = [
                self.validate_item_type,
                self.validate_item_esd,
                self.validate_item_enum,
                self.validate_enum_range,
                self.validate_looping
            ]
            # functions checking loop values
            self.loop_validation_funs = [
                self.validate_loop_membership,
                self.validate_loop_key,
                self.validate_loop_references
            ]
            # where we need to look at other values
            self.global_validation_funs = [
                self.validate_exclusion,
                self.validate_parent,
                self.validate_child,
                self.validate_dependents,
                self.validate_uniqueness
            ]
            # where only a full block will do
            self.block_validation_funs = [
                self.validate_mandatory_category
            ]
            # removal is quicker with special checks
            self.global_remove_validation_funs = [
                self.validate_remove_parent_child
            ]
        elif self.diclang == 'DDLm':
            self.item_validation_funs = [
                self.validate_item_enum,
                self.validate_item_esd_ddlm,
                ]
            self.loop_validation_funs = [
                self.validate_looping_ddlm,
                self.validate_loop_key_ddlm,
                self.validate_loop_membership
                ]
            self.global_validation_funs = []
            self.block_validation_funs = [
                self.check_mandatory_items,
                self.check_prohibited_items
                ]
            self.global_remove_validation_funs = []
        self.optimize = False        # default value
        self.done_parents = []
        self.done_children = []
        self.done_keys = []

    def validate_item_type(self,item_name,item_value):
        def mymatch(m,a):
            res = m.match(a)
            if res != None: return res.group()
            else: return ""
        target_type = self[item_name].get(self.type_spec)
        if target_type == None:          # e.g. a category definition
            return {"result":True}                  # not restricted in any way
        matchexpr = self.typedic[target_type]
        item_values = listify(item_value)
        #for item in item_values:
            #print("Type match " + item_name + " " + item + ":",)
        #skip dots and question marks
        check_all = [a for a in item_values if a !="." and a != "?"]
        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def decide(self,result_list):
        """Construct the standard validation return value from a list of bad values"""
        if len(result_list)==0:
               return {"result":True}
        else:
               return {"result":False,"bad_values":result_list}

    def validate_item_container(self, item_name,item_value):
        container_type = self[item_name]['_type.container']
        item_values = listify(item_value)
        if container_type == 'Single':
           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
           return self.decide(okcheck)
        if container_type in ('Multiple','List'):
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           return self.decide(okcheck)
        if container_type == 'Array':    #A list with numerical values
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           first_check = self.decide(okcheck)
           if not first_check['result']: return first_check
           #num_check = [a for a in item_values if len([b for b in a if not isinstance

    def validate_item_esd(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) != 'numb':
            return {"result":None}
        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
        if can_esd: return {"result":True}         #must be OK!
        item_values = listify(item_value)
        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_item_esd_ddlm(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) not in \
        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
            return {"result":None}
        can_esd = True
        if self[item_name].get('_type.purpose') != 'Measurand':
            can_esd = False
        item_values = listify(item_value)
        check_all = [get_number_with_esd(a)[1] for a in item_values]
        check_all = [v for v in check_all if (can_esd and v == None) or \
                 (not can_esd and v != None)]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_enum_range(self,item_name,item_value):
        if "_item_range.minimum" not in self[item_name] and \
           "_item_range.maximum" not in self[item_name]:
            return {"result":None}
        minvals = self[item_name].get("_item_range.minimum",default = ["."])
        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
        def makefloat(a):
            if a == ".": return a
            else: return float(a)
        maxvals = map(makefloat, maxvals)
        minvals = map(makefloat, minvals)
        rangelist = list(zip(minvals,maxvals))
        item_values = listify(item_value)
        def map_check(rangelist,item_value):
            if item_value == "?" or item_value == ".": return True
            iv,esd = get_number_with_esd(item_value)
            if iv==None: return None  #shouldn't happen as is numb type
            for lower,upper in rangelist:
                #check the minima
                if lower == ".": lower = iv - 1
                if upper == ".": upper = iv + 1
                if iv > lower and iv < upper: return True
                if upper == lower and iv == upper: return True
            # debug
            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
            return False
        check_all = [a for a in item_values if map_check(rangelist,a) != True]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}
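    # A worked example (hypothetical definition): given
    # _item_range.minimum ['0'] and _item_range.maximum ['.'], rangelist
    # becomes [(0.0, '.')], so a value of '5' passes (0 < 5) while '-1'
    # comes back in "bad_values".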

    def validate_item_enum(self,item_name,item_value):
        try:
            enum_list = self[item_name][self.enum_spec][:]
        except KeyError:
            return {"result":None}
        enum_list.append(".")   #default value
        enum_list.append("?")   #unknown
        item_values = listify(item_value)
        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
        check_all = [a for a in item_values if a not in enum_list]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_looping(self,item_name,item_value):
        try:
            must_loop = self[item_name][self.must_loop_spec]
        except KeyError:
            return {"result":None}
        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
            return {"result":False}      #this could be triggered
        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
            return {"result":False}
        return {"result":True}

    def validate_looping_ddlm(self,loop_names):
        """Check that all names are loopable"""
        truly_loopy = self.get_final_cats(loop_names)
        if len(truly_loopy)<len(loop_names):  #some are bad
            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
            return {"result":False,"bad_items":not_looped}
        return {"result":True}


    def validate_loop_membership(self,loop_names):
        final_cat = self.get_final_cats(loop_names)
        bad_items =  [a for a in final_cat if a != final_cat[0]]
        if len(bad_items)>0:
            return {"result":False,"bad_items":bad_items}
        else: return {"result":True}

    def get_final_cats(self,loop_names):
        """Return a list of the uppermost parent categories for the loop_names. Names
        that are not from loopable categories are ignored."""
        try:
            categories = [self[a][self.cat_spec].lower() for a in loop_names]
        except KeyError:       #category_id is mandatory
            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
        return [self.parent_lookup[a] for a in truly_looped]

    def validate_loop_key(self,loop_names):
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            if isinstance(names_to_check,unicode):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue          #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    for alt_names in alternates:
                        alt = [a for a in alt_names if a in loop_names]
                        if len(alt) == 0:
                            return {"result":False,"bad_items":loop_key}  # no alternates
        return {"result":True}

    def validate_loop_key_ddlm(self,loop_names):
        """Make sure at least one of the necessary keys are available"""
        final_cats = self.get_final_cats(loop_names)
        if len(final_cats)>0:
            poss_keys = self.cat_key_table[final_cats[0]][0]
            found_keys = [a for a in poss_keys if a in loop_names]
            if len(found_keys)>0:
                return {"result":True}
            else:
                return {"result":False,"bad_items":poss_keys}
        else:
            return {"result":True}

    def validate_loop_references(self,loop_names):
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a != None]
        # build a flat list.  For efficiency we don't remove duplicates, as
        # we expect no more than the order of 10 or 20 looped names.
        def flat_func(a,b):
            if isinstance(b,unicode):
               a.append(b)       #single name
            else:
               a.extend(b)       #list of names
            return a
        flat_mh = []
        [flat_func(flat_mh,a) for a in must_haves]
        group_mh = filter(lambda a:a[-1]=="_",flat_mh)
        single_mh = filter(lambda a:a[-1]!="_",flat_mh)
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            nl = map(lambda a:a[:len(s_item)],name_list)
            if s_item in nl: return True
            return False
        # report any group references that no looped name satisfies
        res_g = [a for a in group_mh if not check_gr(a,loop_names)]
        if len(res_g) > 0: return {"result":False,"bad_items":res_g}
        if len(res) == 0: return {"result":True}
        # construct a list of (name, alternates) pairs
        alternates = list(map(lambda a: (a,self.get_alternates(a)),res))
        # names with no alternates at all, purely for error reporting
        missing_alts = [a[0] for a in alternates if a[1] == []]
        alternates = [a for a in alternates if a[1] != []]
        if len(alternates) != len(res):
           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                       #doesn't have an alternative
        #loop over alternates
        for orig_name,alt_names in alternates:
             alt = [a for a in alt_names if a in loop_names]
             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
        return {"result":True}        #found alternates

    def get_alternates(self,main_name,exclusive_only=False):
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates != None:
            alt_names =  self[main_name].get(self.related_item,None)
            if isinstance(alt_names,unicode):
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                                             or a[1]=="replace"]
            else:
                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
            alt_names = list([a[0] for a in alt_names])
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names


    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
       alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
       item_name_list = [a.lower() for a in whole_block.keys()]
       item_name_list.extend([a.lower() for a in provisional_items.keys()])
       bad = [a for a in alternates if a in item_name_list]
       if len(bad)>0:
           print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
           return {"result":False,"bad_items":bad}
       else: return {"result":True}

    # validate that parent exists and contains matching values
    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        parent_item = self[item_name].get(self.parent_spec)
        if not parent_item: return {"result":None}   #no parent specified
        if isinstance(parent_item,list):
            parent_item = parent_item[0]
        if self.optimize:
            if parent_item in self.done_parents:
                return {"result":None}
            else:
                self.done_parents.append(parent_item)
                print("Done parents %s" % repr(self.done_parents))
        # initialise parent/child values
        if isinstance(item_value,unicode):
            child_values = [item_value]
        else: child_values = item_value[:]    #copy for safety
        # track down the parent
        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
        # we have collected all parent values into the global block - so no need to search
        # for them elsewhere.
        # print("Looking for {!r}".format(parent_item))
        parent_values = globals.get(parent_item)
        if not parent_values:
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
        if not parent_values:
            # go for alternates
            namespace = list(whole_block.keys())
            namespace.extend(provisional_items.keys())
            namespace.extend(globals.keys())
            alt_names = filter_present(self.get_alternates(parent_item),namespace)
            if len(alt_names) == 0:
                if len([a for a in child_values if a != "." and a != "?"])>0:
                    return {"result":False,"parent":parent_item}#no parent available -> error
                else:
                    return {"result":None}       #maybe True is more appropriate??
            parent_item = alt_names[0]           #should never be more than one??
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
            if not parent_values:   # check global block
                parent_values = globals.get(parent_item)
        if isinstance(parent_values,unicode):
            parent_values = [parent_values]
        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
        #                                          item_name, parent_values, child_values))
        missing = self.check_parent_child(parent_values,child_values)
        if len(missing) > 0:
            return {"result":False,"bad_values":missing,"parent":parent_item}
        return {"result":True}

    def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        try:
            child_items = self[item_name][self.child_spec][:]  #copy
        except KeyError:
            return {"result":None}    #not relevant
        # special case for dictionaries  -> we check parents of children only
        if item_name in globals:  #dictionary so skip
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        if isinstance(item_value,unicode): # single value
            parent_values = [item_value]
        else: parent_values = item_value[:]
        # expand child list with list of alternates
        for child_item in child_items[:]:
            child_items.extend(self.get_alternates(child_item))
        # now loop over the children
        for child_item in child_items:
            if self.optimize:
                if child_item in self.done_children:
                    return {"result":None}
                else:
                    self.done_children.append(child_item)
                    print("Done children %s" % repr(self.done_children))
            if child_item in provisional_items:
                child_values = provisional_items[child_item][:]
            elif child_item in whole_block:
                child_values = whole_block[child_item][:]
            else:  continue
            if isinstance(child_values,unicode):
                child_values = [child_values]
                # print("Checking child %s against %s, values %r/%r" % (child_item,
                #       item_name, child_values, parent_values))
            missing = self.check_parent_child(parent_values,child_values)
            if len(missing)>0:
                return {"result":False,"bad_values":missing,"child":child_item}
        return {"result":True}       #could mean that no child items present

    #a generic checker: all child vals should appear in parent_vals
    def check_parent_child(self,parent_vals,child_vals):
        # shield ourselves from dots and question marks
        pv = parent_vals[:]
        pv.extend([".","?"])
        res =  [a for a in child_vals if a not in pv]
        #print("Missing: %s" % res)
        return res
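    # e.g. check_parent_child(['C1','C2'], ['C1','C3','?']) -> ['C3']
    # ('.' and '?' in the child values never count as missing).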

    def validate_remove_parent_child(self,item_name,whole_block):
        try:
            child_items = self[item_name][self.child_spec]
        except KeyError:
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        for child_item in child_items:
            if child_item in whole_block:
                return {"result":False,"child":child_item}
        return {"result":True}

    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
        try:
            dep_items = self[item_name][self.dep_spec][:]
        except KeyError:
            return {"result":None}    #not relevant
        if isinstance(dep_items,unicode):
            dep_items = [dep_items]
        actual_names = list(whole_block.keys())
        actual_names.extend(prov.keys())
        actual_names.extend(globals.keys())
        missing = [a for a in dep_items if a not in actual_names]
        if len(missing) > 0:
            alternates = map(lambda a:[self.get_alternates(a),a],missing)
            # compact way to get a list of alternative items which are
            # present
            have_check = [(filter_present(b[0],actual_names),
                                       b[1]) for b in alternates]
            have_check = list([a for a in have_check if len(a[0])==0])
            if len(have_check) > 0:
                have_check = [a[1] for a in have_check]
                return {"result":False,"bad_items":have_check}
        return {"result":True}

    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                                                                  globals={}):
        category = self[item_name].get(self.cat_spec)
        if category == None:
            print("No category found for %s" % item_name)
            return {"result":None}
        # print("Category {!r} for item {}".format(category, item_name))
        # we make a copy in the following as we will be removing stuff later!
        unique_i = self[category].get("_category_key.name",[])[:]
        if isinstance(unique_i,unicode):
            unique_i = [unique_i]
        if item_name not in unique_i:       #no need to verify
            return {"result":None}
        if isinstance(item_value,unicode):  #not looped
            return {"result":None}
        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
        # check that we can't optimize by not doing this check
        if self.optimize:
            if unique_i in self.done_keys:
                return {"result":None}
            else:
                self.done_keys.append(unique_i)
        val_list = []
        # get the matching data from any other data items
        unique_i.remove(item_name)
        other_data = []
        if len(unique_i) > 0:            # i.e. do have others to think about
           for other_name in unique_i:
           # we look for the value first in the provisional dict, then the main block
           # the logic being that anything in the provisional dict overrides the
           # main block
               if other_name in provisional_items:
                   other_data.append(provisional_items[other_name])
               elif other_name in whole_block:
                   other_data.append(whole_block[other_name])
               elif self[other_name].get(self.must_exist_spec)=="implicit":
                   other_data.append([item_name]*len(item_value))  #placeholder
               else:
                   return {"result":False,"bad_items":other_name}#missing data name
        # ok, so we go through all of our values
        # this works by comparing lists of strings to one other, and
        # so could be fooled if you think that '1.' and '1' are
        # identical
        for i in range(len(item_value)):
            #print("Value no. %d" % i, end=" ")
            this_entry = item_value[i]
            for j in range(len(other_data)):
                this_entry = " ".join([this_entry,other_data[j][i]])
            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
            if this_entry in val_list:
                return {"result":False,"bad_values":this_entry}
            val_list.append(this_entry)
        return {"result":True}
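    # Sketch of the uniqueness check for a hypothetical single-item category
    # key '_atom_site.label': item_value ['C1','C2','C1'] returns
    # {"result": False, "bad_values": 'C1'} when the duplicate is met.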


    def validate_mandatory_category(self,whole_block,default_scope='Item'):
        mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
        if len(mand_cats) == 0:
            return {"result":True}
        # print("Mandatory categories - {!r}".format(mand_cats))
        # find which categories each of our datanames belongs to
        all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
        missing = set(mand_cats) - set(all_cats)
        if len(missing) > 0:
            return {"result":False,"bad_items":repr(missing)}
        return {"result":True}

    def check_mandatory_items(self,whole_block,default_scope='Item'):
        """Return an error if any mandatory items are missing"""
        if len(self.scopes_mandatory)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
        if len(missing)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":missing}

    def check_prohibited_items(self,whole_block,default_scope='Item'):
        """Return an error if any prohibited items are present"""
        if len(self.scopes_naughty)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
        if len(present)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":present}


    def run_item_validation(self,item_name,item_value):
        return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}

    def run_loop_validation(self,loop_names):
        return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}

    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
        results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
        return {item_name:results}

    def run_block_validation(self,whole_block,block_scope='Item'):
        results = list([(f.__name__,f(whole_block,default_scope=block_scope)) for f in self.block_validation_funs])
        # fix up the return values
        return {"whole_block":results}

    def optimize_on(self):
        self.optimize = True
        self.done_keys = []
        self.done_children = []
        self.done_parents = []

    def optimize_off(self):
        self.optimize = False
        self.done_keys = []
        self.done_children = []
        self.done_parents = []



class ValidCifBlock(CifBlock):
    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
    `CifBlock`.

    ## Initialisation

    * `dic` is a `CifDic` object to be used for validation.

    """
    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
        CifBlock.__init__(self,*args,**kwords)
        if dic and diclist:
            print("Warning: diclist argument ignored when initialising ValidCifBlock")
        if dic is not None:
            if not isinstance(dic,CifDic):
                raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
            self.fulldic = dic
        elif len(diclist)==0:
            raise ValidCifError( "At least one dictionary must be specified")
        else:
            self.fulldic = merge_dic(diclist,mergemode)
        if not self.run_data_checks()[0]:
            raise ValidCifError( self.report())

    def run_data_checks(self,verbose=False):
        self.v_result = {}
        self.fulldic.optimize_on()
        for dataname in self.keys():
            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
        for loop_names in self.loops.values():
            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
        # now run block-level checks
        update_value(self.v_result,self.fulldic.run_block_validation(self))
        # return false and list of baddies if anything didn't match
        self.fulldic.optimize_off()
        all_keys = list(self.v_result.keys()) #dictionary will change
        for test_key in all_keys:
            #print("%s: %r" % (test_key, self.v_result[test_key]))
            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
            if len(self.v_result[test_key]) == 0:
                del self.v_result[test_key]
        isvalid = len(self.v_result)==0
        #if not isvalid:
        #    print("Baddies: {!r}".format(self.v_result))
        return isvalid,self.v_result

    def single_item_check(self,item_name,item_value):
        #self.match_single_item(item_name)
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_item_validation(item_name,item_value)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def loop_item_check(self,loop_names):
        in_dic_names = list([a for a in loop_names if a in self.fulldic])
        if len(in_dic_names)==0:   #nothing to check against the dictionary
            return True,[]
        result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
        return isvalid,baddies

    def global_item_check(self,item_name,item_value,provisional_items={}):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_global_validation(item_name,
               item_value,self,provisional_items = provisional_items)
        baddies = list([a for a in result[item_name] if a[1]["result"] is False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def remove_global_item_check(self,item_name):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_remove_global_validation(item_name,self,False)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def AddToLoop(self,dataname,loopdata):
        # single item checks
        paired_data = loopdata.items()
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        # loop item checks; merge with current loop
        for aloop in self.loops.values():
            if dataname in aloop:
                loopnames = list(aloop)
                for new_name in loopdata.keys():
                    if new_name not in loopnames: loopnames.append(new_name)
                valid,problems = self.loop_item_check(loopnames)
                self.report_if_invalid(valid,problems,dataname)
        prov_dict = loopdata.copy()
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
        CifBlock.AddToLoop(self,dataname,loopdata)

    def AddCifItem(self,data):
        if isinstance(data[0],(unicode,str)):   # single item
            valid,problems = self.single_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
            valid,problems = self.global_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
        elif isinstance(data[0],tuple) or isinstance(data[0],list):
            paired_data = list(zip(data[0],data[1]))
            for name,value in paired_data:
                valid,problems = self.single_item_check(name,value)
                self.report_if_invalid(valid,problems,name)
            valid,problems = self.loop_item_check(data[0])
            self.report_if_invalid(valid,problems,data[0])
            prov_dict = {}            # for storing temporary items
            for name,value in paired_data: prov_dict[name]=value
            for name,value in paired_data:
                del prov_dict[name]   # remove temporarily
                valid,problems = self.global_item_check(name,value,prov_dict)
                prov_dict[name] = value  # add back in
                self.report_if_invalid(valid,problems,name)
        else:
            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
        super(ValidCifBlock,self).AddCifItem(data)

    def AddItem(self,key,value,**kwargs):
        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
        valid,problems = self.single_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        valid,problems = self.global_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        super(ValidCifBlock,self).AddItem(key,value,**kwargs)

    # utility function
    def report_if_invalid(self,valid,bad_list,data_name):
        if not valid:
            bad_tests = [a[0] for a in bad_list]
            error_string = ",".join(bad_tests)
            error_string = repr(data_name) + " fails following validity checks: "  + error_string
            raise ValidCifError( error_string)

    def __delitem__(self,key):
        # we don't need to run single item checks; we do need to run loop and
        # global checks.
        if key in self:
            try:
                loop_items = self.GetLoop(key)
            except TypeError:
                loop_items = []
            if loop_items:             #need to check loop conformance
                loop_names = [a[0] for a in loop_items if a[0] != key]
                valid,problems = self.loop_item_check(loop_names)
                self.report_if_invalid(valid,problems,key)
            valid,problems = self.remove_global_item_check(key)
            self.report_if_invalid(valid,problems,key)
        self.RemoveCifItem(key)


    def report(self):
       outstr = StringIO()
       outstr.write( "Validation results\n")
       outstr.write( "------------------\n")
       outstr.write( "%d invalid items found\n\n" % len(self.v_result))
       for item_name,val_func_list in self.v_result.items():
           outstr.write("%s fails following tests:\n" % item_name)
           for val_func in val_func_list:
               outstr.write("\t%s\n" % val_func[0])
       return outstr.getvalue()
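
    # A minimal usage sketch (dictionary and dataname are hypothetical):
    #   dic = CifDic("cif_core.dic")
    #   vb = ValidCifBlock(dic=dic, data=plain_cif_block)
    #   vb.AddItem('_cell.length_a', '4.37(5)')   # checked against dic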


class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
        if not diclist and not dic and not hasattr(self,'bigdic'):
            raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
        if not dic and diclist:     #merge here for speed
            self.bigdic = merge_dic(diclist,mergemode)
        elif dic:                   #dic takes precedence
            self.bigdic = dic
        CifFile.__init__(self,*args,**kwargs)
        for blockname in self.keys():
            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

    def NewBlock(self,blockname,blockcontents,**kwargs):
        CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
        # dictionary[blockname] is now a CifBlock object.  We
        # turn it into a ValidCifBlock object
        self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                         data=self.dictionary[blockname])
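
    # A minimal usage sketch (filenames hypothetical, and assuming the
    # usual CifFile `datasource` argument is passed through **kwargs):
    #   vcf = ValidCifFile(dic=CifDic("cif_core.dic"), datasource="data.cif")
    #   # every block of vcf is now a ValidCifBlock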


class ValidationResult:
    """Represents a validation result. Initialised with the return value of
    the `Validate` function."""
    def __init__(self,results):
        """results is the return value of the `Validate` function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for valid CIF file, otherwise False"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.valid_result.keys()
        valid = True
        for block_name in block_names:
            if not self.valid_result[block_name] == (True,{}):
                valid = False
                break
        return valid

    def has_no_match_items(self,block_name=None):
        """Return True if some items are not found in the dictionary"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.no_matches.keys()
        has_no_match_items = False
        for block_name in block_names:
            if self.no_matches[block_name]:
                has_no_match_items = True
                break
        return has_no_match_items



def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = list(check_file.keys())[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                    print("Added %s (len %d)" % (parent,len(new_vals)))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
           block_scope = 'Dictionary'
        elif isdic:
           block_scope = 'Item'
        else:
           block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
             del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches
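
# Typical usage (filenames hypothetical):
#   res = ValidationResult(Validate("data.cif", dic=CifDic("cif_core.dic")))
#   res.is_valid()                   # -> True or False
#   print(res.report(use_html=False))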

def validate_report(val_result,use_html=False):
    valid_result,no_matches = val_result
    outstr = StringIO()
    if use_html:
        outstr.write("<h2>Validation results</h2>")
    else:
        outstr.write( "Validation results\n")
        outstr.write( "------------------\n")
    if len(valid_result) > 10:
        suppress_valid = True         #don't clutter with valid messages
        if use_html:
           outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>")
    else:
        suppress_valid = False
    for block in valid_result.keys():
        block_result = valid_result[block]
        if block_result[0]:
            out_line = "Block '%s' is VALID" % block
        else:
            out_line = "Block '%s' is INVALID" % block
        if use_html:
            if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]:
                outstr.write( "<h3>%s</h3><p>" % out_line)
        else:
                outstr.write( "\n %s\n" % out_line)
        if len(no_matches[block])!= 0:
            if use_html:
                outstr.write( "<p>The following items were not found in the dictionary")
                outstr.write(" (note that this does not invalidate the data block):</p>")
                outstr.write("<p><table>\n")
                [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]]
                outstr.write("</table>\n")
            else:
                outstr.write( "\n The following items were not found in the dictionary:\n")
                outstr.write("Note that this does not invalidate the data block\n")
                [outstr.write("%s\n" % it) for it in no_matches[block]]
        # now organise our results by type of error, not data item...
        error_type_dic = {}
        for error_item, error_list in block_result[1].items():
            for func_name,bad_result in error_list:
                bad_result.update({"item_name":error_item})
                try:
                    error_type_dic[func_name].append(bad_result)
                except KeyError:
                    error_type_dic[func_name] = [bad_result]
        # make a table of test name, test message
        info_table = {\
        'validate_item_type':\
            "The following data items had badly formed values",
        'validate_item_esd':\
            "The following data items should not have esds appended",
        'validate_enum_range':\
            "The following data items have values outside permitted range",
        'validate_item_enum':\
            "The following data items have values outside permitted set",
        'validate_looping':\
            "The following data items violate looping constraints",
        'validate_loop_membership':\
            "The following looped data names are of different categories to the first looped data name",
        'validate_loop_key':\
            "A required dataname for this category is missing from the loop\n containing the dataname",
        'validate_loop_key_ddlm':\
            "A loop key is missing for the category containing the dataname",
        'validate_loop_references':\
            "A dataname required by the item is missing from the loop",
        'validate_parent':\
            "A parent dataname is missing or contains different values",
        'validate_child':\
            "A child dataname contains different values to the parent",
        'validate_uniqueness':\
            "One or more data items do not take unique values",
        'validate_dependents':\
            "A dataname required by the item is missing from the data block",
        'validate_exclusion': \
            "Both dataname and exclusive alternates or aliases are present in data block",
        'validate_mandatory_category':\
            "A required category is missing from this block",
        'check_mandatory_items':\
            "A required data attribute is missing from this block",
        'check_prohibited_items':\
            "A prohibited data attribute is present in this block"}

        for test_name,test_results in error_type_dic.items():
           if use_html:
               outstr.write(html_error_report(test_name,info_table[test_name],test_results))
           else:
               outstr.write(error_report(test_name,info_table[test_name],test_results))
               outstr.write("\n\n")
    return outstr.getvalue()

# A function to lay out a single error report.  We are passed
# the name of the error (one of our validation functions), the
# explanation to print out, and a dictionary with the error
# information.  We print no more than 50 characters of the item

def error_report(error_name,error_explanation,error_dics):
   retstring = "\n\n " + error_explanation + ":\n\n"
   headstring = "%-32s" % "Item name"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "%-20s" % "Bad value(s)"
   if "bad_items" in error_dics[0]:
      headstring += "%-20s" % "Bad dataname(s)"
   if "child" in error_dics[0]:
      headstring += "%-20s" % "Child"
   if "parent" in error_dics[0]:
      headstring += "%-20s" % "Parent"
   headstring +="\n"
   for error in error_dics:
      bodystring += "\n%-32s" % error["item_name"]
      if "bad_values" in error:
          out_vals = [repr(a)[:50] for a in error["bad_values"]]
          bodystring += "%-20s" % out_vals
      if "bad_items" in error:
          bodystring += "%-20s" % repr(error["bad_items"])
      if "child" in error:
          bodystring += "%-20s" % repr(error["child"])
      if "parent" in error:
          bodystring += "%-20s" % repr(error["parent"])
   return retstring + headstring + bodystring

#  This lays out an HTML error report

def html_error_report(error_name,error_explanation,error_dics,annotate=[]):
   retstring = "<h4>" + error_explanation + ":</h4>"
   retstring = retstring + "<table cellpadding=5><tr>"
   headstring = "<th>Item name</th>"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "<th>Bad value(s)</th>"
   if "bad_items" in error_dics[0]:
      headstring += "<th>Bad dataname(s)</th>"
   if "child" in error_dics[0]:
      headstring += "<th>Child</th>"
   if "parent" in error_dics[0]:
      headstring += "<th>Parent</th>"
   headstring +="</tr>\n"
   for error in error_dics:
      bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"]
      if "bad_values" in error:
          bodystring += "<td>%s</td>" % error["bad_values"]
      if "bad_items" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["bad_items"]
      if "child" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["child"]
      if "parent" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["parent"]
      bodystring += "</tr>\n"
   return retstring + headstring + bodystring + "</table>\n"

def run_data_checks(check_block,fulldic,block_scope='Item'):
    v_result = {}
    for key in check_block.keys():
        update_value(v_result, fulldic.run_item_validation(key,check_block[key]))
        update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block))
    for loopnames in check_block.loops.values():
        update_value(v_result, fulldic.run_loop_validation(loopnames))
    update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope))
    # return false and list of baddies if anything didn't match
    all_keys = list(v_result.keys())
    for test_key in all_keys:
        v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False]
        if len(v_result[test_key]) == 0:
            del v_result[test_key]
    # if even one false one is found, this should trigger
    # print("Baddies: {!r}".format(v_result))
    isvalid = len(v_result)==0
    return isvalid,v_result


def get_number_with_esd(numstring):
    numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    our_match = re.match(numb_re,numstring)
    if our_match:
        a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
        # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    else:
        return None,None
    if dot or q: return None,None     #a dot or question mark
    if exp:          #has exponent
       exp = exp.replace("d","e")     # mop up old fashioned numbers
       exp = exp.replace("D","e")
       base_num = base_num + exp
    # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    base_num = float(base_num)
    # work out esd, if present.
    if esd:
        esd = float(esd[1:-1])    # no brackets
        if dad:                   # decimal point + digits
            esd = esd * (10 ** (-1* len(dad)))
        if exp:
            esd = esd * (10 ** (float(exp[1:])))
    return base_num,esd
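
# Examples of the parsing behaviour (a sketch, not a test suite):
#   get_number_with_esd("4.37(5)")   -> (4.37, 0.05)
#   get_number_with_esd("1.2e3")     -> (1200.0, None)
#   get_number_with_esd("?")         -> (None, None)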

def float_with_esd(inval):
    if isinstance(inval,unicode):
        j = inval.find("(")
        if j>=0:  return float(inval[:j])
    return float(inval)



def convert_type(definition):
    """Convert value to have the type given by definition"""
    #extract the actual required type information
    container = definition['_type.container']
    dimension = definition.get('_type.dimension',StarFile.StarList([]))
    structure = interpret_structure(definition['_type.contents'])
    if container == 'Single':   #a single value to convert
        return convert_single_value(structure)
    elif container == 'List':   #lots of the same value
        return convert_list_values(structure,dimension)
    elif container == 'Multiple': #no idea
        return None
    elif container in ('Array','Matrix'): #numpy array
        return convert_matrix_values(structure)
    return lambda a:a    #unable to convert

def convert_single_value(type_spec):
    """Convert a single item according to type_spec"""
    if type_spec == 'Real':
        return float_with_esd
    if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
        return int
    if type_spec == 'Complex':
        return complex
    if type_spec == 'Imag':
        return lambda a:complex(0,a)
    if type_spec in ('Code','Name','Tag'):  #case-insensitive -> lowercase
        return lambda a:a.lower()
    return lambda a:a   #can't do anything numeric
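
# e.g. convert_single_value('Real')('4.37(5)')  -> 4.37   (esd dropped)
#      convert_single_value('Code')('Xyz')      -> 'xyz'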

class convert_simple_list(object):

    """\
    Callable object that converts values in a simple list according
    to the specified element structure.
    """

    def __init__(self, structure):
        self.converters = [convert_single_value(tp) for tp in structure]
        return

    def __call__(self, element):
        if len(element) != len(self.converters):
            emsg = "Expected iterable of %i values, got %i." % (
                (len(self.converters), len(element)))
            raise ValueError(emsg)
        rv = [f(e) for f, e in zip(self.converters, element)]
        return rv

# End of class convert_simple_list

def convert_list_values(structure, dimension):
    """Convert the values according to the element
       structure given in `structure`"""
    # simple repetition
    if isinstance(structure, (unicode, str)):
        fcnv = convert_single_value(structure)
    # assume structure is a list of types
    else:
        fcnv = convert_simple_list(structure)
    rv = fcnv
    # setup nested conversion function when dimension differs from 1.
    if len(dimension) > 0 and int(dimension[0]) != 1:
        rv = lambda args : [fcnv(a) for a in args]
    return rv
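
# e.g. convert_list_values('Integer', ['3'])(['1','2','3'])        -> [1, 2, 3]
#      convert_list_values(['Integer','Real'], ['1'])(['1','2.5']) -> [1, 2.5]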

def convert_matrix_values(valtype):
    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
    # first convert to numpy array, then let numpy do the work
    try:
        import numpy
    except ImportError:
        return lambda a:a   #cannot do it
    if valtype == 'Real':
        dtype = float
    elif valtype == 'Integer':
        dtype = int
    elif valtype == 'Complex':
        dtype = complex
    else:
        raise ValueError('Unknown matrix value type')
    fcnv = lambda a : numpy.asarray(a, dtype=dtype)
    return fcnv
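
# e.g. convert_matrix_values('Real')([['1','2'],['3','4']]) returns a
# 2x2 numpy array of floats (or the input unchanged if numpy is absent).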

def interpret_structure(struc_spec):
    """Interpret a DDLm structure specification"""
    from . import TypeContentsParser as t
    p = t.TypeParser(t.TypeParserScanner(struc_spec))
    return getattr(p,"input")()


# A utility function to append to item values rather than replace them
def update_value(base_dict,new_items):
    for new_key in new_items.keys():
        if new_key in base_dict:
            base_dict[new_key].extend(new_items[new_key])
        else:
            base_dict[new_key] = new_items[new_key]
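
# e.g. base = {'a':[1]}; update_value(base, {'a':[2], 'b':[3]})
# leaves base == {'a':[1,2], 'b':[3]}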

#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = []
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl
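
# e.g. transpose([[1,2,3],[4,5,6]]) -> [[1,4],[2,5],[3,6]]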

# listify strings - used surprisingly often
def listify(item):
    if isinstance(item,(unicode,str)): return [item]
    else: return item

# given a list of search items, return a list of items
# actually contained in the given data block
def filter_present(namelist,datablocknames):
    return [a for a in namelist if a in datablocknames]

# Make an item immutable, used if we want a list to be a key
def make_immutable(values):
    """Turn list of StarList values into a list of immutable items"""
    if not isinstance(values[0],StarList):
        return values
    else:
        return [tuple(a) for a in values]
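
# e.g. make_immutable([StarList([1,2]), StarList([3,4])]) -> [(1,2), (3,4)]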

# merge ddl dictionaries.  We should be passed filenames or CifFile
# objects
def merge_dic(diclist,mergemode="replace",ddlspec=None):
    dic_as_cif_list = []
    for dic in diclist:
        if not isinstance(dic,CifFile) and \
           not isinstance(dic,(unicode,str)):
               raise TypeError("Require list of CifFile names/objects for dictionary merging")
        if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic))
        else: dic_as_cif_list.append(dic)
    # we now merge left to right
    basedic = dic_as_cif_list[0]
    if "on_this_dictionary" in basedic:   #DDL1 style only
        for dic in dic_as_cif_list[1:]:
           basedic.merge(dic,mode=mergemode,match_att=["_name"])
    elif len(basedic.keys()) == 1:                     #One block: DDL2/m style
        base_block_name = list(basedic.keys())[0]
        for dic in dic_as_cif_list[1:]:
           basedic.merge(dic,mode=mergemode,
                         single_block=[base_block_name,list(dic.keys())[0]],
                         match_att=["_item.name"],match_function=find_parent)
    return CifDic(basedic)

def find_parent(ddl2_def):
    if "_item.name" not in ddl2_def:
       return None
    if isinstance(ddl2_def["_item.name"],unicode):
        return ddl2_def["_item.name"]
    if "_item_linked.child_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no child_names")
    if "_item_linked.parent_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no parent_names")
    result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]])
    if len(result)>1 or len(result)==0:
        raise CifError("Unable to find single unique parent data item")
    return result[0]


def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF',
            permissive=False):
    """ Read in a CIF file, returning a `CifFile` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
    Jython uses native Java regular expressions
    to provide a speedup regardless of this argument (and does not yet support CIF2).

    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
    `instance` scoping makes nested save frames
    invisible outside their hierarchy, allowing duplicate save frame names in separate
    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
    other, thereby restricting all save frames to have unique names.
    Currently the only recognised value for `standard` is `CIF`, which when set enforces a
    maximum length of 75 characters for datanames and has no other effect. """

    finalcif = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype,
                             permissive=permissive)
    #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs)
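
# Typical usage (filename hypothetical):
#   cf = ReadCif("example.cif")
#   for blockname in cf.keys():
#       print(blockname, cf[blockname].get('_cell.length_a'))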

class CifLoopBlock(StarFile.LoopBlock):
    def __init__(self,data=(),**kwargs):
        super(CifLoopBlock,self).__init__(data,**kwargs)

#No documentation flags

Functions

def ReadCif(

filename, grammar=u'auto', scantype=u'standard', scoping=u'instance', standard=u'CIF', permissive=False)

Read in a CIF file, returning a CifFile object.

  • filename may be a URL, a file path on the local system, or any object with a read method.

  • grammar chooses the CIF grammar variant. 1.0 is the original 1992 grammar and 1.1 is identical except for the exclusion of square brackets as the first characters in undelimited datanames. 2.0 will read files in the CIF2.0 standard, and STAR2 will read files according to the STAR2 publication. If grammar is None, autodetection will be attempted in the order 2.0, 1.1 and 1.0. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

  • scantype can be standard or flex. standard provides pure Python parsing at the cost of a factor of 10 or so in speed. flex will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2).

  • scoping is only relevant where nested save frames are expected (STAR2 only). instance scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies. dictionary scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names. Currently the only recognised value for standard is CIF, which when set enforces a maximum length of 75 characters for datanames and has no other effect.

def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF',
            permissive=False):
    """ Read in a CIF file, returning a `CifFile` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
    Jython uses native Java regular expressions
    to provide a speedup regardless of this argument (and does not yet support CIF2).

    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
    `instance` scoping makes nested save frames
    invisible outside their hierarchy, allowing duplicate save frame names in separate
    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
    other, thereby restricting all save frames to have unique names.
    Currently the only recognised value for `standard` is `CIF`, which when set enforces a
    maximum length of 75 characters for datanames and has no other effect. """

    finalcif = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype,
                             permissive=permissive)

def Validate(

ciffile, dic=u'', diclist=[], mergemode=u'replace', isdic=False)

Validate that ciffile conforms to the definitions in CifDic object dic or, if dic is missing, to the results of merging the CifDic objects in diclist according to mergemode. Flag isdic indicates that ciffile is a CIF dictionary, meaning that save frames should be accessed for validation and that mandatory_category should be interpreted differently for DDL2.
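
A minimal sketch of validating a data file against a single dictionary (both file names are hypothetical):

from CifFile import CifDic, Validate

core_dic = CifDic('cif_core.dic')
valid_result, no_matches = Validate('quartz.cif', dic=core_dic)
print(valid_result)    # per-block validation results
print(no_matches)      # per-block datanames not found in the dictionary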

def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = check_file.keys()[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
           block_scope = 'Dictionary'
        elif isdic:
           block_scope = 'Item'
        else:
           block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
             del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches

Classes

class CifBlock

A class to hold a single block of a CIF file. A CifBlock object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. b['_a_dataname']. All other Python dictionary methods are also available (e.g. keys(), values()). Looped datanames will return a list of values.

Initialisation

When provided, data should be another CifBlock whose contents will be copied to this block.

  • if strict is set, maximum name lengths will be enforced

  • maxoutlength is the maximum length for output lines

  • wraplength is the ideal length to make output lines

  • When set, overwrite allows the values of datanames to be changed (otherwise an error is raised).

  • compat_mode will allow deprecated behaviour of creating single-dataname loops using the syntax a[_dataname] = [1,2,3,4]. This should now be done by calling CreateLoop after setting the dataitem value.
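
A minimal sketch of building a block from scratch (datanames chosen for illustration):

from CifFile import CifFile, CifBlock

cb = CifBlock()
cb['_cell_length_a'] = '5.959'           # simple (unlooped) item
cb['_atom_site_label'] = ['C1', 'O1']    # set the values first...
cb.CreateLoop(['_atom_site_label'])      # ...then declare them a loop explicitly
cf = CifFile()
cf['myblock'] = cb                       # install the block in a CifFile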

class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
        """When provided, `data` should be another CifBlock whose contents will be copied to
        this block.

        * if `strict` is set, maximum name lengths will be enforced

        * `maxoutlength` is the maximum length for output lines

        * `wraplength` is the ideal length to make output lines

        * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
        is raised).

        * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
        the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
        after setting the dataitem value.
        """
        if strict: maxnamelength=75
        else:
           maxnamelength=-1
        super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
        self.dictionary = None   #DDL dictionary referring to this block
        self.compat_mode = compat_mode   #old-style behaviour of setitem

    def RemoveCifItem(self,itemname):
        """Remove `itemname` from the CifBlock"""
        self.RemoveItem(itemname)

    def __setitem__(self,key,value):
        self.AddItem(key,value)
        # for backwards compatibility make a single-element loop
        if self.compat_mode:
            if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
                 # single element loop
                 self.CreateLoop([key])

    def copy(self):
        newblock = super(CifBlock,self).copy()
        return self.copy.im_class(newblock)   #catch inheritance

    def AddCifItem(self,data):
        """ *DEPRECATED*. Use `AddItem` instead."""
        # we accept only tuples, strings and lists!!
        if not (isinstance(data[0],(unicode,tuple,list,str))):
                  raise TypeError('Cif datanames are either a string, tuple or list')
        # we catch single item loops as well...
        if isinstance(data[0],(unicode,str)):
            self.AddSingleCifItem(data[0],list(data[1]))
            if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
                self.CreateLoop([data[0]])
            return
        # otherwise, we loop over the datanames
        keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
        [self.AddSingleCifItem(a,b) for a,b in keyvals]
        # and create the loop
        self.CreateLoop(data[0][0])

    def AddSingleCifItem(self,key,value):
        """*Deprecated*. Use `AddItem` instead"""
        """Add a single data item. If it is part of a loop, a separate call should be made"""
        self.AddItem(key,value)

    def loopnames(self):
        return [self.loops[a] for a in self.loops]

Ancestors (in MRO)

  • CifBlock
  • CifFile.StarFile.StarBlock
  • __builtin__.object

Methods

def AddCifItem(

self, data)

DEPRECATED. Use AddItem instead.

def AddCifItem(self,data):
    """ *DEPRECATED*. Use `AddItem` instead."""
    # we accept only tuples, strings and lists!!
    if not (isinstance(data[0],(unicode,tuple,list,str))):
              raise TypeError('Cif datanames are either a string, tuple or list')
    # we catch single item loops as well...
    if isinstance(data[0],(unicode,str)):
        self.AddSingleCifItem(data[0],list(data[1]))
        if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
            self.CreateLoop([data[0]])
        return
    # otherwise, we loop over the datanames
    keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
    [self.AddSingleCifItem(a,b) for a,b in keyvals]
    # and create the loop
    self.CreateLoop(data[0][0])

def AddItem(

self, key, value, precheck=False)

Add dataname key to block with value value. value may be a single value, a list or a tuple. If precheck is False (the default), all values will be checked and converted to unicode strings as necessary. If precheck is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.
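
For example, assuming cb is the CifBlock sketched earlier:

cb.AddItem('_cell_volume', '112.5')            # same effect as cb['_cell_volume'] = '112.5'
cb.AddItem('_chemical_formula_sum', 'C O2')    # values are converted to unicode strings as needed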

def AddItem(self,key,value,precheck=False):
    """Add dataname `key` to block with value `value`.  `value` may be
    a single value, a list or a tuple. If `precheck` is False (the default),
    all values will be checked and converted to unicode strings as necessary. If
    `precheck` is True, this checking is bypassed.  No checking is necessary
    when values are read from a CIF file as they are already in correct form."""
    if not isinstance(key,(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
    key = unicode(key)    #everything is unicode internally
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
    # check for overwriting
    if key in self:
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    if not precheck:   #need to sanitise
        regval,empty_val = self.regularise_data(value)
        pure_string = check_stringiness(regval)
        self.check_item_value(regval)
    else:
        regval,empty_val = value,None
        pure_string = True
    # update ancillary information first
    lower_key = key.lower()
    if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    try:
        del self.true_case[lower_key]
    except KeyError:
        pass
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})

def AddLoopItem(

self, incomingdata, precheck=False, maxlength=-1)

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(

self, oldname, newname)

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.
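
Continuing the sketch above, a new column can be added to an existing loop once its values are in place (the occupancy values are illustrative):

cb['_atom_site_occupancy'] = ['1.0', '0.5']    # must match the loop length of _atom_site_label
cb.AddLoopName('_atom_site_label', '_atom_site_occupancy')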

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass

def AddSingleCifItem(

self, key, value)

Deprecated. Use AddItem instead

def AddSingleCifItem(self,key,value):
    """*Deprecated*. Use `AddItem` instead"""
    """Add a single data item. If it is part of a loop, a separate call should be made"""
    self.AddItem(key,value)

def AddToLoop(

self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(

self, itemname, newpos)

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.
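
For example, to print _cell_length_a first in the block sketched earlier:

cb.ChangeItemOrder('_cell_length_a', 0)    # move to the front of the printout order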

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

def CreateLoop(

self, datanames, order=-1, length_check=True)

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.
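
A sketch with hypothetical geometry datanames, placing the new loop first in the printout order:

cb['_geom_bond_atom_site_label_1'] = ['C1']
cb['_geom_bond_atom_site_label_2'] = ['O1']
cb.CreateLoop(['_geom_bond_atom_site_label_1', '_geom_bond_atom_site_label_2'], order=0)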

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]

def FindLoop(

self, keyname)

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.
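
For example, using the looped _atom_site_label from the earlier sketch:

if cb.FindLoop('_atom_site_label') >= 0:
    print(cb.GetLoopNames('_atom_site_label'))    # all datanames sharing that loop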

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1

def GetCompoundKeyedPacket(

self, keydict)

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.
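
A sketch assuming _atom_site_label and _atom_site_type_symbol are looped together: the label is matched exactly while the type symbol is matched without regard to case.

pkt = cb.GetCompoundKeyedPacket({'_atom_site_label': ('C1', False),
                                 '_atom_site_type_symbol': ('c', True)})
print(getattr(pkt, '_atom_site_label'))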

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]

def GetFullItemValue(

self, itemname)

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.
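
For example:

value, ready = cb.GetFullItemValue('_cell_length_a')
print(value, ready)    # ready is True only if value is already in a calculation-ready form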

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True

def GetItemOrder(

self)

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(

self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetItemValue(

self, itemname)

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(

self, keyname, keyvalue, no_case=False)

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.
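
For example, to pick out the row of the earlier _atom_site loop labelled C1:

pkt = cb.GetKeyedPacket('_atom_site_label', 'C1')
print(getattr(pkt, '_atom_site_occupancy'))    # other columns of the matching packet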

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]

def GetKeyedSemanticPacket(

self, keyvalue, cat_id)

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.
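
A sketch, assuming the block's dictionary attribute has been set to a DDLm dictionary (the dictionary file name and category id are illustrative):

from CifFile import CifDic

cb.dictionary = CifDic('cif_core_ddlm.dic', grammar='2.0', do_dREL=False)
pkt = cb.GetKeyedSemanticPacket('C1', 'atom_site')    # merges _atom_site and _atom_site_aniso rows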

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def GetLoop(

self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

def GetLoopNames(

self, keyname)

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetMultiKeyedSemanticPacket(

self, keydict, cat_id)

Return a complete packet for category cat_id where the keyvalues are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs.
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def RemoveCifItem(

self, itemname)

Remove itemname from the CifBlock

def RemoveCifItem(self,itemname):
    """Remove `itemname` from the CifBlock"""
    self.RemoveItem(itemname)

def RemoveItem(

self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(

self, keyname, keyvalue)

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.
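
For example, to drop the whole row of the earlier _atom_site loop labelled O1:

cb.RemoveKeyedPacket('_atom_site_label', 'O1')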

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]

def RemoveLoopItem(

self, itemname)

Deprecated. Use RemoveItem instead

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(

self, wraplength=80, maxoutlength=2048)

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.
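
For example:

cb.SetOutputLength(wraplength=70, maxoutlength=100)    # wraplength must not exceed maxoutlength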

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength

class CifDic

Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only):

  • do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False

  • do_imports = No/Full/Contents/All: If not 'No', interpret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy'

  • do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All

  • heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically.
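
A sketch of typical construction (the dictionary file name is hypothetical); do_dREL=False skips the slower dREL setup:

from CifFile import CifDic

core = CifDic('cif_core_ddlm.dic', grammar='2.0', do_dREL=False)
print(core.diclang)    # reports the detected DDL version, e.g. 'DDLm'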

class CifDic(StarFile.StarFile):
    """Create a Cif Dictionary object from the provided source, which can
    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
    only):

    * do_minimum (Boolean):
         Do not set up the dREL system for auto-calculation or perform
         imports.  This implies do_imports=False and do_dREL=False

    * do_imports = No/Full/Contents/All:
         If not 'No', interpret _import.get statements for
         Full mode/Contents mode/Both respectively. See also option 'heavy'

    * do_dREL = True/False:
         Parse and convert all dREL methods to Python. Implies do_imports=All

    * heavy = True/False:
         (Experimental). If True, importation overwrites definitions. If False,
         attributes are resolved dynamically.
    """
    def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
                 grammar='auto',heavy=True,**kwargs):
        self.do_minimum = do_minimum
        if do_minimum:
            do_imports = 'No'
            do_dREL = False
        if do_dREL: do_imports = 'All'
        if heavy == 'Light' and do_imports not in ('contents','No'):
            raise ValueError("Light imports only available for mode 'contents'")
        self.template_cache = {}    #for DDLm imports
        self.ddlm_functions = {}    #for DDLm functions
        self.switch_numpy(False)    #no Numpy arrays returned
        super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs)
        self.standard = 'Dic'    #for correct output order
        self.scoping = 'dictionary'
        (self.dicname,self.diclang) = self.dic_determine()
        print('%s is a %s dictionary' % (self.dicname,self.diclang))
        self.scopes_mandatory = {}
        self.scopes_naughty = {}
        # rename and expand out definitions using "_name" in DDL dictionaries
        if self.diclang == "DDL1":
            self.DDL1_normalise()   #this removes any non-definition entries
        self.create_def_block_table() #From now on, [] uses definition_id
        if self.diclang == "DDL1":
            self.ddl1_cat_load()
        elif self.diclang == "DDL2":
            self.DDL2_normalise()   #iron out some DDL2 tricky bits
        elif self.diclang == "DDLm":
            self.scoping = 'dictionary'   #expose all save frames
            if do_imports != 'No':
                self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine
            self.create_alias_table()
            self.create_cat_obj_table()
            self.create_cat_key_table()
            if do_dREL:
                print('Doing full dictionary initialisation')
                self.initialise_drel()
        self.add_category_info(full=do_dREL)
        # initialise type information
        self.typedic={}
        self.primdic = {}   #typecode<->primitive type translation
        self.add_type_info()
        self.install_validation_functions()

    def dic_determine(self):
        if "on_this_dictionary" in self:
            self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
            self.def_id_spec = "_name"
            self.cat_id_spec = "_category.id"   #we add this ourselves
            self.type_spec = "_type"
            self.enum_spec = "_enumeration"
            self.cat_spec = "_category"
            self.esd_spec = "_type_conditions"
            self.must_loop_spec = "_list"
            self.must_exist_spec = "_list_mandatory"
            self.list_ref_spec = "_list_reference"
            self.key_spec = "_list_mandatory"
            self.unique_spec = "_list_uniqueness"
            self.child_spec = "_list_link_child"
            self.parent_spec = "_list_link_parent"
            self.related_func = "_related_function"
            self.related_item = "_related_item"
            self.primitive_type = "_type"
            self.dep_spec = "xxx"
            self.cat_list = []   #to save searching all the time
            name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
            version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
            return (name+version,"DDL1")
        elif len(self.get_roots()) == 1:              # DDL2/DDLm
            self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])
            # now change to dictionary scoping
            self.scoping = 'dictionary'
            name = self.master_block["_dictionary.title"]
            version = self.master_block["_dictionary.version"]
            if self.master_block.has_key("_dictionary.class"):   #DDLm
                self.enum_spec = '_enumeration_set.state'
                self.key_spec = '_category.key_id'
                self.must_exist_spec = None
                self.cat_spec = '_name.category_id'
                self.primitive_type = '_type.contents'
                self.cat_id_spec = "_definition.id"
                self.def_id_spec = "_definition.id"
                return(name+version,"DDLm")
            else:   #DDL2
                self.cat_id_spec = "_category.id"
                self.def_id_spec = "_item.name"
                self.key_spec = "_category_mandatory.name"
                self.type_spec = "_item_type.code"
                self.enum_spec = "_item_enumeration.value"
                self.esd_spec = "_item_type_conditions.code"
                self.cat_spec = "_item.category_id"
                self.loop_spec = "there_is_no_loop_spec!"
                self.must_loop_spec = "xxx"
                self.must_exist_spec = "_item.mandatory_code"
                self.child_spec = "_item_linked.child_name"
                self.parent_spec = "_item_linked.parent_name"
                self.related_func = "_item_related.function_code"
                self.related_item = "_item_related.related_name"
                self.unique_spec = "_category_key.name"
                self.list_ref_spec = "xxx"
                self.primitive_type = "_type"
                self.dep_spec = "_item_dependent.dependent_name"
                return (name+version,"DDL2")
        else:
            raise CifError("Unable to determine dictionary DDL version")

    def DDL1_normalise(self):
        # switch off block name collision checks
        self.standard = None
        # add default type information in DDL2 style
        # initial types and constructs
        base_types = ["char","numb","null"]
        prim_types = base_types[:]
        base_constructs = [".*",
            '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
            "\"\" "]
        for key,value in self.items():
           newnames = [key]  #keep by default
           if "_name" in value:
               real_name = value["_name"]
               if isinstance(real_name,list):        #looped values
                   for looped_name in real_name:
                      new_value = value.copy()
                      new_value["_name"] = looped_name  #only looped name
                      self[looped_name] = new_value
                   newnames = real_name
               else:
                      self[real_name] = value
                      newnames = [real_name]
           # delete the old one
           if key not in newnames:
              del self[key]
        # loop again to normalise the contents of each definition
        for key,value in self.items():
           #unlock the block
           save_overwrite = value.overwrite
           value.overwrite = True
           # deal with a missing _list, _type_conditions
           if "_list" not in value: value["_list"] = 'no'
           if "_type_conditions" not in value: value["_type_conditions"] = 'none'
           # deal with enumeration ranges
           if "_enumeration_range" in value:
               max,min = self.getmaxmin(value["_enumeration_range"])
               if min == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
               elif max == ".":
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
               else:
                   self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
           #add any type construct information
           if "_type_construct" in value:
               base_types.append(value["_name"]+"_type")   #ie dataname_type
               base_constructs.append(value["_type_construct"]+"$")
               prim_types.append(value["_type"])     #keep a record
               value["_type"] = base_types[-1]   #the new type name

        #make categories conform with ddl2
        #note that we must remove everything from the last underscore
           if value.get("_category",None) == "category_overview":
                last_under = value["_name"].rindex("_")
                catid = value["_name"][1:last_under]
                value["_category.id"] = catid  #remove square bracks
                if catid not in self.cat_list: self.cat_list.append(catid)
           value.overwrite = save_overwrite
        # we now add any missing categories before filling in the rest of the
        # information
        for key,value in self.items():
            #print('processing ddl1 definition %s' % key)
            if "_category" in self[key]:
                if self[key]["_category"] not in self.cat_list:
                    # rogue category, add it in
                    newcat = self[key]["_category"]
                    fake_name = "_" + newcat + "_[]"
                    newcatdata = CifBlock()
                    newcatdata["_category"] = "category_overview"
                    newcatdata["_category.id"] = newcat
                    newcatdata["_type"] = "null"
                    self[fake_name] = newcatdata
                    self.cat_list.append(newcat)
        # write out the type information in DDL2 style
        self.master_block.AddLoopItem((
            ("_item_type_list.code","_item_type_list.construct",
              "_item_type_list.primitive_code"),
            (base_types,base_constructs,prim_types)
            ))

    def ddl1_cat_load(self):
        deflist = self.keys()       #slight optimization
        cat_mand_dic = {}
        cat_unique_dic = {}
        # a function to extract any necessary information from each definition
        def get_cat_info(single_def):
            if self[single_def].get(self.must_exist_spec)=='yes':
                thiscat = self[single_def]["_category"]
                curval = cat_mand_dic.get(thiscat,[])
                curval.append(single_def)
                cat_mand_dic[thiscat] = curval
            # now the unique items...
            # cif_core.dic throws us a curly one: the value of list_uniqueness is
            # not the same as the defined item for publ_body_label, so we have
            # to collect both together.  We assume a non-listed entry, which
            # is true for all current (May 2005) ddl1 dictionaries.
            if self[single_def].get(self.unique_spec,None)!=None:
                thiscat = self[single_def]["_category"]
                new_unique = self[single_def][self.unique_spec]
                uis = cat_unique_dic.get(thiscat,[])
                if single_def not in uis: uis.append(single_def)
                if new_unique not in uis: uis.append(new_unique)
                cat_unique_dic[thiscat] = uis

        [get_cat_info(a) for a in deflist] # apply the above function
        for cat in cat_mand_dic.keys():
            self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
        for cat in cat_unique_dic.keys():
            self[cat]["_category_key.name"] = cat_unique_dic[cat]

    def create_pcloop(self,definition):
        old_children = self[definition].get('_item_linked.child_name',[])
        old_parents = self[definition].get('_item_linked.parent_name',[])
        if isinstance(old_children,unicode):
             old_children = [old_children]
        if isinstance(old_parents,unicode):
             old_parents = [old_parents]
        if (len(old_children)==0 and len(old_parents)==0) or \
           (len(old_children) > 1 and len(old_parents)>1):
             return
        if len(old_children)==0:
             old_children = [definition]*len(old_parents)
        if len(old_parents)==0:
             old_parents = [definition]*len(old_children)
        newloop = CifLoopBlock(dimension=1)
        newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
        newloop.AddLoopItem(('_item_linked.child_name',old_children))
        try:
            del self[definition]['_item_linked.parent_name']
            del self[definition]['_item_linked.child_name']
        except KeyError:
            pass
        self[definition].insert_loop(newloop)



    def DDL2_normalise(self):
       listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys())
       # now filter out all the single element lists!
       dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs)
       for item_def in dodgy_defs:
                # print("DDL2 norm: processing %s" % item_def)
                thisdef = self[item_def]
                packet_no = thisdef['_item.name'].index(item_def)
                realcat = thisdef['_item.category_id'][packet_no]
                realmand = thisdef['_item.mandatory_code'][packet_no]
                # first add in all the missing categories
                # we don't replace the entry in the list corresponding to the
                # current item, as that would wipe out the information we want
                for child_no in range(len(thisdef['_item.name'])):
                    if child_no == packet_no: continue
                    child_name = thisdef['_item.name'][child_no]
                    child_cat = thisdef['_item.category_id'][child_no]
                    child_mand = thisdef['_item.mandatory_code'][child_no]
                    if child_name not in self:
                        self[child_name] = CifBlock()
                        self[child_name]['_item.name'] = child_name
                    self[child_name]['_item.category_id'] = child_cat
                    self[child_name]['_item.mandatory_code'] = child_mand
                self[item_def]['_item.name'] = item_def
                self[item_def]['_item.category_id'] = realcat
                self[item_def]['_item.mandatory_code'] = realmand

       target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                     '_item_linked.parent_name' in self[a]]
       # now dodgy_defs contains all definition blocks with more than one child/parent link
       for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
       for item_def in dodgy_defs:
             print('Processing %s' % item_def)
             thisdef = self[item_def]
             child_list = thisdef['_item_linked.child_name']
             parents = thisdef['_item_linked.parent_name']
             # for each parent, find the list of children.
             family = list(zip(parents,child_list))
             notmychildren = family         #We aim to remove non-children
             # Loop over the parents, relocating as necessary
             while len(notmychildren):
                # get all children of first entry
                mychildren = [a for a in family if a[0]==notmychildren[0][0]]
                print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
                for parent,child in mychildren:   #parent is the same for all
                         # Make sure that we simply add in the new entry for the child, not replace it,
                         # otherwise we might spoil the child entry loop structure
                         try:
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                         except KeyError:
                             print('Creating new parent entry %s for definition %s' % (parent,child))
                             self[child]['_item_linked.parent_name'] = [parent]
                             childloop = self[child].GetLoop('_item_linked.parent_name')
                             childloop.AddLoopItem(('_item_linked.child_name',[child]))
                             continue
                         else:
                             # A parent loop already exists and so will a child loop due to the
                             # call to create_pcloop above
                             pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                             goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                             if len(goodpars)>0:   #no need to add it
                                 print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                                 continue
                             print('Adding %s to %s entry' % (parent,child))
                             newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                             setattr(newpacket,'_item_linked.child_name',child)
                             setattr(newpacket,'_item_linked.parent_name',parent)
                             childloop.AddPacket(newpacket)
                #
                # Make sure the parent also points to the children.  We get
                # the current entry, then add our
                # new values if they are not there already
                #
                parent_name = mychildren[0][0]
                old_children = self[parent_name].get('_item_linked.child_name',[])
                old_parents = self[parent_name].get('_item_linked.parent_name',[])
                oldfamily = zip(old_parents,old_children)
                newfamily = []
                print('Old parents -> %s' % repr(old_parents))
                for jj, childname in mychildren:
                    alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                    if len(alreadythere)>0: continue
                    print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                    old_children.append(childname)
                    old_parents.append(parent_name)
                # Now output the loop, blowing away previous definitions.  If there is something
                # else in this category, we are destroying it.
                newloop = CifLoopBlock(dimension=1)
                newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
                newloop.AddLoopItem(('_item_linked.child_name',old_children))
                del self[parent_name]['_item_linked.parent_name']
                del self[parent_name]['_item_linked.child_name']
                self[parent_name].insert_loop(newloop)
                print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
                # now make a new,smaller list
                notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]

       # now flatten any single element lists
       single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs)
       for flat_def in single_defs:
           flat_keys = self[flat_def].GetLoop('_item.name').keys()
           for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
       # now deal with the multiple lists
       # next we do aliases
       all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
       for aliased in all_aliases:
          my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
          for alias in my_aliases:
              self[alias] = self[aliased].copy()   #we are going to delete stuff...
              del self[alias]["_item_aliases.alias_name"]

    def ddlm_parse_valid(self):
        if "_dictionary_valid.application" not in self.master_block:
            return
        for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
            scope = getattr(scope_pack,"_dictionary_valid.application")
            valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
            if scope[1] == "Mandatory":
                self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
            elif scope[1] == "Prohibited":
                self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)

    def obtain_imports(self,import_mode,heavy=False):
        """Collate import information"""
        self._import_dics = []
        import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
        print('Import mode %s applied to following frames' % import_mode)
        print(str([a[0] for a in import_frames]))
        if import_mode != 'All':
           for i in range(len(import_frames)):
                import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()])
           print('Importing following frames in mode %s' % import_mode)
           print(str(import_frames))
        #resolve all references
        for parent_block,import_list in import_frames:
          for import_ref in import_list:
            file_loc = import_ref["file"]
            full_uri = self.resolve_path(file_loc)
            if full_uri not in self.template_cache:
                dic_as_cif = CifFile(full_uri,grammar=self.grammar)
                self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False)  #this will recurse internal imports
                print('Added %s to cached dictionaries' % full_uri)
            import_from = self.template_cache[full_uri]
            dupl = import_ref.get('dupl','Exit')
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                   raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if target_key in self and mode=='full':  #so blockname will be duplicated
                if dupl == 'Exit':
                    raise CifError('Import frame %s already in dictionary' % target_key)
                elif dupl == 'Ignore':
                    continue
            if heavy:
                self.ddlm_import(parent_block,import_from,import_target,target_key,mode)
            else:
                self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode)
                
    def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'):
            """Import other dictionaries in place"""
            if mode == 'contents':   #merge attributes only
                self[parent_block].merge(import_target)
            elif mode =="full":
                # Do the syntactic merge
                syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
                from_cat_head = import_target['_name.object_id']
                child_frames = import_from.ddlm_all_children(from_cat_head)
                 # Check for Head merging Head
                if self[parent_block].get('_definition.class','Datum')=='Head' and \
                   import_target.get('_definition.class','Datum')=='Head':
                      head_to_head = True
                else:
                      head_to_head = False
                      child_frames.remove(from_cat_head)
                # As we are in syntax land, we call the CifFile methods
                child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
                child_blocks = super(CifDic,import_from).makebc(child_blocks)
                # Prune out any datablocks that have identical definitions
                from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
                double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
                print('Definitions for %s superseded' % repr(double_defs))
                for b in double_defs:
                    del child_blocks[b[0]]
                super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
                print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
                   mode,len(self)))
                # Now the semantic merge
                # First expand our definition <-> blockname tree
                self.create_def_block_table()
                merging_cat = self[parent_block]['_name.object_id']      #new parent
                if head_to_head:
                    child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                    #the new parent is the importing category for all old children
                    for f in child_frames:
                        self[f].overwrite = True
                        self[f]['_name.category_id'] = merging_cat
                        self[f].overwrite = False
                    # remove the old head
                    del self[from_cat_head]
                    print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
                else:  #imported category is only child
                    from_frame = import_from[target_key]['_definition.id'] #so we can find it
                    child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                    self[child_frame]['_name.category_id'] = merging_cat
                    print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
            # remove the import specification so it is not processed again
            del self[parent_block]["_import.get"]

    def resolve_path(self,file_loc):
        url_comps = urlparse(file_loc)
        if url_comps[0]: return file_loc    #already full URI
        new_url = urljoin(self.my_uri,file_loc)
        #print("Transformed %s to %s for import " % (file_loc,new_url))
        return new_url
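
    # Resolution sketch: a relative import location is resolved against this
    # dictionary's own URI, so with my_uri == "file:///dics/cif_core.dic"
    # (illustrative), resolve_path("templ_attr.cif") gives
    # "file:///dics/templ_attr.cif", while a full URI is returned unchanged.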

    def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'):
        """Register the imported dictionaries but do not alter any definitions. `parent_block`
        contains the id of the block that is importing. `import_target` is the block that
        should be imported. `import_from` is the CifFile that contains the definitions."""
        if mode == 'contents':   #merge attributes only
            self[parent_block].add_dict_cache(file_loc,import_from)
        elif mode =="full":
             # Check for Head merging Head
            if self[parent_block].get('_definition.class','Datum')=='Head' and \
               import_target.get('_definition.class','Datum')=='Head':
                   head_to_head = True
            else:
                   head_to_head = False
            # Figure out the actual definition ID
            head_id = import_target["_definition.id"]
            # Adjust parent information
            merging_cat = self[parent_block]['_name.object_id']
            from_cat_head = import_target['_name.object_id']
            if not head_to_head:   # imported category is only child
                import_target["_name.category_id"]=merging_cat
            self._import_dics = [(import_from,head_id)]+self._import_dics #prepend

    def lookup_imports(self,key):
        """Check the list of imported dictionaries for this definition"""
        for one_dic,head_def in self._import_dics:
            from_cat_head = one_dic[head_def]['_name.object_id']
            possible_keys = one_dic.ddlm_all_children(from_cat_head)
            if key in possible_keys:
                return one_dic[key]
        raise KeyError("%s not found in import dictionaries" % key)
        


    def create_def_block_table(self):
        """ Create an internal table matching definition to block id """
        proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
        # now get the actual ids instead of blocks
        proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
        # remove non-definitions
        if self.diclang != "DDL1":
            top_blocks = list([a[0].lower() for a in self.get_roots()])
        else:
            top_blocks = ["on_this_dictionary"]
        # catch dodgy duplicates
        uniques = set([a[0] for a in proto_table])
        if len(uniques)<len(proto_table):
            def_names = list([a[0] for a in proto_table])
            dodgy = [a for a in def_names if def_names.count(a)>1]
            raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
        self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])
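
    # After this call block_id_table maps each lowercased _definition.id to
    # the lowercased blockname holding it, e.g. (sketch)
    # {'_atom_site.label': 'atom_site.label', ...}; __getitem__ below relies
    # on this to accept definition ids as keys.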

    def __getitem__(self,key):
        """Access a datablock by definition id, after the lookup has been created"""
        try:
            return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
        except AttributeError:   #block_id_table not present yet
            return super(CifDic,self).__getitem__(key)
        except KeyError: # key is missing
            try: # definition not found, fall back to plain blockname lookup
                return super(CifDic,self).__getitem__(key)
            except KeyError: # try imports
                return self.lookup_imports(key)

    def __setitem__(self,key,value):
        """Add a new definition block"""
        super(CifDic,self).__setitem__(key,value)
        try:
            self.block_id_table[value['_definition.id'].lower()]=key
        except AttributeError:   #does not exist yet
            pass

    def NewBlock(self,*args,**kwargs):
        """Add a new definition block, keeping the definition lookup table in step"""
        newname = super(CifDic,self).NewBlock(*args,**kwargs)
        try:
            self.block_id_table[self[newname]['_definition.id'].lower()]=newname
        except AttributeError: #no block_id_table yet
            pass
        return newname
                
    def __delitem__(self,key):
        """Remove a definition"""
        try:
            super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
            del self.block_id_table[key.lower()]
        except (AttributeError,KeyError):   #block_id_table not present yet
            super(CifDic,self).__delitem__(key)
            return
        # fix other datastructures
        # cat_obj table

    def keys(self):
        """Return all definitions"""
        try:
            return self.block_id_table.keys()
        except AttributeError:
            return super(CifDic,self).keys()

    def has_key(self,key):
        """Python 2 style membership test, retained for backwards compatibility"""
        return key in self

    def __contains__(self,key):
        try:
            return key.lower() in self.block_id_table
        except AttributeError:
            return super(CifDic,self).__contains__(key)

    def items(self):
        """Return (key,value) pairs"""
        return list([(a,self[a]) for a in self.keys()])

    def unlock(self):
        """Allow overwriting of all definitions in this collection"""
        for a in self.keys():
            self[a].overwrite=True

    def lock(self):
        """Disallow changes in definitions"""
        for a in self.keys():
            self[a].overwrite=False
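
    # Typical editing pattern (sketch): definitions are write-protected by
    # default, so bracket direct changes with unlock()/lock():
    #
    #   cdic.unlock()
    #   cdic['_cell.volume']['_description.text'] = 'Volume of the unit cell.'
    #   cdic.lock()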

    def rename(self,oldname,newname,blockname_as_well=True):
        """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
        change the underlying blockname too."""
        if blockname_as_well:
            super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)
            self.block_id_table[newname.lower()]=newname
            if oldname.lower() in self.block_id_table: #not removed
               del self.block_id_table[oldname.lower()]
        else:
            self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
            del self.block_id_table[oldname.lower()]
            return

    def get_root_category(self):
        """Get the single 'Head' category of this dictionary"""
        root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
        if len(root_cats) != 1:
            raise CifError("Cannot determine a unique Head category, got %s" % repr(root_cats))
        return root_cats[0]

    def ddlm_immediate_children(self,catname):
        """Return a list of datanames for the immediate children of catname.  These are
        semantic children (i.e. based on _name.category_id), not structural children as
        in the case of StarFile.get_immediate_children"""

        straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
        return list(straight_children)

    def ddlm_all_children(self,catname):
        """Return a list of all children, including the `catname`"""
        all_children = self.ddlm_immediate_children(catname)
        cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
        for c in cat_children:
            all_children.remove(c)
            all_children += self.ddlm_all_children(c)
        return all_children + [catname]
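
    # Sketch: for a hypothetical tree cell -> (_cell.volume, _cell.angle_alpha),
    # ddlm_all_children('cell') returns all descendants followed by the
    # category itself, e.g. ['_cell.volume', '_cell.angle_alpha', 'cell'].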

    def is_semantic_child(self,parent,maybe_child):
        """Return true if `maybe_child` is a child of `parent`"""
        all_children = self.ddlm_all_children(parent)
        return maybe_child in all_children

    def ddlm_danglers(self):
        """Return a list of definitions that do not have a category defined
        for them, or are children of an unattached category"""
        top_block = self.get_root_category()
        connected = set(self.ddlm_all_children(top_block))
        all_keys = set(self.keys())
        unconnected = all_keys - connected
        return list(unconnected)

    def get_ddlm_parent(self,itemname):
        """Get the parent category of itemname"""
        parent = self[itemname].get('_name.category_id','')
        if parent == '':
            raise CifError("%s has no parent category" % itemname)
        return parent

    def expand_category_opt(self,name_list):
        """Return a list of all non-category items in a category or return the name
           if the name is not a category"""
        new_list = []
        for name in name_list:
          if self.get(name,{}).get('_definition.scope','Item') == 'Category':
            new_list += self.expand_category_opt([a for a in self.keys() if \
                     self[a].get('_name.category_id','').lower() == name.lower()])
          else:
            new_list.append(name)
        return new_list

    def get_categories(self):
        """Return a list of category names"""
        return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

    def names_in_cat(self,cat,names_only=False):
        """Return the datanames (or their object ids if [[names_only]] is True) defined in category [[cat]]"""
        names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
        if not names_only:
            return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
        else:
            return list([self[a]["_name.object_id"] for a in names])
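
    # Sketch of the two return modes for a hypothetical 'atom_site' category:
    #
    #   cdic.names_in_cat('atom_site')                  # ['_atom_site.label', ...]
    #   cdic.names_in_cat('atom_site',names_only=True)  # ['label', ...]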



    def create_alias_table(self):
        """Populate an alias table that we can look up when searching for a dataname"""
        all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
        self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])

    def create_cat_obj_table(self):
        """Populate a table indexed by (cat,obj) and returning the correct dataname"""
        base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                           for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
        loopable = self.get_loopable_cats()
        loopers = [self.ddlm_immediate_children(a) for a in loopable]
        print('Loopable cats:' + repr(loopable))
        loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
        expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
        print("Expansion list:" + repr(expand_list))
        extra_table = {}   #for debugging we keep it separate from base_table until the end
        def expand_base_table(parent_cat,child_cats):
            extra_names = []
            # first deal with all the child categories
            for child_cat in child_cats:
                if child_cat in expand_list:  # a nested category: grab its names
                    extra_names += expand_base_table(child_cat,expand_list[child_cat])
                # add all child names to the table
                child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                               for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
                child_names += extra_names
                for obj,name in child_names:
                    if (parent_cat,obj) not in extra_table:
                        extra_table[(parent_cat,obj)] = [name]
                    elif name not in extra_table[(parent_cat,obj)]:
                        # repeated (category,object) pairs collect all alternative datanames
                        extra_table[(parent_cat,obj)].append(name)
                extra_names = child_names
            # and finally, add our own names to the return list
            extra_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return extra_names
        for parent,child in expand_list.items():
            expand_base_table(parent,child)
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list

    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories. This
        is preferred to a fixed attribute as that fixed attribute would need to be
        updated after any edits"""
        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category. A key is
        a compound key, that is, it is a list"""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
            kk = []
            for child_cat in child_cats:
                if child_cat in self.loop_expand_list:
                    kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                # add these keys to our list
                kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
            self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
            return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories: ' + repr(self.cat_key_table))

    def add_type_info(self):
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]  #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None:
                    fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None:
                    fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype

    def add_category_info(self,full=True):
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                        # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys  #preserves order of start keys

        else:
            self.parent_lookup = {}
            self.key_equivs = {}

    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]]"""
        self.unlock()
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])  #no underscore at the beginning
        self.lock()

    def create_catobj_name(self,cat,obj):
        """Combine category and object in approved fashion to create id"""
        return ('_'+cat+'.'+obj)
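
    # e.g. create_catobj_name('atom_site','label') -> '_atom_site.label'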

    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        defid = self[itemname]
        if defid['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:    #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        objid = defid['_name.object_id']
        defid['_name.category_id'] = catname
        newid = itemname # stays the same for categories
        if defid.get('_definition.scope','Item') == 'Item':
            newid = self.create_catobj_name(catname,objid)
            defid['_definition.id']= newid
            self.rename(itemname,newid)
        self.set_parent(catname,newid)
        self.lock()
        return newid

    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj. This is not used for
        categories, but can be used for dictionaries"""
        if '_dictionary.title' not in self[one_def]:  #not a dictionary block
            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,newid)
            self[newid]['_definition.id']=newid
            self[newid]['_name.object_id']=newobj
        else:
            self.unlock()
            newid = newobj
            self.rename(one_def,newobj)
            self[newid]['_dictionary.title'] = newid
        self.lock()
        return newid

    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
           is true, the parent category does not have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is None:
            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(semantic_root)>0:
                semantic_root = semantic_root[0]
            else:
                semantic_root = syntactic_root
        else:
            semantic_root = catparent
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if not allow_dangler or catparent is None:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        else:
            self[catname]['_name.category_id'] = catparent
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        if is_loop:
            self[catname]['_definition.class'] = 'Loop'
        else:
            self[catname]['_definition.class'] = 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname

    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made."""
        self.unlock()
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
        # update our dictionary structures
        self.block_id_table[fullname.lower()]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
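
    # Sketch of building a dictionary programmatically (names are illustrative):
    #
    #   cdic.add_category('demo_cat')    # child of the Head category by default
    #   cdic.add_definition('_demo_cat.value','demo_cat',def_text='A demo item')
    #   '_demo_cat.value' in cdic        # -> True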

    def remove_definition(self,defname):
        """Remove a definition from the dictionary."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            children = self.ddlm_immediate_children(defname)
            [self.remove_definition(a) for a in children]
            cat_id = self[defname]['_definition.id'].lower()
        del self[defname]

    def get_cat_obj(self,name):
        """Return (cat,obj) tuple. [[name]] must contain only a single period"""
        cat,obj = name.split('.')
        return (cat.strip('_'),obj)

    def get_name_by_cat_obj(self,category,object,give_default=False):
        """Return the dataname corresponding to the given category and object"""
        if category[0] == '_':    #accidentally left in
           true_cat = category[1:].lower()
        else:
           true_cat = category.lower()
        try:
            return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
        except KeyError:
            if give_default:
               return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
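
    # Reverse-lookup sketch, using the table built by create_cat_obj_table:
    #
    #   cdic.get_name_by_cat_obj('atom_site','label')   # -> '_atom_site.label'
    #   cdic.get_name_by_cat_obj('atom_site','nosuch',give_default=True)
    #                                                   # -> '_atom_site.nosuch'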


    def WriteOut(self,**kwargs):
        myblockorder = self.get_full_child_list()
        self.set_grammar(self.grammar)
        self.standard = 'Dic'
        return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

    def get_full_child_list(self):
        """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
        top_block = self.get_roots()[0][0]
        root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
        if len(root_cat) == 1:
            all_names = [top_block] + self.recurse_child_list(root_cat[0])
            unrooted = self.ddlm_danglers()
            double_names =  set(unrooted).intersection(set(all_names))
            if len(double_names)>0:
                raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
            remaining = unrooted[:]
            for no_root in unrooted:
                if self[no_root].get('_definition.scope','Item')=='Category':
                    all_names += [no_root]
                    remaining.remove(no_root)
                    these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                    all_names += these_children
                    [remaining.remove(n) for n in these_children]
            # now sort by category
            ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
            for e in ext_cats:
                cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
                [remaining.remove(n) for n in cat_items]
                all_names += cat_items
            if len(remaining)>0:
                print('WARNING: the following items do not seem to belong to any category:')
                print(repr(remaining))
                all_names += remaining
            print('Final block order: ' + repr(all_names))
            return all_names
        raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')

    def cat_from_name(self,one_name):
        """Guess the category from the name. This should be used only when this is not important semantic information,
        for example, when printing out"""
        (cat,obj) = one_name.split(".")
        if cat[0] == "_": cat = cat[1:]
        return cat

    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        final_list = [parentname]
        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
        child_blocks.sort()    #we love alphabetical order
        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
        final_list += child_items
        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
        for child_cat in child_cats:
            final_list += self.recurse_child_list(child_cat)
        return final_list



    def get_key_pack(self,category,value,data):
        keyname = self[category][self.unique_spec]
        onepack = data.GetPackKey(keyname,value)
        return onepack

    @staticmethod
    def get_number_with_esd(numstring):
        """Parse a CIF numeric string, returning (value,esd); (None,None) if unparseable"""
        numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
        our_match = re.match(numb_re,numstring)
        if our_match:
            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
        else:
            return None,None
        if dot or q: return None,None     #a dot or question mark
        if exp:          #has exponent
           exp = exp.replace("d","e")     # mop up old fashioned numbers
           exp = exp.replace("D","e")
           base_num = base_num + exp
        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
        base_num = float(base_num)
        # work out esd, if present.
        if esd:
            esd = float(esd[1:-1])    # no brackets
            if dad:                   # decimal point + digits
                esd = esd * (10 ** (-1* len(dad)))
            if exp:
                esd = esd * (10 ** (float(exp[1:])))
        return base_num,esd
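
    # Worked examples (sketch):
    #   get_number_with_esd("12.34(5)") -> (12.34, 0.05)  # esd on last digit
    #   get_number_with_esd("1.2e3")    -> (1200.0, None)
    #   get_number_with_esd("?")        -> (None, None)   # unknown value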

    def getmaxmin(self,rangeexp):
        """Split a DDL range expression such as '0.0:10.0' into (maximum,minimum)"""
        regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        regexp = regexp + ":" + regexp
        matched = re.match(regexp,rangeexp)
        try:
            minimum = matched.group(1)
            maximum = matched.group(7)
        except AttributeError:
            raise CifError("Can't interpret range expression %s" % rangeexp)
        if minimum is None: minimum = "."
        else: minimum = float(minimum)
        if maximum is None: maximum = "."
        else: maximum = float(maximum)
        return maximum,minimum
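
    # Sketch: getmaxmin("0.0:10.0") -> (10.0, 0.0); an open-ended range such
    # as "0.0:" returns "." for the missing bound, giving (".", 0.0).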

    def initialise_drel(self):
        """Parse drel functions and prepare data structures in dictionary"""
        self.ddlm_parse_valid() #extract validity information from data block
        self.transform_drel()   #parse the drel functions
        self.add_drel_funcs()   #put the drel functions into the namespace

    def transform_drel(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        import traceback
        parser = drel_ast_yacc.parser
        lexer = drel_ast_yacc.lexer
        my_namespace = self.keys()
        my_namespace = dict(zip(my_namespace,my_namespace))
        # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
        derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                              and self[a].get("_name.category_id","")!= "function"]
        for derivable in derivable_list:
            target_id = derivable
            # reset the list of visible names for parser
            special_ids = [dict(zip(self.keys(),self.keys()))]
            print("Target id: %s" % derivable)
            drel_exprs = self[derivable]["_method.expression"]
            drel_purposes = self[derivable]["_method.purpose"]
            all_methods = []
            if not isinstance(drel_exprs,list):
                drel_exprs = [drel_exprs]
                drel_purposes = [drel_purposes]
            for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
                if drel_purpose != 'Evaluation':
                    continue
                drel_expr = "\n".join(drel_expr.splitlines())
                # print("Transforming %s" % drel_expr)
                # List categories are treated differently...
                try:
                    meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
                except:
                    print('Syntax error in method for %s; leaving as is' % derivable)
                    a,b = sys.exc_info()[:2]
                    print((repr(a),repr(b)))
                    print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                    # reset the lexer
                    lexer.begin('INITIAL')
                    continue
                # Construct the python method
                cat_meth = False
                if self[derivable].get('_definition.scope','Item') == 'Category':
                    cat_meth = True
                pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                                           loopable=loop_info,
                                                             cif_dic = self,cat_meth=cat_meth)
                all_methods.append(pyth_meth)
            if len(all_methods)>0:
                save_overwrite = self[derivable].overwrite
                self[derivable].overwrite = True
                self[derivable]["_method.py_expression"] = all_methods
                self[derivable].overwrite = save_overwrite
            #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))

    def add_drel_funcs(self):
        from .drel import drel_ast_yacc
        from .drel import py_from_ast
        funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
        funcnames = [(self[a]["_name.object_id"],
                      getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
        # create executable python code...
        parser = drel_ast_yacc.parser
        # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
        loopable_cats = self.get_loopable_cats()
        loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
        loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
        cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
        loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
        for funcname,funcbody in funcnames:
            newline_body = "\n".join(funcbody.splitlines())
            parser.target_id = funcname
            res_ast = parser.parse(newline_body)
            py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
            #print('dREL library function ->\n' + py_function)
            global_table = globals()
            exec(py_function, global_table)    #add to namespace
        #print('Globals after dREL functions added:' + repr(globals()))
        self.ddlm_functions = globals()  #for outside access

    @track_recursion
    def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
        key = start_key   #starting value
        result = None     #success is a non-None value
        default_result = False #we have not used a default value
        # check for aliases
        # check for an older form of a new value
        found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
        if len(found_it)>0:
            corrected_type = self.change_type(key,cifdata[found_it[0]])
            return corrected_type
        # now do the reverse check - any alternative form
        alias_name = [a for a in self.alias_table.items() if key in a[1]]
        print('Aliases for %s: %s' % (key,repr(alias_name)))
        if len(alias_name)==1:
            key = alias_name[0][0]   #actual definition name
            if key in cifdata: return self.change_type(key,cifdata[key])
            found_it = [k for k in alias_name[0][1] if k in cifdata]
            if len(found_it)>0:
                return self.change_type(key,cifdata[found_it[0]])
        elif len(alias_name)>1:
            raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

        the_category = self[key]["_name.category_id"]
        cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        # store any default value in case we have a problem
        def_val = self[key].get("_enumeration.default","")
        def_index_val = self[key].get("_enumeration.def_index_id","")
        if len(has_cat_names)==0: # try category method
            cat_result = {}
            pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
            pulled_from_cats = [(k,[
                                  self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
                               ) for k in pulled_from_cats]
            pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
            if '_category_construct_local.type' in self[the_category]:
                print("**Now constructing category %s using DDLm attributes**" % the_category)
                try:
                    cat_result = self.construct_category(the_category,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s (error)' % the_category)
            # Trying a pull-back when the category is partially populated
            # will not work, hence we test that cat_result has no keys
            if len(pulled_to_cats)>0 and len(cat_result)==0:
                print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
                try:
                    cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
                except (CifRecursionError,StarFile.StarDerivationError):
                    print('** Failed to construct category %s from pullback information (error)' % the_category)
            if '_method.py_expression' in self[the_category] and key not in cat_result:
                print("**Now applying category method for %s in search of %s**" % (the_category,key))
                cat_result = self.derive_item(the_category,cifdata,store_value=True)
            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
            # do we now have our value?
            if key in cat_result:
                return cat_result[key]

        # Recalculate in case it actually worked
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        the_funcs = self[key].get('_method.py_expression',"")
        if the_funcs:   #attempt to calculate it
            #global_table = globals()
            #global_table.update(self.ddlm_functions)
            for one_func in the_funcs:
                print('Executing function for %s:' % key)
                #print(one_func)
                exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
                # print('in following global environment: ' + repr(global_table))
                stored_setting = cifdata.provide_value
                cifdata.provide_value = True
                try:
                    result = pyfunc(cifdata)
                except CifRecursionError as s:
                    print(s)
                    result = None
                except StarFile.StarDerivationError as s:
                    print(s)
                    result = None
                finally:
                    cifdata.provide_value = stored_setting
                if result is not None:
                    break
                #print("Function returned {!r}".format(result))

        if result is None and allow_defaults:   # try defaults
            if def_val:
                result = self.change_type(key,def_val)
                default_result = True
            elif def_index_val:            #derive a default value
                index_vals = self[key]["_enumeration_default.index"]
                val_to_index = cifdata[def_index_val]     #what we are keying on
                lcase_comp = False
                if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
                    lcase_comp = True
                    index_vals = [a.lower() for a in index_vals]
                # Handle loops
                if isinstance(val_to_index,list):
                    if lcase_comp:
                        val_to_index = [a.lower() for a in val_to_index]
                    keypos = [index_vals.index(a) for a in val_to_index]
                    result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
                else:
                    if lcase_comp:
                        val_to_index = val_to_index.lower()
                    keypos = index_vals.index(val_to_index)   #value error if no such value available
                    result = self[key]["_enumeration_default.value"][keypos]
                    default_result = True   #flag that it must be extended
                result = self.change_type(key,result)
                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

        # read it in
        if result is None:   #can't do anything else
            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
            raise StarFile.StarDerivationError(start_key)
        is_looped = False
        if self[the_category].get('_definition.class','Set')=='Loop':
            is_looped = True
            if len(has_cat_names)>0:   #this category already exists
                if result is None or default_result: #need to create a list of values
                    loop_len = len(cifdata[has_cat_names[0]])
                    out_result = [result]*loop_len
                    result = out_result
            else:   #nothing exists in this category, we can't store this at all
                print('Resetting result %s for %s to null list as category is empty' % (repr(result),key))
                result = []

        # now try to insert the new information into the right place
        # find if items of this category already appear...
        # Never cache empty values
        if not (isinstance(result,list) and len(result)==0) and\
          store_value:
            if self[key].get("_definition.scope","Item")=='Item':
                if is_looped:
                    result = self.store_new_looped_value(key,cifdata,result,default_result)
                else:
                    result = self.store_new_unlooped_value(key,cifdata,result)
            else:
                self.store_new_cat_values(cifdata,result,the_category)
        return result
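
    # Typical derivation call (sketch; success depends on the dictionary and
    # data to hand):
    #
    #   vol = cdic.derive_item('_cell.volume', cifblock, store_value=True)
    #
    # On success the derived value is also stored back into cifblock; a
    # StarDerivationError signals that no route to a value could be found.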

    def store_new_looped_value(self,key,cifdata,result,default_result):
          """Store a looped value from the dREL system into a CifFile"""
          # try to change any matrices etc. to lists
          the_category = self[key]["_name.category_id"]
          out_result = result
          if result is not None and not default_result:
                  # find any numpy arrays
                  def conv_from_numpy(one_elem):
                      if not hasattr(one_elem,'dtype'):
                         if isinstance(one_elem,(list,tuple)):
                            return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
                         return one_elem
                      if one_elem.size > 1:   #so is not a float
                         return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
                      else:
                          try:
                            return one_elem.item(0)
                          except:
                            return one_elem
                  out_result = [conv_from_numpy(a) for a in result]
          # so out_result now contains a value suitable for storage
          cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
          has_cat_names = [a for a in cat_names if a in cifdata]
          print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
          if len(has_cat_names)>0:   #this category already exists
              cifdata[key] = out_result      #lengths must match or else!!
              cifdata.AddLoopName(has_cat_names[0],key)
          else:
              cifdata[key] = out_result
              cifdata.CreateLoop([key])
          print('Loop info:' + repr(cifdata.loops))
          return out_result

    def store_new_unlooped_value(self,key,cifdata,result):
          """Store a single value from the dREL system"""
          if result is not None and hasattr(result,'dtype'):
              if result.size > 1:
                  out_result = StarFile.StarList(result.tolist())
                  cifdata[key] = out_result
              else:
                  cifdata[key] = result.item(0)
          else:
              cifdata[key] = result
          return result

    def construct_category(self,category,cifdata,store_value=True):
        """Construct a category using DDLm attributes"""
        con_type = self[category].get('_category_construct_local.type',None)
        if con_type is None:
            return {}
        if con_type == 'Pullback' or con_type == 'Filter':
            morphisms  = self[category]['_category_construct_local.components']
            morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
            cats = [self[a]['_name.category_id'] for a in morphisms]
            cat_keys = [self[a]['_category.key_id'] for a in cats]
            cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
            if con_type == 'Filter':
                int_filter = self[category].get('_category_construct_local.integer_filter',None)
                text_filter = self[category].get('_category_construct_local.text_filter',None)
                if int_filter is not None:
                    morph_values.append([int(a) for a in int_filter])
                if text_filter is not None:
                    morph_values.append(text_filter)
                cat_values.append(range(len(morph_values[-1])))
            # create the mathematical product filtered by equality of dataname values
            pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
                            if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
            # now prepare for return
            if len(pullback_ids)==0:
                return {}
            newids = self[category]['_category_construct_local.new_ids']
            fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
            if con_type == 'Pullback':
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
                final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
            elif con_type == 'Filter':   #simple filter
                final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
                final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
            if store_value:
                self.store_new_cat_values(cifdata,final_results,category)
            return final_results

    def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
        """Each of the categories in source_categories are pullbacks that include
        the target_category"""
        target_key = self[target_category]['_category.key_id']
        result = {target_key:[]}
        first_time = True
        # for each source category, determine which element goes to the target
        for sc in source_categories:
            components = self[sc]['_category_construct_local.components']
            comp_cats = [self[c]['_name.category_id'] for c in components]
            new_ids = self[sc]['_category_construct_local.new_ids']
            source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
            if len(components) == 2:  # not a filter
                element_pos = comp_cats.index(target_category)
                old_id = source_ids[element_pos]
                print('Using %s to populate %s' % (old_id,target_key))
                result[target_key].extend(cifdata[old_id])
                # project through all identical names
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
                # we only include keys that are common to all categories
                if first_time:
                    result.update(extra_result)
                else:
                    for k in extra_result.keys():
                        if k in result:
                            print('Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
            else:
                extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
                if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                    result[target_key].extend(cifdata[source_ids[0]])
                    for k in extra_result.keys():
                        if k in result:
                            print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                            result[k].extend(extra_result[k])
                        else:
                            result[k]=extra_result[k]
                    # Bonus derivation if there is a singleton filter
                    if self[sc]['_category_construct_local.type'] == 'Filter':
                        int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                        text_filter = self[sc].get('_category_construct_local.text_filter',None)
                        if int_filter is not None:
                            filter_values = int_filter
                        else:
                            filter_values = text_filter
                        if len(filter_values)==1:    #a singleton
                            extra_dataname = self[sc]['_category_construct_local.components'][0]
                            if int_filter is not None:
                                new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                            else:
                                new_value = filter_values * len(cifdata[source_ids[0]])
                            if extra_dataname not in result:
                                result[extra_dataname] = new_value
                            else:
                                result[extra_dataname].extend(new_value)
                    else:
                        raise ValueError('Unexpected category construct type: ' + self[sc]['_category_construct_local.type'])
            first_time = False
        # check for sanity - all dataname lengths must be identical
        datalen = len(set([len(a) for a in result.values()]))
        if datalen != 1:
            raise AssertionError('Failed to construct equal-length category items: ' + repr(result))
        if store_value:
            print('Now storing ' + repr(result))
            self.store_new_cat_values(cifdata,result,target_category)
        return result

    def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
        """Copy across datanames for which the from_category key equals [[key_vals]]"""
        result = {}
        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
        can_project = s_names_in_cat & t_names_in_cat
        can_project -= set(skip_names)  #already dealt with
        source_key = self[from_category]['_category.key_id']
        print('Source dataname set: ' + repr(s_names_in_cat))
        print('Target dataname set: ' + repr(t_names_in_cat))
        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
        for project_name in can_project:
            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
            if key_vals is None:
                try:
                    result[full_to_name] = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    pass
            else:
                all_key_vals = cifdata[source_key]
                filter_pos = [all_key_vals.index(a) for a in key_vals]
                try:
                    all_data_vals = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    continue
                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
        return result

    def store_new_cat_values(self,cifdata,result,the_category):
        """Store the values in [[result]] into [[cifdata]]"""
        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
        double_names = [a for a in result.keys() if a in cifdata]
        if len(double_names)>0:
            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
            if set(already_present) != set(result.keys()):
                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
                return
            #check key values; the_key is a list of key datanames
            old_keys = set(cifdata[the_key[0]])
            common_keys = old_keys & set(result[the_key[0]])
            if len(common_keys)>0:
                print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
                return
            #extend result values with old values
            for one_name,one_value in result.items():
                result[one_name].extend(cifdata[one_name])
        for one_name, one_value in result.items():
            try:
                self.store_new_looped_value(one_name,cifdata,one_value,False)
            except StarFile.StarError:
                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
        #put the key as the first item
        print('Fixing item order for {}'.format(repr(the_key)))
        for one_key in the_key:  #should only be one
            cifdata.ChangeItemOrder(one_key,0)


    def generate_default_packet(self,catname,catkey,keyvalue):
        """Return a StarPacket with items from ``catname`` and a key value
        of ``keyvalue``"""
        newpack = StarPacket()
        for na in self.names_in_cat(catname):
            def_val = self[na].get("_enumeration.default","")
            if def_val:
                final_val = self.change_type(na,def_val)
                newpack.append(final_val)     #one value per dataname in the packet
                setattr(newpack,na,final_val)
        if len(newpack)>0:
            newpack.append(keyvalue)
            setattr(newpack,catkey,keyvalue)
        return newpack
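
    # Hedged usage sketch (dictionary, category and key names are
    # illustrative; requires a local copy of the dictionary file):
    #
    #   >>> dic = CifDic('cif_core.dic')                      # doctest: +SKIP
    #   >>> pkt = dic.generate_default_packet('atom_site',
    #   ...           '_atom_site.label', 'C1')               # doctest: +SKIP
    #
    # The packet carries every dataname in the category that declares an
    # _enumeration.default, plus the supplied key value.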


    def switch_numpy(self,to_val):
        pass

    def change_type(self,itemname,inval):
        if inval == "?": return inval
        change_function = convert_type(self[itemname])
        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
            newval = list([change_function(a) for a in inval])
        else:
            newval = change_function(inval)
        return newval

    def install_validation_functions(self):
        """Install the DDL-appropriate validation checks"""
        if self.diclang != 'DDLm':
            # functions which check conformance
            self.item_validation_funs = [
                self.validate_item_type,
                self.validate_item_esd,
                self.validate_item_enum,
                self.validate_enum_range,
                self.validate_looping
            ]
            # functions checking loop values
            self.loop_validation_funs = [
                self.validate_loop_membership,
                self.validate_loop_key,
                self.validate_loop_references
            ]
            # where we need to look at other values
            self.global_validation_funs = [
                self.validate_exclusion,
                self.validate_parent,
                self.validate_child,
                self.validate_dependents,
                self.validate_uniqueness
            ]
            # where only a full block will do
            self.block_validation_funs = [
                self.validate_mandatory_category
            ]
            # removal is quicker with special checks
            self.global_remove_validation_funs = [
                self.validate_remove_parent_child
            ]
        elif self.diclang == 'DDLm':
            self.item_validation_funs = [
                self.validate_item_enum,
                self.validate_item_esd_ddlm,
                ]
            self.loop_validation_funs = [
                self.validate_looping_ddlm,
                self.validate_loop_key_ddlm,
                self.validate_loop_membership
                ]
            self.global_validation_funs = []
            self.block_validation_funs = [
                self.check_mandatory_items,
                self.check_prohibited_items
                ]
            self.global_remove_validation_funs = []
        self.optimize = False        # default value
        self.done_parents = []
        self.done_children = []
        self.done_keys = []

    def validate_item_type(self,item_name,item_value):
        def mymatch(m,a):
            res = m.match(a)
            if res != None: return res.group()
            else: return ""
        target_type = self[item_name].get(self.type_spec)
        if target_type == None:          # e.g. a category definition
            return {"result":True}                  # not restricted in any way
        matchexpr = self.typedic[target_type]
        item_values = listify(item_value)
        #for item in item_values:
            #print("Type match " + item_name + " " + item + ":",)
        #skip dots and question marks
        check_all = [a for a in item_values if a !="." and a != "?"]
        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def decide(self,result_list):
        """Construct the return list"""
        if len(result_list)==0:
               return {"result":True}
        else:
               return {"result":False,"bad_values":result_list}

    def validate_item_container(self, item_name,item_value):
        container_type = self[item_name]['_type.container']
        item_values = listify(item_value)
        if container_type == 'Single':
           okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
           return self.decide(okcheck)
        if container_type in ('Multiple','List'):
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           return self.decide(okcheck)
        if container_type == 'Array':    #A list with numerical values
           okcheck = [a for a in item_values if not isinstance(a,StarList)]
           first_check = self.decide(okcheck)
           if not first_check['result']: return first_check
           #TODO: also check that each element of the Array is numerical
           return first_check

    def validate_item_esd(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) != 'numb':
            return {"result":None}
        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
        if can_esd: return {"result":True}         #must be OK!
        item_values = listify(item_value)
        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_item_esd_ddlm(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) not in \
        ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
            return {"result":None}
        can_esd = True
        if self[item_name].get('_type.purpose') != 'Measurand':
            can_esd = False
        item_values = listify(item_value)
        check_all = [get_number_with_esd(a)[1] for a in item_values]
        check_all = [v for v in check_all if (can_esd and v == None) or \
                 (not can_esd and v != None)]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_enum_range(self,item_name,item_value):
        if "_item_range.minimum" not in self[item_name] and \
           "_item_range.maximum" not in self[item_name]:
            return {"result":None}
        minvals = self[item_name].get("_item_range.minimum",default = ["."])
        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
        def makefloat(a):
            if a == ".": return a
            else: return float(a)
        maxvals = map(makefloat, maxvals)
        minvals = map(makefloat, minvals)
        rangelist = list(zip(minvals,maxvals))
        item_values = listify(item_value)
        def map_check(rangelist,item_value):
            if item_value == "?" or item_value == ".": return True
            iv,esd = get_number_with_esd(item_value)
            if iv==None: return None  #shouldn't happen as is numb type
            for lower,upper in rangelist:
                #check the minima
                if lower == ".": lower = iv - 1
                if upper == ".": upper = iv + 1
                if iv > lower and iv < upper: return True
                if upper == lower and iv == upper: return True
            # debug
            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
            return False
        check_all = [a for a in item_values if map_check(rangelist,a) != True]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}
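
    # Sketch of the range convention handled above: "." leaves that side
    # unbounded, and minimum == maximum demands exact equality, e.g.
    #
    #   rangelist = [(0.0, '.')]   # accepts any value strictly greater than 0
    #   rangelist = [(5.0, 5.0)]   # accepts exactly 5 only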

    def validate_item_enum(self,item_name,item_value):
        try:
            enum_list = self[item_name][self.enum_spec][:]
        except KeyError:
            return {"result":None}
        enum_list.append(".")   #default value
        enum_list.append("?")   #unknown
        item_values = listify(item_value)
        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
        check_all = [a for a in item_values if a not in enum_list]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_looping(self,item_name,item_value):
        try:
            must_loop = self[item_name][self.must_loop_spec]
        except KeyError:
            return {"result":None}
        if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
            return {"result":False}      #looping is required but a bare value was given
        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
            return {"result":False}      #looping is forbidden but a list was given
        return {"result":True}

    def validate_looping_ddlm(self,loop_names):
        """Check that all names are loopable"""
        truly_loopy = self.get_final_cats(loop_names)
        if len(truly_loopy)<len(loop_names):  #some are bad
            categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
            not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
            return {"result":False,"bad_items":not_looped}
        return {"result":True}


    def validate_loop_membership(self,loop_names):
        final_cat = self.get_final_cats(loop_names)
        bad_items =  [a for a in final_cat if a != final_cat[0]]
        if len(bad_items)>0:
            return {"result":False,"bad_items":bad_items}
        else: return {"result":True}

    def get_final_cats(self,loop_names):
        """Return a list of the uppermost parent categories for the loop_names. Names
        that are not from loopable categories are ignored."""
        try:
            categories = [self[a][self.cat_spec].lower() for a in loop_names]
        except KeyError:       #category_id is mandatory
            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
        return [self.parent_lookup[a] for a in truly_looped]

    def validate_loop_key(self,loop_names):
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            if isinstance(names_to_check,unicode):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue          #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    for alt_names in alternates:
                        alt = [a for a in alt_names if a in loop_names]
                        if len(alt) == 0:
                            return {"result":False,"bad_items":loop_key}  # no alternates
        return {"result":True}

    def validate_loop_key_ddlm(self,loop_names):
        """Make sure at least one of the necessary keys are available"""
        final_cats = self.get_final_cats(loop_names)
        if len(final_cats)>0:
            poss_keys = self.cat_key_table[final_cats[0]][0]   #use the first key list
            found_keys = [a for a in poss_keys if a in loop_names]
            if len(found_keys)>0:
                return {"result":True}
            else:
                return {"result":False,"bad_items":poss_keys}
        else:
            return {"result":True}

    def validate_loop_references(self,loop_names):
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a != None]
        # build a flat list.  For efficiency we don't remove duplicates, as
        # we expect no more than the order of 10 or 20 looped names.
        def flat_func(a,b):
            if isinstance(b,unicode):
               a.append(b)       #single name
            else:
               a.extend(b)       #list of names
            return a
        flat_mh = []
        [flat_func(flat_mh,a) for a in must_haves]
        group_mh = filter(lambda a:a[-1]=="_",flat_mh)
        single_mh = filter(lambda a:a[-1]!="_",flat_mh)
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            nl = map(lambda a:a[:len(s_item)],name_list)
            if s_item in nl: return True
            return False
        res_g = [a for a in group_mh if check_gr(a,loop_names)]
        if len(res) == 0 and len(res_g) == 0: return {"result":True}
        # construct alternate list
        alternates = map(lambda a: (a,self.get_alternates(a)),res)
        # next line purely for error reporting
        missing_alts = [a[0] for a in alternates if a[1] == []]
        alternates = [a for a in alternates if a[1] != []]
        if len(alternates) != len(res):
           return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                       #doesn't have an alternate
        #loop over alternates
        for orig_name,alt_names in alternates:
             alt = [a for a in alt_names if a in loop_names]
             if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
        return {"result":True}        #found alternates

    def get_alternates(self,main_name,exclusive_only=False):
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates != None:
            alt_names =  self[main_name].get(self.related_item,None)
            if isinstance(alt_names,unicode):
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                                             or a[1]=="replace"]
            else:
                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
            alt_names = list([a[0] for a in alt_names])
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names


    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
       alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
       item_name_list = [a.lower() for a in whole_block.keys()]
       item_name_list.extend([a.lower() for a in provisional_items.keys()])
       bad = [a for a in alternates if a in item_name_list]
       if len(bad)>0:
           print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
           return {"result":False,"bad_items":bad}
       else: return {"result":True}

    # validate that parent exists and contains matching values
    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        parent_item = self[item_name].get(self.parent_spec)
        if not parent_item: return {"result":None}   #no parent specified
        if isinstance(parent_item,list):
            parent_item = parent_item[0]
        if self.optimize:
            if parent_item in self.done_parents:
                return {"result":None}
            else:
                self.done_parents.append(parent_item)
                print("Done parents %s" % repr(self.done_parents))
        # initialise parent/child values
        if isinstance(item_value,unicode):
            child_values = [item_value]
        else: child_values = item_value[:]    #copy for safety
        # track down the parent
        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
        # we have collected all parent values into the global block - so no need to search
        # for them elsewhere.
        # print("Looking for {!r}".format(parent_item))
        parent_values = globals.get(parent_item)
        if not parent_values:
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
        if not parent_values:
            # go for alternates
            namespace = whole_block.keys()
            namespace.extend(provisional_items.keys())
            namespace.extend(globals.keys())
            alt_names = filter_present(self.get_alternates(parent_item),namespace)
            if len(alt_names) == 0:
                if len([a for a in child_values if a != "." and a != "?"])>0:
                    return {"result":False,"parent":parent_item}#no parent available -> error
                else:
                    return {"result":None}       #maybe True is more appropriate??
            parent_item = alt_names[0]           #should never be more than one??
            parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
            if not parent_values:   # check global block
                parent_values = globals.get(parent_item)
        if isinstance(parent_values,unicode):
            parent_values = [parent_values]
        #print("Checking parent %s against %s, values %r/%r" % (parent_item,
        #                                          item_name, parent_values, child_values))
        missing = self.check_parent_child(parent_values,child_values)
        if len(missing) > 0:
            return {"result":False,"bad_values":missing,"parent":parent_item}
        return {"result":True}

    def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        try:
            child_items = self[item_name][self.child_spec][:]  #copy
        except KeyError:
            return {"result":None}    #not relevant
        # special case for dictionaries  -> we check parents of children only
        if item_name in globals:  #dictionary so skip
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        if isinstance(item_value,unicode): # single value
            parent_values = [item_value]
        else: parent_values = item_value[:]
        # expand child list with list of alternates
        for child_item in child_items[:]:
            child_items.extend(self.get_alternates(child_item))
        # now loop over the children
        for child_item in child_items:
            if self.optimize:
                if child_item in self.done_children:
                    return {"result":None}
                else:
                    self.done_children.append(child_item)
                    print("Done children %s" % repr(self.done_children))
            if child_item in provisional_items:
                child_values = provisional_items[child_item][:]
            elif child_item in whole_block:
                child_values = whole_block[child_item][:]
            else:  continue
            if isinstance(child_values,unicode):
                child_values = [child_values]
                # print("Checking child %s against %s, values %r/%r" % (child_item,
                #       item_name, child_values, parent_values))
            missing = self.check_parent_child(parent_values,child_values)
            if len(missing)>0:
                return {"result":False,"bad_values":missing,"child":child_item}
        return {"result":True}       #could mean that no child items present

    #a generic checker: all child vals should appear in parent_vals
    def check_parent_child(self,parent_vals,child_vals):
        # shield ourselves from dots and question marks
        pv = parent_vals[:]
        pv.extend([".","?"])
        res =  [a for a in child_vals if a not in pv]
        #print("Missing: %s" % res)
        return res

    def validate_remove_parent_child(self,item_name,whole_block):
        try:
            child_items = self[item_name][self.child_spec]
        except KeyError:
            return {"result":None}
        if isinstance(child_items,unicode): # only one child
            child_items = [child_items]
        for child_item in child_items:
            if child_item in whole_block:
                return {"result":False,"child":child_item}
        return {"result":True}

    def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
        try:
            dep_items = self[item_name][self.dep_spec][:]
        except KeyError:
            return {"result":None}    #not relevant
        if isinstance(dep_items,unicode):
            dep_items = [dep_items]
        actual_names = whole_block.keys()
        actual_names.extend(prov.keys())
        actual_names.extend(globals.keys())
        missing = [a for a in dep_items if a not in actual_names]
        if len(missing) > 0:
            alternates = map(lambda a:[self.get_alternates(a),a],missing)
            # compact way to get a list of alternative items which are
            # present
            have_check = [(filter_present(b[0],actual_names),
                                       b[1]) for b in alternates]
            have_check = list([a for a in have_check if len(a[0])==0])
            if len(have_check) > 0:
                have_check = [a[1] for a in have_check]
                return {"result":False,"bad_items":have_check}
        return {"result":True}

    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                                                                  globals={}):
        category = self[item_name].get(self.cat_spec)
        if category == None:
            print("No category found for %s" % item_name)
            return {"result":None}
        # print("Category {!r} for item {}".format(category, item_name))
        # we make a copy in the following as we will be removing stuff later!
        unique_i = self[category].get("_category_key.name",[])[:]
        if isinstance(unique_i,unicode):
            unique_i = [unique_i]
        if item_name not in unique_i:       #no need to verify
            return {"result":None}
        if isinstance(item_value,unicode):  #not looped
            return {"result":None}
        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
        # check that we can't optimize by not doing this check
        if self.optimize:
            if unique_i in self.done_keys:
                return {"result":None}
            else:
                self.done_keys.append(unique_i)
        val_list = []
        # get the matching data from any other data items
        unique_i.remove(item_name)
        other_data = []
        if len(unique_i) > 0:            # i.e. do have others to think about
           for other_name in unique_i:
           # we look for the value first in the provisional dict, then the main block
           # the logic being that anything in the provisional dict overrides the
           # main block
               if other_name in provisional_items:
                   other_data.append(provisional_items[other_name])
               elif other_name in whole_block:
                   other_data.append(whole_block[other_name])
               elif self[other_name].get(self.must_exist_spec)=="implicit":
                   other_data.append([item_name]*len(item_value))  #placeholder
               else:
                   return {"result":False,"bad_items":other_name}#missing data name
        # ok, so we go through all of our values
        # this works by comparing lists of strings to one other, and
        # so could be fooled if you think that '1.' and '1' are
        # identical
        for i in range(len(item_value)):
            #print("Value no. %d" % i, end=" ")
            this_entry = item_value[i]
            for j in range(len(other_data)):
                this_entry = " ".join([this_entry,other_data[j][i]])
            #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
            if this_entry in val_list:
                return {"result":False,"bad_values":this_entry}
            val_list.append(this_entry)
        return {"result":True}


    def validate_mandatory_category(self,whole_block):
        mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
        if len(mand_cats) == 0:
            return {"result":True}
        # print("Mandatory categories - {!r}".format(mand_cats)
        # find which categories each of our datanames belongs to
        all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
        missing = set(mand_cats) - set(all_cats)
        if len(missing) > 0:
            return {"result":False,"bad_items":repr(missing)}
        return {"result":True}

    def check_mandatory_items(self,whole_block,default_scope='Item'):
        """Return an error if any mandatory items are missing"""
        if len(self.scopes_mandatory)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
        if len(missing)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":missing}

    def check_prohibited_items(self,whole_block,default_scope='Item'):
        """Return an error if any prohibited items are present"""
        if len(self.scopes_naughty)== 0: return {"result":True}
        if default_scope == 'Datablock':
            return {"result":True}     #is a data file
        scope = whole_block.get('_definition.scope',default_scope)
        if '_dictionary.title' in whole_block:
           scope = 'Dictionary'
        present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
        if len(present)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":present}


    def run_item_validation(self,item_name,item_value):
        return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}

    def run_loop_validation(self,loop_names):
        return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}

    def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
        results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
        return {item_name:results}

    def run_block_validation(self,whole_block,block_scope='Item'):
        results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs])
        # fix up the return values
        return {"whole_block":results}

    def optimize_on(self):
        self.optimize = True
        self.done_keys = []
        self.done_children = []
        self.done_parents = []

    def optimize_off(self):
        self.optimize = False
        self.done_keys = []
        self.done_children = []
        self.done_parents = []
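
A minimal sketch of driving the hooks installed by install_validation_functions,
assuming a local copy of a DDL dictionary such as cif_core.dic is available
(each validation function returns a mapping with a "result" key and, on
failure, the offending values):

from CifFile import CifDic
dic = CifDic('cif_core.dic')
report = dic.run_item_validation('_cell_length_a', '5.959(2)')
for fun_name, outcome in report['_cell_length_a']:
    if outcome['result'] is False:
        print(fun_name, 'failed:', outcome.get('bad_values'))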

Ancestors (in MRO)

  • CifDic
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def DDL1_normalise(self)

def DDL1_normalise(self):
    # switch off block name collision checks
    self.standard = None
    # add default type information in DDL2 style
    # initial types and constructs
    base_types = ["char","numb","null"]
    prim_types = base_types[:]
    base_constructs = [".*",
        '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
        "\"\" "]
    for key,value in self.items():
       newnames = [key]  #keep by default
       if "_name" in value:
           real_name = value["_name"]
           if isinstance(real_name,list):        #looped values
               for looped_name in real_name:
                  new_value = value.copy()
                  new_value["_name"] = looped_name  #only looped name
                  self[looped_name] = new_value
               newnames = real_name
           else:
                  self[real_name] = value
                  newnames = [real_name]
       # delete the old one
       if key not in newnames:
          del self[key]
    # loop again to normalise the contents of each definition
    for key,value in self.items():
       #unlock the block
       save_overwrite = value.overwrite
       value.overwrite = True
       # deal with a missing _list, _type_conditions
       if "_list" not in value: value["_list"] = 'no'
       if "_type_conditions" not in value: value["_type_conditions"] = 'none'
       # deal with enumeration ranges
       if "_enumeration_range" in value:
           max,min = self.getmaxmin(value["_enumeration_range"])
           if min == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
           elif max == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
           else:
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
       #add any type construct information
       if "_type_construct" in value:
           base_types.append(value["_name"]+"_type")   #ie dataname_type
           base_constructs.append(value["_type_construct"]+"$")
           prim_types.append(value["_type"])     #keep a record
           value["_type"] = base_types[-1]   #the new type name
    #make categories conform with ddl2
    #note that we must remove everything from the last underscore
       if value.get("_category",None) == "category_overview":
            last_under = value["_name"].rindex("_")
            catid = value["_name"][1:last_under]
            value["_category.id"] = catid  #remove square bracks
            if catid not in self.cat_list: self.cat_list.append(catid)
       value.overwrite = save_overwrite
    # we now add any missing categories before filling in the rest of the
    # information
    for key,value in self.items():
        #print('processing ddl1 definition %s' % key)
        if "_category" in self[key]:
            if self[key]["_category"] not in self.cat_list:
                # rogue category, add it in
                newcat = self[key]["_category"]
                fake_name = "_" + newcat + "_[]"
                newcatdata = CifBlock()
                newcatdata["_category"] = "category_overview"
                newcatdata["_category.id"] = newcat
                newcatdata["_type"] = "null"
                self[fake_name] = newcatdata
                self.cat_list.append(newcat)
    # write out the type information in DDL2 style
    self.master_block.AddLoopItem((
        ("_item_type_list.code","_item_type_list.construct",
          "_item_type_list.primitive_code"),
        (base_types,base_constructs,prim_types)
        ))

def DDL2_normalise(self)

def DDL2_normalise(self):
   listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys())
   # now filter out all the single element lists!
   dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs)
   for item_def in dodgy_defs:
            # print("DDL2 norm: processing %s" % item_def)
            thisdef = self[item_def]
            packet_no = thisdef['_item.name'].index(item_def)
            realcat = thisdef['_item.category_id'][packet_no]
            realmand = thisdef['_item.mandatory_code'][packet_no]
            # first add in all the missing categories
            # we don't replace the entry in the list corresponding to the
            # current item, as that would wipe out the information we want
            for child_no in range(len(thisdef['_item.name'])):
                if child_no == packet_no: continue
                child_name = thisdef['_item.name'][child_no]
                child_cat = thisdef['_item.category_id'][child_no]
                child_mand = thisdef['_item.mandatory_code'][child_no]
                if child_name not in self:
                    self[child_name] = CifBlock()
                    self[child_name]['_item.name'] = child_name
                self[child_name]['_item.category_id'] = child_cat
                self[child_name]['_item.mandatory_code'] = child_mand
            self[item_def]['_item.name'] = item_def
            self[item_def]['_item.category_id'] = realcat
            self[item_def]['_item.mandatory_code'] = realmand
   target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                                 '_item_linked.parent_name' in self[a]]
   # regularise the parent/child loops of the multi-item definitions found above
   for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
   for item_def in dodgy_defs:
         print('Processing %s' % item_def)
         thisdef = self[item_def]
         child_list = thisdef['_item_linked.child_name']
         parents = thisdef['_item_linked.parent_name']
         # for each parent, find the list of children.
         family = list(zip(parents,child_list))
         notmychildren = family         #We aim to remove non-children
         # Loop over the parents, relocating as necessary
         while len(notmychildren):
            # get all children of first entry
            mychildren = [a for a in family if a[0]==notmychildren[0][0]]
            print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
            for parent,child in mychildren:   #parent is the same for all
                     # Make sure that we simply add in the new entry for the child, not replace it,
                     # otherwise we might spoil the child entry loop structure
                     try:
                         childloop = self[child].GetLoop('_item_linked.parent_name')
                     except KeyError:
                         print('Creating new parent entry %s for definition %s' % (parent,child))
                         self[child]['_item_linked.parent_name'] = [parent]
                         childloop = self[child].GetLoop('_item_linked.parent_name')
                         childloop.AddLoopItem(('_item_linked.child_name',[child]))
                         continue
                     else:
                         # A parent loop already exists and so will a child loop due to the
                         # call to create_pcloop above
                         pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                         goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                         if len(goodpars)>0:   #no need to add it
                             print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                             continue
                         print('Adding %s to %s entry' % (parent,child))
                         newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                         setattr(newpacket,'_item_linked.child_name',child)
                         setattr(newpacket,'_item_linked.parent_name',parent)
                         childloop.AddPacket(newpacket)
            #
            # Make sure the parent also points to the children.  We get
            # the current entry, then add our
            # new values if they are not there already
            #
            parent_name = mychildren[0][0]
            old_children = self[parent_name].get('_item_linked.child_name',[])
            old_parents = self[parent_name].get('_item_linked.parent_name',[])
            oldfamily = zip(old_parents,old_children)
            newfamily = []
            print('Old parents -> %s' % repr(old_parents))
            for jj, childname in mychildren:
                alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
                if len(alreadythere)>0: continue
                print('Adding new child %s to parent definition at %s' % (childname,parent_name))
                old_children.append(childname)
                old_parents.append(parent_name)
            # Now output the loop, blowing away previous definitions.  If there is something
            # else in this category, we are destroying it.
            newloop = CifLoopBlock(dimension=1)
            newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
            newloop.AddLoopItem(('_item_linked.child_name',old_children))
            del self[parent_name]['_item_linked.parent_name']
            del self[parent_name]['_item_linked.child_name']
            self[parent_name].insert_loop(newloop)
            print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
            # now make a new, smaller list
            notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]
   # now flatten any single element lists
   single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs)
   for flat_def in single_defs:
       flat_keys = self[flat_def].GetLoop('_item.name').keys()
       for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
   # now deal with the multiple lists
   # next we do aliases
   all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
   for aliased in all_aliases:
      my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
      for alias in my_aliases:
          self[alias] = self[aliased].copy()   #we are going to delete stuff...
          del self[alias]["_item_aliases.alias_name"]

def NewBlock(self, *args, **kwargs)

def NewBlock(self,*args,**kwargs):
    newname = super(CifDic,self).NewBlock(*args,**kwargs)
    try:
        self.block_id_table[self[newname]['_definition.id']]=newname
    except AttributeError: #no block_id table yet
        pass
    return newname

def SetTemplate(self, template_file)

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, **kwargs)

def WriteOut(self,**kwargs):
    myblockorder = self.get_full_child_list()
    self.set_grammar(self.grammar)
    self.standard = 'Dic'
    return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

class CifError

class CifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Format error: '+ self.value

Ancestors (in MRO)

  • CifError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class CifFile

class CifFile(StarFile.StarFile):
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""

Ancestors (in MRO)

  • CifFile
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def NewBlock(self, blockname, blockcontents=None, fix=True, parent=None)

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know
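
A hedged usage sketch (block and data names are illustrative):

from CifFile import CifFile, CifBlock
cf = CifFile()
contents = CifBlock()
contents['_cell.length_a'] = '5.959'
key = cf.NewBlock('my block', contents)   # the space becomes an underscore
print(cf[key]['_cell.length_a'])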

def SetTemplate(self, template_file)

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring
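
A minimal output sketch, assuming cf is the CifFile built above and that
blocks named block_a and block_b have already been added to it:

cf.set_grammar('2.0')   # emit CIF2 syntax, including the #\#CIF_2.0 magic line
output = cf.WriteOut(blockorder=['block_b', 'block_a'])
open('example.cif', 'w').write(output)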

class CifLoopBlock

class CifLoopBlock(StarFile.LoopBlock):
    def __init__(self,data=(),**kwargs):
        super(CifLoopBlock,self).__init__(data,**kwargs)

Ancestors (in MRO)

  • CifLoopBlock
  • CifFile.StarFile.LoopBlock
  • __builtin__.object

Methods

def AddPacket(self, packet)

def AddPacket(self,packet):
    for myitem in self.parent_block.loops[self.loop_no]:
        old_values = self.parent_block[myitem]
        old_values.append(packet.__getattribute__(myitem))
        self.parent_block[myitem] = old_values

def AddToLoop(self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(self, itemname, newpos)

Change the position at which itemname appears when printing out to newpos.

def ChangeItemOrder(self,itemname,newpos):
    """Change the position at which `itemname` appears when printing out to `newpos`."""
    self.parent_block.loops[self.loop_no].remove(itemname.lower())
    self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

def GetItemOrder(self)

Return a list of datanames in this LoopBlock in the order that they will be printed

def GetItemOrder(self):
    """Return a list of datanames in this `LoopBlock` in the order that they will be
    printed"""
    return self.parent_block.loops[self.loop_no][:]

def GetItemPosition(self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos
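
A hedged sketch of the coordinates returned (positions are illustrative):

block.GetItemPosition('_cell.length_a')    # e.g. (-1, 2): unlooped, third item
block.GetItemPosition('_atom_site_label')  # e.g. (0, 0): first name in loop 0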

def GetLoop(self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)
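
A hedged iteration sketch: the returned LoopBlock yields packets whose
values are also available as attributes (as set up by GetPacket below):

for pack in block.GetLoop('_atom_site_label'):
    print(pack._atom_site_label)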

def GetLoopNames(self, keyname)

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetPacket(self, index)

def GetPacket(self,index):
    thispack = StarPacket([])
    for myitem in self.parent_block.loops[self.loop_no]:
        thispack.append(self[myitem][index])
        setattr(thispack,myitem,thispack[-1])
    return thispack

def RemoveItem(self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveLoopItem(self, itemname)

Deprecated. Use RemoveItem instead

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

class CifRecursionError

class CifRecursionError(Exception):
    def __init__(self,key_value,call_stack):
        self.key_value = key_value
        self.call_stack = call_stack
    def __str__(self):
        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))

Ancestors (in MRO)

  • CifRecursionError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object
class DicBlock

A definition block within a dictionary, which allows imports to be transparently followed

class DicBlock(StarFile.StarBlock):
    """A definition block within a dictionary, which allows imports
    to be transparently followed"""

    def __init__(self,*args,**kwargs):
        super(DicBlock,self).__init__(*args,**kwargs)
        self._import_cache = {}
        
    def __getitem__(self,dataname):
        value = None
        if super(DicBlock,self).has_key("_import.get") and self._import_cache:
            value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) 
        try:
            final_value = super(DicBlock,self).__getitem__(dataname)
        except KeyError:    #not there
            final_value = value
        if final_value is None:
            raise KeyError("%s not found" % dataname)
        return final_value

    def has_key(self,key):
        try:
            self[key]
        except KeyError:
            return False
        return True
    
    def add_dict_cache(self,name,cached):
        """Add a loaded dictionary to this block's cache"""
        self._import_cache[name]=cached
        
    def follow_import(self,import_info,dataname):
        """Find the dataname values from the imported dictionary. `import_info`
        is a list of import locations"""
        latest_value = None
        for import_ref in import_info:
            file_loc = import_ref["file"]
            if file_loc not in self._import_cache:
                raise ValueError("Dictionary for import %s not found" % file_loc)
            import_from = self._import_cache[file_loc]
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                    raise CifError('Import frame %s not found in %s' % (target_key,file_loc))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if mode == "contents":   #only this is used at this level
                latest_value = import_target.get(dataname,latest_value)
        return latest_value
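
The import_info argument mirrors the DDLm _import.get attribute: a list of
table values. A hedged sketch of one entry (file and frame names illustrative):

import_info = [{'file': 'templ_enum.cif',   # dictionary to import from
                'save': 'units_code',       # save frame to look in
                'mode': 'Contents',         # merge style handled above
                'miss': 'Exit'}]            # behaviour if the frame is missing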

Ancestors (in MRO)

  • DicBlock
  • CifFile.StarFile.StarBlock
  • __builtin__.object

Methods

def AddItem(self, key, value, precheck=False)

Add dataname key to block with value value. value may be a single value, a list or a tuple. If precheck is False (the default), all values will be checked and converted to unicode strings as necessary. If precheck is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.

def AddItem(self,key,value,precheck=False):
    """Add dataname `key` to block with value `value`.  `value` may be
    a single value, a list or a tuple. If `precheck` is False (the default),
    all values will be checked and converted to unicode strings as necessary. If
    `precheck` is True, this checking is bypassed.  No checking is necessary
    when values are read from a CIF file as they are already in correct form."""
    if not isinstance(key,(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
    key = unicode(key)    #everything is unicode internally
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
    # check for overwriting
    if key in self:
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    if not precheck:   #need to sanitise
        regval,empty_val = self.regularise_data(value)
        pure_string = check_stringiness(regval)
        self.check_item_value(regval)
    else:
        regval,empty_val = value,None
        pure_string = True
    # update ancillary information first
    lower_key = key.lower()
    if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    try:
        del self.true_case[lower_key]
    except KeyError:
        pass
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})
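
For illustration, a minimal usage sketch (the datanames and values are hypothetical; CifBlock is imported from the CifFile package as in normal PyCIFRW use):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_cell.length_a', '5.959')  # a single string value
    b.AddItem('_cell.length_b', 14.956)   # non-string values are also accepted
    # adding '_cell.length_a' again raises StarError unless b.overwrite is True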

def AddLoopItem(

self, incomingdata, precheck=False, maxlength=-1)

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(

self, oldname, newname)

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass
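
A minimal sketch of adding a column to an existing loop (hypothetical datanames; the new column must match the loop length):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.CreateLoop(['_atom_site.label'])
    b.AddItem('_atom_site.occupancy', ['1.0', '0.5'])  # two values, matching the loop
    b.AddLoopName('_atom_site.label', '_atom_site.occupancy')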

def AddToLoop(

self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname.

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(

self, itemname, newpos)

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)
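
A minimal sketch of reordering output (hypothetical datanames; note that item order is stored in lower case):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_cell.length_a', '5.959')
    b.AddItem('_cell.length_b', '14.956')
    b.ChangeItemOrder('_cell.length_b', 0)  # print _cell.length_b first
    print(b.GetItemOrder())                 # ['_cell.length_b', '_cell.length_a']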

def CreateLoop(

self, datanames, order=-1, length_check=True)

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]
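
A minimal sketch of creating a loop from items already present in the block (hypothetical datanames):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.AddItem('_atom_site.type_symbol', ['C', 'O'])
    b.CreateLoop(['_atom_site.label', '_atom_site.type_symbol'])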

def FindLoop(

self, keyname)

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1
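
For example (hypothetical datanames; matching is case-insensitive):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.CreateLoop(['_atom_site.label'])
    print(b.FindLoop('_ATOM_SITE.LABEL'))  # 1: index of the loop just created
    print(b.FindLoop('_cell.length_a'))    # -1: dataname is not in any loop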

def GetCompoundKeyedPacket(

self, keydict)

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]
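
A minimal sketch (hypothetical datanames; the second element of each keydict value requests caseless matching):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.AddItem('_atom_site.occupancy', ['1.0', '0.5'])
    b.CreateLoop(['_atom_site.label', '_atom_site.occupancy'])
    pkt = b.GetCompoundKeyedPacket({'_atom_site.label': ('c1', True)})
    print(getattr(pkt, '_atom_site.occupancy'))  # '1.0'; dotted names need getattr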

def GetFullItemValue(

self, itemname)

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True
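
A short sketch using a hypothetical dataname:

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_cell.length_a', '5.959')
    val, ready = b.GetFullItemValue('_cell.length_a')
    # val is '5.959'; ready is False, as string values are not
    # directly suitable for calculation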

def GetItemOrder(

self)

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index.

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(

self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos
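
For example (hypothetical dataname):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_cell.length_a', '5.959')
    print(b.GetItemPosition('_cell.length_a'))  # (-1, 0): top level, first position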

def GetItemValue(

self, itemname)

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(

self, keyname, keyvalue, no_case=False)

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]
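
A minimal sketch of packet retrieval (hypothetical datanames):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.AddItem('_atom_site.occupancy', ['1.0', '0.5'])
    b.CreateLoop(['_atom_site.label', '_atom_site.occupancy'])
    pkt = b.GetKeyedPacket('_atom_site.label', 'O1')
    print(getattr(pkt, '_atom_site.occupancy'))  # '0.5'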

def GetKeyedSemanticPacket(

self, keyvalue, cat_id)

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def GetLoop(

self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

def GetLoopNames(

self, keyname)

Return all datanames appearing together with keyname.

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetMultiKeyedSemanticPacket(

self, keydict, cat_id)

Return a complete packet for category cat_id where the keyvalues are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def RemoveItem(

self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(

self, keyname, keyvalue)

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]
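
A minimal sketch (hypothetical datanames; the whole packet disappears from the loop):

    from CifFile import CifBlock

    b = CifBlock()
    b.AddItem('_atom_site.label', ['C1', 'O1'])
    b.AddItem('_atom_site.occupancy', ['1.0', '0.5'])
    b.CreateLoop(['_atom_site.label', '_atom_site.occupancy'])
    b.RemoveKeyedPacket('_atom_site.label', 'C1')
    print(b['_atom_site.label'])      # ['O1']
    print(b['_atom_site.occupancy'])  # ['0.5']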

def RemoveLoopItem(

self, itemname)

Deprecated. Use RemoveItem instead.

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(

self, wraplength=80, maxoutlength=2048)

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength
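
For example:

    from CifFile import CifBlock

    b = CifBlock()
    b.SetOutputLength(wraplength=70, maxoutlength=120)
    # requesting wraplength > maxoutlength raises StarError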

class ValidCifBlock

A CifBlock that is valid with respect to a given CIF dictionary. Methods of CifBlock are overridden where necessary to disallow addition of invalid items to the CifBlock.

Initialisation

  • dic is a CifDic object to be used for validation.
class ValidCifBlock(CifBlock):
    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
    `CifBlock`.

    ## Initialisation

    * `dic` is a `CifDic` object to be used for validation.

    """
    def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
        CifBlock.__init__(self,*args,**kwords)
        if dic and diclist:
            print("Warning: diclist argument ignored when initialising ValidCifBlock")
        if isinstance(dic,CifDic):
            self.fulldic = dic
        else:
            raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
        if len(diclist)==0 and not dic:
            raise ValidCifError( "At least one dictionary must be specified")
        if diclist and not dic:
            self.fulldic = merge_dic(diclist,mergemode)
        if not self.run_data_checks()[0]:
            raise ValidCifError( self.report())

    def run_data_checks(self,verbose=False):
        self.v_result = {}
        self.fulldic.optimize_on()
        for dataname in self.keys():
            update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
            update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
        for loop_names in self.loops.values():
            update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
        # now run block-level checks
        update_value(self.v_result,self.fulldic.run_block_validation(self))
        # return false and list of baddies if anything didn't match
        self.fulldic.optimize_off()
        all_keys = list(self.v_result.keys()) #dictionary will change
        for test_key in all_keys:
            #print("%s: %r" % (test_key, self.v_result[test_key]))
            self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
            if len(self.v_result[test_key]) == 0:
                del self.v_result[test_key]
        isvalid = len(self.v_result)==0
        #if not isvalid:
        #    print("Baddies: {!r}".format(self.v_result))
        return isvalid,self.v_result

    def single_item_check(self,item_name,item_value):
        #self.match_single_item(item_name)
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_item_validation(item_name,item_value)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def loop_item_check(self,loop_names):
        in_dic_names = list([a for a in loop_names if a in self.fulldic])
        if len(in_dic_names)==0:
            result = {loop_names[0]:[]}
        else:
            result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
        return isvalid,baddies

    def global_item_check(self,item_name,item_value,provisional_items={}):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_global_validation(item_name,
               item_value,self,provisional_items = provisional_items)
        baddies = list([a for a in result[item_name] if a[1]["result"] is False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def remove_global_item_check(self,item_name):
        if item_name not in self.fulldic:
            result = {item_name:[]}
        else:
            result = self.fulldic.run_remove_global_validation(item_name,self,False)
        baddies = list([a for a in result[item_name] if a[1]["result"]==False])
        # if even one false one is found, this should trigger
        isvalid = (len(baddies) == 0)
        # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
        return isvalid,baddies

    def AddToLoop(self,dataname,loopdata):
        # single item checks
        paired_data = list(loopdata.items())
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        # loop item checks; merge with current loop
        for aloop in self.loops.values():
            if dataname.lower() in aloop:
                loopnames = list(aloop)
                for new_name in loopdata.keys():
                    if new_name not in loopnames: loopnames.append(new_name)
                valid,problems = self.loop_item_check(loopnames)
                self.report_if_invalid(valid,problems,dataname)
        prov_dict = loopdata.copy()
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
        CifBlock.AddToLoop(self,dataname,loopdata)

    def AddCifItem(self,data):
        if isinstance(data[0],(unicode,str)):   # single item
            valid,problems = self.single_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
            valid,problems = self.global_item_check(data[0],data[1])
            self.report_if_invalid(valid,problems,data[0])
        elif isinstance(data[0],tuple) or isinstance(data[0],list):
            paired_data = list(zip(data[0],data[1]))
            for name,value in paired_data:
                valid,problems = self.single_item_check(name,value)
                self.report_if_invalid(valid,problems,name)
            valid,problems = self.loop_item_check(data[0])
            self.report_if_invalid(valid,problems,data[0])
            prov_dict = {}            # for storing temporary items
            for name,value in paired_data: prov_dict[name]=value
            for name,value in paired_data:
                del prov_dict[name]   # remove temporarily
                valid,problems = self.global_item_check(name,value,prov_dict)
                prov_dict[name] = value  # add back in
                self.report_if_invalid(valid,problems,name)
        else:
            raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
        super(ValidCifBlock,self).AddCifItem(data)

    def AddItem(self,key,value,**kwargs):
        """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
        valid,problems = self.single_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        valid,problems = self.global_item_check(key,value)
        self.report_if_invalid(valid,problems,key)
        super(ValidCifBlock,self).AddItem(key,value,**kwargs)

    # utility function
    def report_if_invalid(self,valid,bad_list,data_name):
        if not valid:
            bad_tests = [a[0] for a in bad_list]
            error_string = ",".join(bad_tests)
            error_string = repr(data_name) + " fails following validity checks: "  + error_string
            raise ValidCifError( error_string)

    def __delitem__(self,key):
        # we don't need to run single item checks; we do need to run loop and
        # global checks.
        if key in self:
            try:
                loop_items = self.GetLoop(key)
            except TypeError:
                loop_items = []
            if loop_items:             #need to check loop conformance
                loop_names = [a[0] for a in loop_items if a[0] != key]
                valid,problems = self.loop_item_check(loop_names)
                self.report_if_invalid(valid,problems,key)
            valid,problems = self.remove_global_item_check(key)
            self.report_if_invalid(valid,problems,key)
        self.RemoveCifItem(key)


    def report(self):
       outstr = StringIO()
       outstr.write( "Validation results\n")
       outstr.write( "------------------\n")
       print("%d invalid items found\n" % len(self.v_result))
       for item_name,val_func_list in self.v_result.items():
           outstr.write("%s fails following tests:\n" % item_name)
           for val_func in val_func_list:
               outstr.write("\t%s\n")
       return outstr.getvalue()
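
A hedged sketch of dictionary-backed validation; the dictionary file name is hypothetical and must point at a local copy of a CIF dictionary:

    from CifFile import CifDic, ValidCifBlock

    core = CifDic("cif_core.dic")          # hypothetical local dictionary file
    vb = ValidCifBlock(dic=core)
    vb.AddItem('_cell_length_a', '5.959')  # raises ValidCifError if invalid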

Ancestors (in MRO)

Methods

def AddCifItem(

self, data)

Inheritance: CifBlock.AddCifItem

DEPRECATED. Use AddItem instead.

def AddCifItem(self,data):
    if isinstance(data[0],(unicode,str)):   # single item
        valid,problems = self.single_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
        valid,problems = self.global_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
    elif isinstance(data[0],tuple) or isinstance(data[0],list):
        paired_data = list(zip(data[0],data[1]))
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        valid,problems = self.loop_item_check(data[0])
        self.report_if_invalid(valid,problems,data[0])
        prov_dict = {}            # for storing temporary items
        for name,value in paired_data: prov_dict[name]=value
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
    else:
        raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
    super(ValidCifBlock,self).AddCifItem(data)

def AddItem(

self, key, value, **kwargs)

Inheritance: CifBlock.AddItem

Set value of dataname key to value after checking for conformance with CIF dictionary

def AddItem(self,key,value,**kwargs):
    """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
    valid,problems = self.single_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    valid,problems = self.global_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    super(ValidCifBlock,self).AddItem(key,value,**kwargs)

def AddLoopItem(

self, incomingdata, precheck=False, maxlength=-1)

Inheritance: CifBlock.AddLoopItem

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(

self, oldname, newname)

Inheritance: CifBlock.AddLoopName

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass

def AddSingleCifItem(

self, key, value)

Inheritance: CifBlock.AddSingleCifItem

Deprecated. Use AddItem instead.

def AddSingleCifItem(self,key,value):
    """*Deprecated*. Use `AddItem` instead"""
    """Add a single data item. If it is part of a loop, a separate call should be made"""
    self.AddItem(key,value)

def AddToLoop(

self, dataname, loopdata)

Inheritance: CifBlock.AddToLoop

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname.

def AddToLoop(self,dataname,loopdata):
    # single item checks
    paired_data = list(loopdata.items())
    for name,value in paired_data:
        valid,problems = self.single_item_check(name,value)
        self.report_if_invalid(valid,problems,name)
    # loop item checks; merge with current loop
    for aloop in self.loops.values():
        if dataname.lower() in aloop:
            loopnames = list(aloop)
            for new_name in loopdata.keys():
                if new_name not in loopnames: loopnames.append(new_name)
            valid,problems = self.loop_item_check(loopnames)
            self.report_if_invalid(valid,problems,dataname)
    prov_dict = loopdata.copy()
    for name,value in paired_data:
        del prov_dict[name]   # remove temporarily
        valid,problems = self.global_item_check(name,value,prov_dict)
        prov_dict[name] = value  # add back in
        self.report_if_invalid(valid,problems,name)
    CifBlock.AddToLoop(self,dataname,loopdata)

def ChangeItemOrder(

self, itemname, newpos)

Inheritance: CifBlock.ChangeItemOrder

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

def CreateLoop(

self, datanames, order=-1, length_check=True)

Inheritance: CifBlock.CreateLoop

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]

def FindLoop(

self, keyname)

Inheritance: CifBlock.FindLoop

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1

def GetCompoundKeyedPacket(

self, keydict)

Inheritance: CifBlock.GetCompoundKeyedPacket

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]

def GetFullItemValue(

self, itemname)

Inheritance: CifBlock.GetFullItemValue

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True

def GetItemOrder(

self)

Inheritance: CifBlock.GetItemOrder

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index.

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(

self, itemname)

Inheritance: CifBlock.GetItemPosition

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetItemValue(

self, itemname)

Inheritance: CifBlock.GetItemValue

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(

self, keyname, keyvalue, no_case=False)

Inheritance: CifBlock.GetKeyedPacket

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]

def GetKeyedSemanticPacket(

self, keyvalue, cat_id)

Inheritance: CifBlock.GetKeyedSemanticPacket

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def GetLoop(

self, keyname)

Inheritance: CifBlock.GetLoop

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

def GetLoopNames(

self, keyname)

Inheritance: CifBlock.GetLoopNames

Return all datanames appearing together with keyname.

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetMultiKeyedSemanticPacket(

self, keydict, cat_id)

Inheritance: CifBlock.GetMultiKeyedSemanticPacket

Return a complete packet for category cat_id where the keyvalues are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def RemoveCifItem(

self, itemname)

Inheritance: CifBlock.RemoveCifItem

Remove itemname from the CifBlock

def RemoveCifItem(self,itemname):
    """Remove `itemname` from the CifBlock"""
    self.RemoveItem(itemname)

def RemoveItem(

self, itemname)

Inheritance: CifBlock.RemoveItem

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(

self, keyname, keyvalue)

Inheritance: CifBlock.RemoveKeyedPacket

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]

def RemoveLoopItem(

self, itemname)

Inheritance: CifBlock.RemoveLoopItem

Deprecated. Use RemoveItem instead.

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(

self, wraplength=80, maxoutlength=2048)

Inheritance: CifBlock.SetOutputLength

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength

class ValidCifError

class ValidCifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Validity error: ' + self.value

Ancestors (in MRO)

  • ValidCifError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class ValidCifFile

A CIF file for which all datablocks are valid. Argument dic to initialisation specifies a CifDic object to use for validation.

class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
        if not diclist and not dic and not hasattr(self,'bigdic'):
            raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
        if not dic and diclist:     #merge here for speed
            self.bigdic = merge_dic(diclist,mergemode)
        elif dic and not diclist:
            self.bigdic = dic
        CifFile.__init__(self,*args,**kwargs)
        for blockname in self.keys():
            self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

    def NewBlock(self,blockname,blockcontents,**kwargs):
        CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
        # dictionary[blockname] is now a CifBlock object.  We
        # turn it into a ValidCifBlock object
        self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                         data=self.dictionary[blockname])
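
A hedged sketch of validating a whole file on input; the file names are hypothetical, and datasource is assumed to be the usual CifFile initialisation keyword:

    from CifFile import CifDic, ValidCifFile

    dic = CifDic("cif_core.dic")      # hypothetical local dictionary file
    vf = ValidCifFile(dic=dic, datasource="mydata.cif")
    # every datablock in vf is now a ValidCifBlock; invalid data raises ValidCifError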

Ancestors (in MRO)

  • ValidCifFile
  • CifFile
  • CifFile.StarFile.StarFile
  • CifFile.StarFile.BlockCollection
  • __builtin__.object

Methods

def NewBlock(

self, blockname, blockcontents, **kwargs)

Inheritance: CifFile.NewBlock

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents,**kwargs):
    CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
    # dictionary[blockname] is now a CifBlock object.  We
    # turn it into a ValidCifBlock object
    self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                     data=self.dictionary[blockname])

def SetTemplate(self, template_file)

Inheritance: CifFile.SetTemplate

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Inheritance: CifFile.WriteOut

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring
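
A short usage sketch (the block name and dataname are invented; `grammar` is
assumed to be a plain attribute, as its use in the code above suggests):

    from CifFile import CifFile, CifBlock
    cf = CifFile()
    blk = CifBlock()
    blk['_demo.value'] = 'some text'
    cf['demo_block'] = blk
    cf.grammar = '1.1'            # assumption: select non-nested save frame output
    print(cf.WriteOut(wraplength=70))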

class ValidationResult

Represents a validation result. It is initialised with the return value of the validate function.

class ValidationResult:
    """Represents a validation result. Initialised with the return value of
    the validate function."""
    def __init__(self,results):
        """`results` is the return value of the validate function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for a valid CIF file, otherwise False"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.valid_result.keys()   # iterkeys() fails on Python 3
        valid = True                                 # defined even for an empty file
        for block_name in block_names:
            if self.valid_result[block_name] != (True,{}):
                valid = False
                break
        return valid

    def has_no_match_items(self,block_name=None):
        """Return True if some items are not found in the dictionary"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.no_matches.keys()     # iter_keys() does not exist
        has_no_match_items = False                   # defined even for an empty file
        for block_name in block_names:
            if self.no_matches[block_name]:
                has_no_match_items = True
                break
        return has_no_match_items
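
A sketch of typical use, assuming the module-level `validate` function referred
to in the docstrings (its exact signature is not shown here) and an invented
file name:

    results = validate("mydata.cif", dic=my_dictionary)   # hypothetical call
    vr = ValidationResult(results)
    print(vr.report(use_html=False))
    if vr.is_valid():
        print("all blocks valid")
    if vr.has_no_match_items():
        print("some datanames were not found in the dictionary")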

Ancestors (in MRO)

  • ValidationResult

CifFile.StarFile module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""


import sys

# Python 2,3 compatibility
try:
    from urllib import urlopen         # for arbitrary opening
    from urlparse import urlparse, urlunparse
except ImportError:
    from urllib.request import urlopen
    from urllib.parse import urlparse,urlunparse
import re,os
import textwrap

try:
    from StringIO import StringIO #not cStringIO as we cannot subclass
except ImportError:
    from io import StringIO

if isinstance(u"abc",str):   #Python 3
    unicode = str

try:
    import numpy
    have_numpy = True
except ImportError:
    have_numpy = False

class StarList(list):
    def __getitem__(self,args):
        if isinstance(args,(int,slice)):
            return super(StarList,self).__getitem__(args)
        elif isinstance(args,tuple) and len(args)>1:   #extended comma notation
            return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:])
        else:
            return super(StarList,self).__getitem__(args[0])

    def __str__(self):
        return "SL("+super(StarList,self).__str__() + ")"

class StarDict(dict):
    pass


class LoopBlock(object):
    def __init__(self,parent_block,dataname):
        self.loop_no = parent_block.FindLoop(dataname)
        if self.loop_no < 0:
            raise KeyError('%s is not in a loop structure' % dataname)
        self.parent_block = parent_block

    def keys(self):
        return self.parent_block.loops[self.loop_no]

    def values(self):
        return [self.parent_block[a] for a in self.keys()]

    #Avoid iterator even though that is Python3-esque
    def items(self):
        return list(zip(self.keys(),self.values()))

    def __getitem__(self,dataname):
        if isinstance(dataname,int):   #a packet request
            return self.GetPacket(dataname)
        if dataname in self.keys():
            return self.parent_block[dataname]
        else:
            raise KeyError('%s not in loop block' % dataname)

    def __setitem__(self,dataname,value):
        self.parent_block[dataname] = value
        self.parent_block.AddLoopName(self.keys()[0],dataname)

    def __contains__(self,key):
        return key in self.parent_block.loops[self.loop_no]

    def has_key(self,key):
        return key in self

    def __iter__(self):
        packet_list = zip(*self.values())
        names = self.keys()
        for p in packet_list:
            r = StarPacket(p)
            for n in range(len(names)):
                setattr(r,names[n].lower(),r[n])
            yield r
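
    # Illustrative example (not part of the original source): iterating over
    # a LoopBlock yields StarPacket objects whose values are also available
    # as attributes named after the (lower-cased) datanames:
    #
    #   for pkt in myblock.GetLoop('_demo.id'):     # '_demo.id' is hypothetical
    #       print(getattr(pkt, '_demo.id'))         # dotted names need getattr()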

    # for compatibility
    def __getattr__(self,attname):
        return getattr(self.parent_block,attname)

    def load_iter(self,coords=[]):
        count = 0        #to create packet index
        while not self.popout:
            # ok, we have a new packet:  append a list to our subloops
            for aloop in self.loops:
                aloop.new_enclosing_packet()
            for iname in self.item_order:
                if isinstance(iname,LoopBlock):       #into a nested loop
                    for subitems in iname.load_iter(coords=coords+[count]):
                        # print 'Yielding %s' % `subitems`
                        yield subitems
                    # print 'End of internal loop'
                else:
                    if self.dimension == 0:
                        # print 'Yielding %s' % `self[iname]`
                        yield self,self[iname]
                    else:
                        backval = self.block[iname]
                        for i in range(len(coords)):
                           # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                           backval = backval[coords[i]]
                        yield self,backval
            count = count + 1      # count packets
        self.popout = False        # reinitialise
        # print 'Finished iterating'
        yield self,'###Blank###'     #this value should never be used

    # an experimental fast iterator for level-1 loops (ie CIF)
    def fast_load_iter(self):
        targets = [self.block[a] for a in self.item_order]  # a list, as map() is an iterator on Python 3
        while targets:
            for target in targets:
                yield self,target

    # Add another list of the required shape to take into account a new outer packet
    def new_enclosing_packet(self):
        if self.dimension > 1:      #otherwise have a top-level list
            for iname in self.keys():  #includes lower levels
                target_list = self[iname]
                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of...
                    target_list = target_list[-1]
                target_list.append([])
                # print '%s now %s' % (iname,`self[iname]`)

    def recursive_iter(self,dict_so_far={},coord=[]):
        # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
        my_length = 0
        top_items = self.block.items()
        top_values = self.block.values()       #same order as items
        drill_values = self.block.values()
        for dimup in range(0,self.dimension):  #look higher in the tree
            if len(drill_values)>0:            #this block has values
                drill_values=drill_values[0]   #drill in
            else:
                raise StarError("Malformed loop packet %s" % repr( top_items[0] ))
        my_length = len(drill_values[0])       #length of 'string' entry
        if self.dimension == 0:                #top level
            for aloop in self.loops:
                for apacket in aloop.recursive_iter():
                    # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) )
                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
                    for name,value in top_items + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
        else:                                  #in some loop
            for i in range(my_length):
                kvpairs = [(a,self.coord_to_group(a,coord)[i]) for a in self.block.keys()]
                kvvals = [a[1] for a in kvpairs]   #just values
                # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs ))
                if self.loops:
                  for aloop in self.loops:
                    for apacket in aloop.recursive_iter(coord=coord+[i]):
                        # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) )
                        prep_yield = StarPacket(kvvals+apacket.values())
                        for name,value in kvpairs + apacket.items():
                            setattr(prep_yield,name,value)
                        yield prep_yield
                else:           # we're at the bottom of the tree
                    # print "Recursive yielding %s" % repr( dict(kvpairs) )
                    prep_yield = StarPacket(kvvals)
                    for name,value in kvpairs:
                        setattr(prep_yield,name,value)
                    yield prep_yield

    # small function to use the coordinates.
    def coord_to_group(self,dataname,coords):
          if not isinstance(dataname,unicode):
             return dataname     # flag inner loop processing
          newm = self[dataname]          # newm must be a list or tuple
          for c in coords:
              # print "Coord_to_group: %s ->" % (repr( newm )),
              newm = newm[c]
              # print repr( newm )
          return newm

    def flat_iterator(self):
            my_length = 0
            top_keys = self.block.keys()
            if len(top_keys)>0:
                my_length = len(self.block[top_keys[0]])
            for pack_no in range(my_length):
                yield(self.collapse(pack_no))


    def RemoveItem(self,itemname):
        """Remove `itemname` from the block."""
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey in self:
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)

    def RemoveLoopItem(self,itemname):
        """*Deprecated*. Use `RemoveItem` instead"""
        self.RemoveItem(itemname)

    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)

    def GetPacket(self,index):
        thispack = StarPacket([])
        for myitem in self.parent_block.loops[self.loop_no]:
            thispack.append(self[myitem][index])
            setattr(thispack,myitem,thispack[-1])
        return thispack

    def AddPacket(self,packet):
        for myitem in self.parent_block.loops[self.loop_no]:
            old_values = self.parent_block[myitem]
            old_values.append(packet.__getattribute__(myitem))
            self.parent_block[myitem] = old_values

    def GetItemOrder(self):
        """Return a list of datanames in this `LoopBlock` in the order that they will be
        printed"""
        return self.parent_block.loops[self.loop_no][:]


    def ChangeItemOrder(self,itemname,newpos):
        """Change the position at which `itemname` appears when printing out to `newpos`."""
        self.parent_block.loops[self.loop_no].remove(itemname.lower())
        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

    def GetItemPosition(self,itemname):
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()


class StarBlock(object):
    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
                 characterset='ascii',maxnamelength=-1):
        self.block = {}    #the actual data storage (lower case keys)
        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
        self.item_order = []  #lower case, loops referenced by integer
        self.formatting_hints = {}
        self.true_case = {} #transform lower case to supplied case
        self.provide_value = False  #prefer string version always
        self.dictionary = None      #DDLm dictionary
        self.popout = False         #used during load iteration
        self.curitem = -1           #used during iteration
        self.cache_vals = True      #store all calculated values
        self.maxoutlength = maxoutlength
        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
        self.set_characterset(characterset)   #to check input names
        self.wraplength = wraplength
        self.overwrite = overwrite
        self.string_delimiters = ["'",'"',"\n;"]   #universal CIF set
        self.list_delimiter = "  "                 #CIF2 default
        self.wrapper = textwrap.TextWrapper()
        if isinstance(data,(tuple,list)):
            for item in data:
                self.AddLoopItem(item)
        elif isinstance(data,StarBlock):
            self.block = data.block.copy()
            self.item_order = data.item_order[:]
            self.true_case = data.true_case.copy()
            # loops as well
            self.loops = data.loops.copy()

    def setmaxnamelength(self,maxlength):
        """Set the maximum allowable dataname length (-1 for no check)"""
        self.maxnamelength = maxlength
        if maxlength > 0:
            bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
            if len(bad_names)>0:
                raise StarError('Datanames too long: ' + repr( bad_names ))

    def set_characterset(self,characterset):
        """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
        self.characterset = characterset
        if characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
        elif characterset == 'unicode':
            if sys.maxunicode < 1114111:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
            else:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)

    def __str__(self):
        return self.printsection()

    def __setitem__(self,key,value):
        if key == "saves":
            raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
        self.AddItem(key,value)

    def __getitem__(self,key):
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""")
        try:
           rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
           if self.dictionary:
               # send the dictionary the required key and a pointer to us
               try:
                   new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
               except StarDerivationFailure:   #try now with defaults included
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
                   except StarDerivationFailure as s:
                       print("In StarBlock.__getitem__, " + repr(s))
                       raise KeyError('No such item: %s' % key)
               print('Set %s to derived value %s' % (key, repr(new_value)))
               return new_value
           else:
               raise KeyError('No such item: %s' % key)
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not key in self.dictionary: return rawitem
        print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
        if is_value:
            if self.provide_value: return rawitem
            else:
               print('Turning %s into string' % repr( rawitem ))
               return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
                                      (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)):
                return self.dictionary.change_type(key,rawitem)
            elif self.provide_value: # catch the question marks
                do_calculate = False
                if isinstance(rawitem,(list,tuple)):
                    known = [a for a in rawitem if a != '?']
                    if len(known) == 0:   #all questions
                        do_calculate = True
                elif rawitem == '?':
                        do_calculate = True
                if do_calculate:
                   # remove old value
                   del self[key]
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
                   except StarDerivationFailure as s:
                       try:
                           new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                       except StarDerivationFailure as s:

                           print("Could not turn %s into a value:" + repr(s))
                           return rawitem
                   else:
                       print('Set %s to derived value %s' % (key, repr( new_value )))
                       return new_value
            return rawitem   #can't do anything

    def __delitem__(self,key):
        self.RemoveItem(key)

    def __len__(self):
        blen = len(self.block)
        return blen

    def __nonzero__(self):
        if self.__len__() > 0: return 1
        return 0

    __bool__ = __nonzero__    # Python 3 truth testing uses __bool__

    # keys returns all internal keys
    def keys(self):
        return list(self.block.keys())    #always lower case

    def values(self):
        return [self[a] for a in self.keys()]

    def items(self):
        return list(zip(self.keys(),self.values()))

    def __contains__(self,key):
        if isinstance(key,(unicode,str)) and key.lower() in self.keys():
            return True
        return False

    def has_key(self,key):
        return key in self

    def has_key_or_alias(self,key):
        """Check if a dataname or alias is available in the block"""
        initial_test = key in self
        if initial_test: return True
        elif self.dictionary:
            aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)]
            if len(aliases)>0:
               return True
        return False

    def get(self,key,default=None):
        if key in self:
            retval = self.__getitem__(key)
        else:
            retval = default
        return retval

    def clear(self):
        self.block = {}
        self.loops = {}
        self.item_order = []
        self.true_case = {}

    # doesn't appear to work
    def copy(self):
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.loops = []
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        newcopy.loops = self.loops.copy()
    #    return self.copy.im_class(newcopy)   #catch inheritance
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self.AddItem(key,adict[key])

    def GetItemPosition(self,itemname):
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def ChangeItemOrder(self,itemname,newpos):
        """Move the printout order of `itemname` to `newpos`. If `itemname` is
        in a loop, `newpos` refers to the order within the loop."""
        if isinstance(itemname,(unicode,str)):
            true_name = itemname.lower()
        else:
            true_name = itemname
        loopno = self.FindLoop(true_name)
        if loopno < 0:  #top level
            self.item_order.remove(true_name)
            self.item_order.insert(newpos,true_name)
        else:
            self.loops[loopno].remove(true_name)
            self.loops[loopno].insert(newpos,true_name)

    def GetItemOrder(self):
        """Return a list of datanames in the order in which they will be printed.  Loops are
        referred to by numerical index"""
        return self.item_order[:]

    def AddItem(self,key,value,precheck=False):
        """Add dataname `key` to block with value `value`.  `value` may be
        a single value, a list or a tuple. If `precheck` is False (the default),
        all values will be checked and converted to unicode strings as necessary. If
        `precheck` is True, this checking is bypassed.  No checking is necessary
        when values are read from a CIF file as they are already in correct form."""
        if not isinstance(key,(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
        key = unicode(key)    #everything is unicode internally
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
        # check for overwriting
        if key in self:
             if not self.overwrite:
                 raise StarError( 'Attempt to insert duplicate item name %s' % key)
        if not precheck:   #need to sanitise
            regval,empty_val = self.regularise_data(value)
            pure_string = check_stringiness(regval)
            self.check_item_value(regval)
        else:
            regval,empty_val = value,None
            pure_string = True
        # update ancillary information first
        lower_key = key.lower()
        if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # always remove from our case table in case the case is different
        try:
            del self.true_case[lower_key]
        except KeyError:
            pass
        self.true_case[lower_key] = key
        if pure_string:
            self.block.update({lower_key:[regval,empty_val]})
        else:
            self.block.update({lower_key:[empty_val,regval]})
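
    # Illustrative example (not part of the original source): datanames are
    # stored lower-case internally but printed with the case supplied:
    #
    #   b = StarBlock()
    #   b.AddItem('_Demo.Value', 'abc')
    #   '_demo.value' in b.keys()     # -> True
    #   b['_Demo.Value']              # -> 'abc' (lookup is case-insensitive)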

    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
        necessary."""
        # print "Received data %s" % `incomingdata`
        # we accept tuples, strings, lists and dicts!!
        # Direct insertion: we have a string-valued key, with an array
        # of values -> single-item into our loop
        if isinstance(incomingdata[0],(tuple,list)):
           # a whole loop
           keyvallist = zip(incomingdata[0],incomingdata[1])
           for key,value in keyvallist:
               self.AddItem(key,value)
           self.CreateLoop(incomingdata[0])
        elif not isinstance(incomingdata[0],(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
        else:
            self.AddItem(incomingdata[0],incomingdata[1])

    def check_data_name(self,dataname,maxlength=-1):
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print 'Checking %s for unicode characterset conformance' % dataname
            if len ([a for a in dataname if ord(a) < 33]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xE == 0xE)]) > 0:
                print('%s fails' % dataname)
                for a in dataname: print('%x' % ord(a),end="")
                print()
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
           test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,unicode):
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    print("Fail value check: %s" % it)
                    raise StarError('Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                        raise StarError('Data item "' + repr( it ) +  u'"... contains forbidden characters')
        [check_one(a) for a in test_item]

    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary"""
        from numbers import Number
        if isinstance(dataitem,str):
            return unicode(dataitem),None
        if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
            return dataitem,None  #assume StarList/StarDict contain unicode if necessary
        if isinstance(dataitem,(tuple,list)):
            v,s = zip(*list([self.regularise_data(a) for a in dataitem]))
            return list(v),list(s)
            #return dataitem,[None]*len(dataitem)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError as value:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        v,s = zip(*list([self.regularise_data(a) for a in regval]))
        return list(v),list(s)

    def RemoveItem(self,itemname):
        """Remove `itemname` from the block."""
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey in self:
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)

    def RemoveLoopItem(self,itemname):
        """*Deprecated*. Use `RemoveItem` instead"""
        self.RemoveItem(itemname)

    def GetItemValue(self,itemname):
        """Return value of `itemname`.  If `itemname` is looped, a list
        of all values will be returned."""
        return self.GetFullItemValue(itemname)[0]

    def GetFullItemValue(self,itemname):
        """Return the value associated with `itemname`, and a boolean flagging whether
        (True) or not (False) it is in a form suitable for calculation.  False is
        always returned for strings and `StarList` objects."""
        try:
            s,v = self.block[itemname.lower()]
        except KeyError:
            raise KeyError('Itemname %s not in datablock' % itemname)
        # prefer string value unless all are None
        # are we a looped value?
        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
            if not_none(s):
                return s,False    #a string value
            else:
                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
        elif not_none(s):
            return s,False         #a list of string values
        else:
            if len(v)>0:
                return v,not isinstance(v[0],StarList)
            return v,True
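
    # Illustrative example (not part of the original source): the returned
    # flag distinguishes calculation-ready values from plain strings:
    #
    #   b = StarBlock()
    #   b.AddItem('_demo.number', 3.5)
    #   b.GetFullItemValue('_demo.number')   # -> (3.5, True)
    #   b.AddItem('_demo.text', 'abc')
    #   b.GetFullItemValue('_demo.text')     # -> ('abc', False)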

    def CreateLoop(self,datanames,order=-1,length_check=True):
           """Create a loop in the datablock. `datanames` is a list of datanames that
           together form a loop.  If length_check is True, they should have been initialised in the block
           to have the same number of elements (possibly 0). If `order` is given,
           the loop will appear at this position in the block when printing
           out. A loop counts as a single position."""

           if length_check:
               # check lengths: these datanames should exist
               listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
               if len(listed_values) == len(datanames):
                   len_set = set([len(self[a]) for a in datanames])
                   if len(len_set)>1:
                       raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
               elif len(listed_values) != 0:
                   raise ValueError('Request to loop datanames where some are single values and some are not')
           # store as lower case
           lc_datanames = [d.lower() for d in datanames]
           # remove these datanames from all other loops
           [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
           # remove empty loops
           empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
           for a in empty_loops:
               self.item_order.remove(a)
               del self.loops[a]
           if len(self.loops)>0:
               loopno = max(self.loops.keys()) + 1
           else:
               loopno = 1
           self.loops[loopno] = list(lc_datanames)
           if order >= 0:
               self.item_order.insert(order,loopno)
           else:
               self.item_order.append(loopno)
           # remove these datanames from item ordering
           self.item_order = [a for a in self.item_order if a not in lc_datanames]
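
    # Illustrative example (not part of the original source): add the columns
    # first, then group them into a loop of equal-length datanames:
    #
    #   b = StarBlock()
    #   b.AddItem('_demo.id', [1, 2, 3])
    #   b.AddItem('_demo.value', ['a', 'b', 'c'])
    #   b.CreateLoop(['_demo.id', '_demo.value'])
    #   b.FindLoop('_demo.id')    # -> 1, the numerical index of the new loop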

    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised.  If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised."""
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length
        old_provides = self.provide_value
        self.provide_value = False
        loop_len = len(self[oldname])
        self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass

    def FindLoop(self,keyname):
        """Find the loop that contains `keyname` and return its numerical index or
        -1 if not present. The numerical index can be used to refer to the loop in
        other routines."""
        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
        if len(loop_no)>0:
            return loop_no[0]
        else:
            return -1

    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Remove the packet for which dataname `keyname` takes
        value `keyvalue`.  Only the first such occurrence is
        removed."""
        packet_coord = list(self[keyname]).index(keyvalue)
        loopnames = self.GetLoopNames(keyname)
        for dataname in loopnames:
            self.block[dataname][0] = list(self.block[dataname][0])
            del self.block[dataname][0][packet_coord]
            self.block[dataname][1] = list(self.block[dataname][1])
            del self.block[dataname][1][packet_coord]

    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet (a `StarPacket` object) where `keyname` has value
        `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
        is raised if no packet is found or more than one packet is found."""
        my_loop = self.GetLoop(keyname)
        #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
        #print('Packet check on:' + keyname)
        #[print(repr(getattr(a,keyname))) for a in my_loop]
        if no_case:
           one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
           one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
        if len(one_pack)!=1:
            raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
        print("Keyed packet: %s" % one_pack[0])
        return one_pack[0]
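
    # Illustrative example (not part of the original source), continuing the
    # hypothetical _demo.id / _demo.value loop used in the CreateLoop example:
    #
    #   pkt = b.GetKeyedPacket('_demo.id', 2)
    #   getattr(pkt, '_demo.value')    # -> 'b'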

    def GetCompoundKeyedPacket(self,keydict):
        """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
        in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
        True.  `ValueError` is raised if no packet is found or more than one packet is found."""
        #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
        keynames = list(keydict.keys())
        my_loop = self.GetLoop(keynames[0])
        for one_key in keynames:
            keyval,no_case = keydict[one_key]
            if no_case:
               my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
            else:
               my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
        if len(my_loop)!=1:
            raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
        print("Compound keyed packet: %s" % my_loop[0])
        return my_loop[0]

    def GetKeyedSemanticPacket(self,keyvalue,cat_id):
        """Return a complete packet for category `cat_id` where the
        category key for the category equals `keyvalue`.  This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        both categories."""
        target_keys = self.dictionary.cat_key_table[cat_id]
        target_keys = [k[0] for k in target_keys] #one only in each list
        p = StarPacket()
        # set case-sensitivity flag
        lcase = False
        if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
            lcase = True
        for cat_key in target_keys:
            try:
                extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except KeyError:        #missing key
                try:
                    test_key = self[cat_key]  #generate key if possible
                    print('Test key is %s' % repr( test_key ))
                    if test_key is not None and\
                    not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                        print('Getting packet for key %s' % repr( keyvalue ))
                        extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
                except:             #cannot be generated
                    continue
            except ValueError:      #none/more than one, assume none
                continue
                #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        for keyname in target_keys:
            if hasattr(p,keyname):
                p.key = [keyname]
                break
        if not hasattr(p,"key"):
            raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p

    def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
        """Return a complete packet for category `cat_id` where the keyvalues are
        provided as a dictionary of key:(value,caseless) pairs
        This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        the requested category and any children."""
        #if len(keyvalues)==1:   #simplification
        #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
        target_keys = self.dictionary.cat_key_table[cat_id]
        # update the dictionary passed to us with all equivalents, for
        # simplicity.
        parallel_keys = list(zip(*target_keys))  #transpose
        print('Parallel keys:' + repr(parallel_keys))
        print('Keydict:' + repr(keydict))
        start_keys = list(keydict.keys())
        for one_name in start_keys:
            key_set = [a for a in parallel_keys if one_name in a]
            for one_key in key_set:
                keydict[one_key] = keydict[one_name]
        # target_keys is a list of lists, each of which is a compound key
        p = StarPacket()
        # a little function to return the dataname for a key
        def find_key(key):
            for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
                if self.has_key(one_key):
                    return one_key
            return None
        for one_set in target_keys: #loop down the categories
            true_keys = [find_key(k) for k in one_set]
            true_keys = [k for k in true_keys if k is not None]
            if len(true_keys)==len(one_set):
                truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
                try:
                    extra_packet = self.GetCompoundKeyedPacket(truekeydict)
                except KeyError:     #one or more are missing
                    continue         #should try harder?
                except ValueError:
                    continue
            else:
                continue
            print('Merging packet for keys ' + repr(one_set))
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        p.key = true_keys
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p


    def set_grammar(self,new_grammar):
        self.string_delimiters = ["'",'"',"\n;",None]
        if new_grammar in ['STAR2','2.0']:
            self.string_delimiters += ['"""',"'''"]
        if new_grammar == '2.0':
            self.list_delimiter = "  "
        elif new_grammar == 'STAR2':
            self.list_delimiter = ", "
        elif new_grammar not in ['1.0','1.1']:
            raise StarError('Request to set unknown grammar %s' % new_grammar)

    def SetOutputLength(self,wraplength=80,maxoutlength=2048):
        """Set the maximum output line length (`maxoutlength`) and the line length to
        wrap at (`wraplength`).  The wrap length is a target only and may not always be
        possible."""
        if wraplength > maxoutlength:
            raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
        self.wraplength = wraplength
        self.maxoutlength = maxoutlength

    def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''):
        self.provide_value = False
        # first make an ordering
        self.create_ordering(finish_at,start_from)  #create self.output_order
        # now do it...
        if not instring:
            outstring = CIFStringIO(target_width=80)       # the returned string
        else:
            outstring = instring
        # print block delimiter
        outstring.write(blockstart,canbreak=True)
        while len(self.output_order)>0:
           #print "Remaining to output " + `self.output_order`
           itemname = self.output_order.pop(0)
           if not isinstance(itemname,int):  #no loop
                   item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()]
                   if len(item_spec)>0:
                       item_spec = item_spec[0]
                       col_pos = item_spec.get('column',-1)
                       name_pos = item_spec.get('name_pos',-1)
                   else:
                       col_pos = -1
                       item_spec = {}
                       name_pos = -1
                   if col_pos < 0: col_pos = 40
                   outstring.set_tab(col_pos)
                   itemvalue = self[itemname]
                   outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos)
                   outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)    #space after itemname
                   self.format_value(itemvalue,outstring,hints=item_spec)
           else:# we are asked to print a loop block
                    outstring.set_tab(10)       #guess this is OK?
                    loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop']
                    if loop_spec:
                        loop_indent = max(loop_spec[0],0)
                    else:
                        loop_indent = indent
                    outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent)
                    self.format_names(outstring,indent+2,loop_no=itemname)
                    self.format_packets(outstring,indent+2,loop_no=itemname)
        else:
            returnstring = outstring.getvalue()
        outstring.close()
        return returnstring

    def format_names(self,outstring,indent=0,loop_no=-1):
        """Print datanames from `loop_no` one per line"""
        temp_order = self.loops[loop_no][:]   #copy
        format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order])
        while len(temp_order)>0:
            itemname = temp_order.pop(0)
            req_indent = format_hints.get(itemname,{}).get('name_pos',indent)
            outstring.write(' ' * req_indent,do_tab=False)
            outstring.write(self.true_case[itemname],do_tab=False)
            outstring.write("\n",do_tab=False)

    def format_packets(self,outstring,indent=0,loop_no=-1):
       alldata = [self[a] for a in self.loops[loop_no]]
       loopnames = self.loops[loop_no]
       #print('Alldata: %s' % repr(alldata))
       packet_data = list(zip(*alldata))
       #print('Packet data: %s' % repr(packet_data))
       #create a dictionary for quick lookup of formatting requirements
       format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames])
       for position in range(len(packet_data)):
           if position > 0:
               outstring.write("\n")    #new line each packet except first
           for point in range(len(packet_data[position])):
               datapoint = packet_data[position][point]
               format_hint = format_hints.get(loopnames[point],{})
               self.format_packet_item(datapoint,indent,outstring,format_hint)  #writes directly to outstring
               outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)

    def format_packet_item(self,pack_item,indent,outstring,format_hint):
           # print('Formatting %s' % repr(pack_item))
           # temporary check for any non-unicode items
           if isinstance(pack_item,str) and not isinstance(pack_item,unicode):
               raise StarError("Item {0!r} is not unicode".format(pack_item))
           if isinstance(pack_item,unicode):
               delimiter = format_hint.get('delimiter',None)
               startcol = format_hint.get('column',-1)
               outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol)
           else:
               self.format_value(pack_item,outstring,hints = format_hint)

    def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}):
        if hints.get("reformat",False) and "\n" in instring:
            instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"])
        allowed_delimiters = set(self.string_delimiters)
        if len(instring)==0: allowed_delimiters.difference_update([None])
        if len(instring) > (self.maxoutlength-2) or '\n' in instring:
                allowed_delimiters.intersection_update(["\n;","'''",'"""'])
        if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring:
                allowed_delimiters.difference_update([None])
        if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'):
                allowed_delimiters.difference_update([None])
        if len(instring)>5 and instring[:6].lower()=='global':
                allowed_delimiters.difference_update([None])
        if '"' in instring: allowed_delimiters.difference_update(['"',None])
        if "'" in instring: allowed_delimiters.difference_update(["'",None])
        out_delimiter = "\n;"  #default (most conservative)
        if delimiter in allowed_delimiters:
            out_delimiter = delimiter
        elif "'" in allowed_delimiters: out_delimiter = "'"
        elif '"' in allowed_delimiters: out_delimiter = '"'
        if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
        elif out_delimiter is None: return instring
        # we are left with semicolon strings
        # use our protocols:
        maxlinelength = max([len(a) for a in instring.split('\n')])
        if maxlinelength > self.maxoutlength:
            protocol_string = apply_line_folding(instring)
        else:
            protocol_string = instring
        # now check for embedded delimiters
        if "\n;" in protocol_string:
            prefix = "CIF:"
            while prefix in protocol_string: prefix = prefix + ":"
            protocol_string = apply_line_prefix(protocol_string,prefix+"> ")
        return "\n;" + protocol_string + "\n;"

    def format_value(self,itemvalue,stringsink,compound=False,hints={}):
        """Format a Star data value"""
        global have_numpy
        delimiter = hints.get('delimiter',None)
        startcol = hints.get('column',-1)
        if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed
            raise StarError("Non-unicode value {0} found in block".format(itemvalue))
        if isinstance(itemvalue,unicode):  #need to sanitize
            stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol)
        elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy
           stringsink.set_tab(0)
           stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol)
           if len(itemvalue)>0:
               self.format_value(itemvalue[0],stringsink)
               for listval in itemvalue[1:]:
                  # print('Formatting %s' % repr(listval))
                  stringsink.write(self.list_delimiter,do_tab=False)
                  self.format_value(listval,stringsink,compound=True)
           stringsink.write(']',unindent=True)
        elif isinstance(itemvalue,dict):
           stringsink.set_tab(0)
           stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol)  #start a new line inside
           items = list(itemvalue.items())
           if len(items)>0:
               stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
               self.format_value(items[0][1],stringsink)
               for key,value in items[1:]:
                   stringsink.write(self.list_delimiter)
                   stringsink.write("'"+key+"'"+":",canbreak=True)
                   self.format_value(value,stringsink)   #never break between key and value
           stringsink.write('}',unindent=True)
        elif isinstance(itemvalue,(float,int)) or \
             (have_numpy and isinstance(itemvalue,(numpy.number))):  #TODO - handle uncertainties
           stringsink.write(str(itemvalue),canbreak=True,startcol=startcol)   #numbers
        else:
           raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue ))

    def create_ordering(self,finish_at,start_from):
        """Create a canonical ordering that includes loops using our formatting hints dictionary"""
        requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop'])
        new_order = []
        for item in requested_order:
           if isinstance(item,unicode) and item.lower() in self.item_order:
               new_order.append(item.lower())
           elif item in self:    #in a loop somewhere
               target_loop = self.FindLoop(item)
               if target_loop not in new_order:
                   new_order.append(target_loop)
                   # adjust loop name order
                   loopnames = self.loops[target_loop]
                   loop_order = [i for i in requested_order if i in loopnames]
                   unordered = [i for i in loopnames if i not in loop_order]
                   self.loops[target_loop] = loop_order + unordered
        extras = list([i for i in self.item_order if i not in new_order])
        self.output_order = new_order + extras
        # now handle partial output
        if start_from != '':
            if start_from in requested_order:
                sfi = requested_order.index(start_from)
                loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0]
                candidates = list([k for k in self.output_order if k in requested_order[sfi:]])
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from))
                    self.output_order = self.output_order[cand_pos:]
                else:
                    print('Start is beyond end of output list')
                    self.output_order = []
            elif start_from in extras:
               self.output_order = self.output_order[self.output_order.index(start_from):]
            else:
               self.output_order = []
        if finish_at != '':
            if finish_at in requested_order:
                fai = requested_order.index(finish_at)
                loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0])
                candidates = list([k for k in self.output_order if k in requested_order[fai:]])
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at))
                    self.output_order = self.output_order[:cand_pos]
                else:
                    print('All of block output')
            elif finish_at in extras:
               self.output_order = self.output_order[:self.output_order.index(finish_at)]
        #print('Final order: ' + repr(self.output_order))

    def convert_to_string(self,dataname):
        """Convert values held in dataname value fork to string version"""
        v,is_value = self.GetFullItemValue(dataname)
        if not is_value:
            return v
        if check_stringiness(v): return v   #already strings
        # TODO...something else
        return v

    def do_wrapping(self,instring,indent=3):
        """Wrap the provided string"""
        if "   " in instring:   #already formatted
            return instring
        self.wrapper.initial_indent = ' '*indent
        self.wrapper.subsequent_indent = ' '*indent
        # remove leading and trailing space
        instring = instring.strip()
        # split into paragraphs
        paras = instring.split("\n\n")
        wrapped_paras = [self.wrapper.fill(p) for p in paras]
        return "\n".join(wrapped_paras)


    def merge(self,new_block,mode="strict",match_att=[],match_function=None,
                   rel_keys = []):
        if mode == 'strict':
           for key in new_block.keys():
               if key in self and key not in match_att:
                  raise StarError( "Identical keys %s in strict merge mode" % key)
               elif key not in match_att:           #a new dataname
                   self[key] = new_block[key]
           # we get here if there are no keys in common, so we can now copy
           # the loops and not worry about overlaps
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'replace':
           newkeys = list(new_block.keys())
           for ma in match_att:
              try:
                   newkeys.remove(ma)        #don't touch the special ones
              except ValueError:
                   pass
           for key in new_block.keys():
                  if isinstance(key,unicode):
                      self[key] = new_block[key]
           # creating the loop will remove items from other loops
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'overlay':
           print('Overlay mode, current overwrite is %s' % self.overwrite)
           raise StarError('Overlay block merge mode not implemented')
           # NOTE: the code below this raise is unreachable legacy code, retained
           # as a sketch of the intended overlay algorithm.  It refers to names
           # (e.g. overlaps, newkeys) that are never bound in this method.
           save_overwrite = self.overwrite
           self.overwrite = True
           for attribute in new_block.keys():
               if attribute in match_att: continue      #ignore this one
               new_value = new_block[attribute]
               #non-looped items
               if new_block.FindLoop(attribute)<0:     #not looped
                  self[attribute] = new_value
           my_loops = self.loops.values()
           perfect_overlaps = [a for a in new_block.loops if a in my_loops]
           for po in perfect_overlaps:
              loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
              try:
                  newkeypos = [newkeys.index(a) for a in loop_keys]
                  newkeypos = newkeypos[0]      #one key per loop for now
                  loop_keys = loop_keys[0]
              except (ValueError,IndexError):
                  newkeypos = []
              overlap_data = [listify(self[a]) for a in overlaps] #old packet data
              new_data = [new_block[a] for a in overlaps] #new packet data
              packet_data = transpose(overlap_data)
              new_p_data = transpose(new_data)
              # remove any packets for which the keys match between old and new; we
              # make the arbitrary choice that the old data stays
              if newkeypos:
                  # get matching values in new list
                  print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos])))
                  key_matches = [a for a in new_data[newkeypos] if a in overlap_data[newkeypos]]
                  # filter out any new data with these key values
                  new_p_data = [a for a in new_p_data if a[newkeypos] not in key_matches]
                  if new_p_data:
                      new_data = transpose(new_p_data)
                  else: new_data = []
              # wipe out the old data and enter the new stuff
              byebyeloop = self.GetLoop(overlaps[0])
              # print("Removing '%r' with overlaps '%r'" % (byebyeloop, overlaps))
              # Note that if, in the original dictionary, overlaps are not
              # looped, GetLoop will return the block itself.  So we check
              # for this case...
              if byebyeloop != self:
                  self.remove_loop(byebyeloop)
              self.AddLoopItem((overlaps,overlap_data))  #adding old packets
              for pd in new_p_data:                             #adding new packets
                 if pd not in packet_data:
                    for i in range(len(overlaps)):
                        #don't do this at home; we are appending
                        #to something in place
                        self[overlaps[i]].append(pd[i])
           self.overwrite = save_overwrite

    def assign_dictionary(self,dic):
        if not dic.diclang=="DDLm":
            print("Warning: ignoring dictionary %s" % dic.my_uri)
            return
        self.dictionary = dic

    def unassign_dictionary(self):
        """Remove dictionary-dependent behaviour"""
        self.dictionary = None



class StarPacket(list):
    def merge_packet(self,incoming):
        """Merge contents of incoming packet with this packet"""
        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
        self.extend(incoming)
        for na in new_attrs:
            setattr(self,na,getattr(incoming,na))

    def __getattr__(self,att_name):
        """Derive a missing attribute"""
        if att_name.lower() in self.__dict__:
            return getattr(self,att_name.lower())
        if att_name in ('cif_dictionary','fulldata','key'):
            raise AttributeError('Programming error: can only assign value of %s' % att_name)
        d = self.cif_dictionary
        c = self.fulldata
        k = self.key
        assert isinstance(k,list)
        d.derive_item(att_name,c,store_value=True)
        #
        # now pick out the new value
        # self.key is a list of the key values
        keydict = dict([(v,(getattr(self,v),True)) for v in k])
        full_pack = c.GetCompoundKeyedPacket(keydict)
        return getattr(full_pack,att_name)
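
    # Sketch of the derivation flow above (illustrative, not executed):
    #   getattr(packet,'_cell.volume')
    # 1. normal attribute lookup fails, so __getattr__ runs;
    # 2. the attached DDLm dictionary derives the item into the full data block;
    # 3. the packet matching this packet's key values is re-read and the newly
    #    derived value is returned.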

class BlockCollection(object):
    """A container for StarBlock objects. The constructor takes
    one non-keyword argument `datasource` to set the initial data.  If
    `datasource` is a Python dictionary, the values must be `StarBlock`
    objects and the keys will be blocknames in the new object. Keyword
    arguments:

    standard:
        `CIF` or `Dic`.  `CIF` enforces a maximum dataname length of 75
        characters; `Dic` causes a block's contents to be output before that
        block's save frames.

    blocktype:
        The type of blocks held in this container. Normally `StarBlock`
        or `CifBlock`.

    characterset:
        `ascii` or `unicode`.  Blocknames and datanames appearing within
        blocks are restricted to the appropriate characterset. Note that
        only characters in the basic multilingual plane are accepted. This
        restriction will be lifted when PyCIFRW is ported to Python3.

    scoping:
        `instance` or `dictionary`: `instance` implies that save frames are
        hidden from save frames lower in the hierarchy or in sibling
        hierarchies. `dictionary` makes all save frames visible everywhere
        within a data block.  This setting is only relevant for STAR2 dictionaries and
        STAR2 data files, as save frames are currently not used in plain CIF data
        files.

"""
    def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
                 characterset='ascii',scoping='instance',**kwargs):
        import collections
        self.dictionary = {}
        self.standard = standard
        self.lower_keys = set()           # short_cuts
        self.renamed = {}
        self.PC = collections.namedtuple('PC',['block_id','parent'])
        self.child_table = {}
        self.visible_keys = []            # for efficiency
        self.block_input_order = []       # to output in same order
        self.scoping = scoping  #will trigger setting of child table
        self.blocktype = blocktype
        self.master_template = {}   #for outputting
        self.set_grammar('2.0')
        self.set_characterset(characterset)
        if isinstance(datasource,BlockCollection):
            self.merge_fast(datasource)
            self.scoping = scoping   #reset visibility
        elif isinstance(datasource,dict):
            for key,value in datasource.items():
                 self[key]= value
        self.header_comment = ''

    def set_grammar(self,new_grammar):
        """Set the syntax and grammar for output to `new_grammar`"""
        if new_grammar not in ['1.1','1.0','2.0','STAR2']:
            raise StarError('Unrecognised output grammar %s' % new_grammar)
        self.grammar = new_grammar

    def set_characterset(self,characterset):
        """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. If datanames
        have already been added to any datablocks, they are not checked."""
        self.characterset = characterset
        for one_block in self.lower_keys:
            self[one_block].set_characterset(characterset)

    def unlock(self):
        """Allow overwriting of all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite=True

    def lock(self):
        """Disallow overwriting for all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite = False

    def __str__(self):
        return self.WriteOut()

    def __setitem__(self,key,value):
        self.NewBlock(key,value,parent=None)

    def __getitem__(self,key):
        if isinstance(key,(unicode,str)):
           lowerkey = key.lower()
           if lowerkey in self.lower_keys:
               return self.dictionary[lowerkey]
           #print('Visible keys:' + repr(self.visible_keys))
           #print('All keys' + repr(self.lower_keys))
           #print('Child table' + repr(self.child_table))
        raise KeyError('No such item %s' % key)

    # We materialise the list of children before deleting, as deletion
    # mutates child_table.  Deletion removes a key regardless of visibility.

    def __delitem__(self,key):
        dummy = self[key]   #raise error if not present
        lowerkey = key.lower()
        # get rid of all children recursively as well
        children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey]
        for child in children:
            del self[child]   #recursive call
        del self.dictionary[lowerkey]
        del self.child_table[lowerkey]
        try:
            self.visible_keys.remove(lowerkey)
        except ValueError:     #list.remove raises ValueError, not KeyError
            pass
        self.lower_keys.remove(lowerkey)
        self.block_input_order.remove(lowerkey)

    def __len__(self):
        return len(self.visible_keys)

    def __contains__(self,item):
        """Support the 'in' operator"""
        if not isinstance(item,(unicode,str)): return False
        if item.lower() in self.visible_keys:
            return True
        return False

    # We iterate over all visible
    def __iter__(self):
        for one_block in self.keys():
            yield self[one_block]

    # TODO: handle different case
    def keys(self):
        return self.visible_keys

    # dict.has_key was removed in Python 3; retained here for backwards compatibility
    def has_key(self,key):
        return key in self

    def get(self,key,default=None):
        if key in self:     # take account of case
            return self.__getitem__(key)
        else:
            return default

    def clear(self):
        self.dictionary.clear()
        self.lower_keys = set()
        self.child_table = {}
        self.visible_keys = []
        self.block_input_order = []

    def copy(self):
        newcopy = self.dictionary.copy()  #all blocks
        for k,v in self.dictionary.items():
            newcopy[k] = v.copy()
        newcopy = BlockCollection(newcopy)
        newcopy.child_table = self.child_table.copy()
        newcopy.lower_keys = self.lower_keys.copy()
        newcopy.block_input_order = list(self.block_input_order)  #list.copy() is Python 3 only
        newcopy.characterset = self.characterset
        newcopy.SetTemplate(self.master_template.copy())
        newcopy.scoping = self.scoping  #this sets visible keys
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self[key] = adict[key]

    def items(self):
        return [(a,self[a]) for a in self.keys()]

    def first_block(self):
        """Return the 'first' block.  This is not necessarily the first block in the file."""
        if self.keys():
            return self[self.keys()[0]]

    def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
        """Add a new block named `blockname` with contents `blockcontents`. If `fix`
        is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
        allows a parent block to be set so that block hierarchies can be created.  Depending on
        the output standard, these blocks will be printed out as nested save frames or
        ignored."""
        if blockcontents is None:
            blockcontents = self.blocktype()
        if self.standard == "CIF":
            blockcontents.setmaxnamelength(75)
        if len(blockname)>75:
                 raise StarError('Blockname %s is longer than 75 characters' % blockname)
        if fix:
            newblockname = re.sub('[ \t]','_',blockname)
        else: newblockname = blockname
        new_lowerbn = newblockname.lower()
        if new_lowerbn in self.lower_keys:   #already there
            if self.standard is not None:
               toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent is None]
               if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
                  while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
               elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
                  replace_name = new_lowerbn
                  while replace_name in self.lower_keys: replace_name = replace_name + '+'
                  self._rekey(new_lowerbn,replace_name)
                  # now continue on to add in the new block
                  if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                      parent = replace_name
               else:
                  raise StarError( "Attempt to replace existing block " + blockname)
            else:
               del self[new_lowerbn]
        self.dictionary.update({new_lowerbn:blockcontents})
        self.lower_keys.add(new_lowerbn)
        self.block_input_order.append(new_lowerbn)
        if parent is None:
           self.child_table[new_lowerbn]=self.PC(newblockname,None)
           self.visible_keys.append(new_lowerbn)
        else:
           if parent.lower() in self.lower_keys:
              if self.scoping == 'instance':
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
              else:
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
                 self.visible_keys.append(new_lowerbn)
           else:
               print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
        self[new_lowerbn].set_grammar(self.grammar)
        self[new_lowerbn].set_characterset(self.characterset)
        self[new_lowerbn].formatting_hints = self.master_template
        return new_lowerbn  #in case calling routine wants to know
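
    # Usage sketch for NewBlock (illustrative):
    #   cf.NewBlock('my_data')                        # new top-level block
    #   cf.NewBlock('my_frame',parent='my_data')      # printed as a save frame
    # When a name collides in a standard-conformant collection, '+' characters
    # are appended to one of the keys rather than silently overwriting.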

    def _rekey(self,oldname,newname,block_id=''):
        """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
           does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version."""
        move_block = self[oldname]    #old block
        is_visible = oldname in self.visible_keys
        move_block_info = self.child_table[oldname]    #old info
        move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
        # now rewrite the necessary bits
        self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
        oldpos = self.block_input_order.index(oldname)
        del self[oldname]   #do this after updating child table so we don't delete children
        self.dictionary.update({newname:move_block})
        self.lower_keys.add(newname)
        #print('Block input order was: ' + repr(self.block_input_order))
        self.block_input_order[oldpos:oldpos]=[newname]
        if block_id == '':
           self.child_table.update({newname:move_block_info})
        else:
           self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
        if is_visible: self.visible_keys += [newname]

    def rename(self,oldname,newname):
        """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
           conformance checks are conducted."""
        realoldname = oldname.lower()
        realnewname = newname.lower()
        if realnewname in self.lower_keys:
            raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname))
        if realoldname not in self.lower_keys:
            raise KeyError('Cannot find old block %s' % realoldname)
        self._rekey(realoldname,realnewname,block_id=newname)

    def makebc(self,namelist,scoping='dictionary'):
        """Make a block collection from a list of block names"""
        newbc = BlockCollection()
        block_lower = [n.lower() for n in namelist]
        proto_child_table = [a for a in self.child_table.items() if a[0] in block_lower]
        newbc.child_table = dict(proto_child_table)
        new_top_level = [(a[0],self.PC(a[1].block_id,None)) for a in newbc.child_table.items() if a[1].parent not in block_lower]
        newbc.child_table.update(dict(new_top_level))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        newbc.scoping = scoping
        newbc.block_input_order = block_lower
        return newbc


    def merge_fast(self,new_bc,parent=None):
        """Do a fast merge. WARNING: this may change one or more of its frame headers in order to
        remove duplicate frames.  Please keep a handle to the block object instead of the text of
        the header."""
        if self.standard is None:
            mode = 'replace'
        else:
            mode = 'strict'
        overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
        if parent is not None:
            parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent]
            if len(parent_name)==0 or len(parent_name)>1:
                raise StarError("Unable to find unique parent block name: have %s" % str(parent_name))
            parent_name = parent_name[0]
        else:
            parent_name = None  #an error will be thrown if we treat as a string
        if overlap_flag and mode != 'replace':
            double_keys = self.lower_keys.intersection(new_bc.lower_keys)
            for dup_key in double_keys:
                  our_parent = self.child_table[dup_key].parent
                  their_parent = new_bc.child_table[dup_key].parent
                  if (our_parent is None and their_parent is not None and parent is None) or\
                      parent is not None:  #rename our block
                    start_key = dup_key
                    while start_key in self.lower_keys: start_key = start_key+'+'
                    self._rekey(dup_key,start_key)
                     if parent_name is not None and parent_name.lower() == dup_key:  #we just renamed the prospective parent!
                         parent_name = start_key
                  elif our_parent is not None and their_parent is None and parent is None:
                    start_key = dup_key
                    while start_key in new_bc.lower_keys: start_key = start_key+'+'
                    new_bc._rekey(dup_key,start_key)
                  else:
                    raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
        self.dictionary.update(new_bc.dictionary)
        self.lower_keys.update(new_bc.lower_keys)
        self.visible_keys += (list(new_bc.lower_keys))
        self.block_input_order += new_bc.block_input_order
        #print('Block input order now:' + repr(self.block_input_order))
        self.child_table.update(new_bc.child_table)
        if parent_name is not None:     #redo the child_table entries
              reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent is None]
              reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
              self.child_table.update(dict(reparent_dict))

    def merge(self,new_bc,mode=None,parent=None,single_block=[],
                   idblock="",match_att=[],match_function=None):
        if mode is None:
            if self.standard is None:
               mode = 'replace'
            else:
               mode = 'strict'
        if single_block:
            self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                                   match_att=match_att,
                                                   match_function=match_function)
            return None
        base_keys = [a[1].block_id for a in self.child_table.items()]
        block_to_item = base_keys   #default
        new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
        if match_att:
            #make a blockname -> item name map
            if match_function:
                block_to_item = [match_function(self[a]) for a in self.keys()]
            else:
                block_to_item = [self[a].get(match_att[0],None) for a in self.keys()]
            #print(repr(block_to_item))
        for key in new_keys:        #run over incoming blocknames
            if key == idblock: continue    #skip dictionary id
            basekey = key           #default value
            if len(match_att)>0:
               attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
            else:
               attval = 0
            for ii in range(len(block_to_item)):  #do this way to get looped names
                thisatt = block_to_item[ii]       #keyname in old block
                #print "Looking for %s in %s" % (attval,thisatt)
                if attval == thisatt or \
                   (isinstance(thisatt,list) and attval in thisatt):
                      basekey = base_keys.pop(ii)
                      block_to_item.remove(thisatt)
                      break
            if basekey not in self or mode=="replace":
                new_parent = new_bc.get_parent(key)
                if parent is not None and new_parent is None:
                   new_parent = parent
                self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
            else:
                if mode=="strict":
                    raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
                elif mode=="overlay":
                    # print "Merging block %s with %s" % (basekey,key)
                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
                else:
                    raise StarError( "Merge called with unknown mode %s" % mode)

    def checknamelengths(self,target_block,maxlength=-1):
        if maxlength < 0:
            return
        else:
            toolong = [a for a in target_block.keys() if len(a)>maxlength]
        outstring = ""
        if toolong:
           outstring = "\n".join(toolong)
           raise StarError( 'Following data names too long:' + outstring)

    def get_all(self,item_name):
        raw_values = [self[a].get(item_name) for a in self.keys()]
        raw_values = [a for a in raw_values if a is not None]
        ret_vals = []
        for rv in raw_values:
            if isinstance(rv,list):
                for rvv in rv:
                    if rvv not in ret_vals: ret_vals.append(rvv)
            else:
                if rv not in ret_vals: ret_vals.append(rv)
        return ret_vals

    def __setattr__(self,attr_name,newval):
        if attr_name == 'scoping':
            if newval not in ('dictionary','instance'):
                raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
            if newval == 'dictionary':
                self.visible_keys = [a for a in self.lower_keys]
            else:
                #only top-level datablocks visible
                self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent is None]
        object.__setattr__(self,attr_name,newval)

    def get_parent(self,blockname):
        """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
        possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
        try:
            first = next(possibles)   #get first one
        except:
            raise StarError('no parent for %s' % blockname)
        try:
           second = next(possibles)
        except StopIteration:
           return first[1].parent
        raise StarError('More than one parent for %s' % blockname)

    def get_roots(self):
        """Get the top-level blocks"""
        return [a for a in self.child_table.items() if a[1].parent is None]

    def get_children(self,blockname,include_parent=False,scoping='dictionary'):
        """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
        True, the parent block will also be included in the block collection as the root."""
        newbc = BlockCollection()
        block_lower = blockname.lower()
        proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
        newbc.child_table = dict(proto_child_table)
        if not include_parent:
           newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        if include_parent:
            newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
            newbc.lower_keys.add(block_lower)
            newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
        newbc.scoping = scoping
        return newbc

    def get_immediate_children(self,parentname):
        """Get the next level of children of the given block as a list, without nested levels"""
        child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
        return child_handles

    # Note: this recursive scan can be slow for large block hierarchies
    def get_child_list(self,parentname):
        """Get a list of all child categories in alphabetical order"""
        child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
        child_handles.sort()
        return child_handles

    def is_child_of_parent(self,parentname,blockname):
        """Return `True` if `blockname` is a child of `parentname`"""
        checkname = parentname.lower()
        more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
        if blockname.lower() in more_children:
           return True
        else:
           for one_child in more_children:
               if self.is_child_of_parent(one_child,blockname): return True
        return False
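
    # With a hierarchy a -> b -> c (parent to child), the recursion above gives
    #   is_child_of_parent('a','c')  -> True   (reached via intermediate block b)
    #   is_child_of_parent('c','a')  -> False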

    def set_parent(self,parentname,childname):
        """Set the parent block"""
        # first check that both blocks exist
        if parentname.lower() not in self.lower_keys:
            raise KeyError('Parent block %s does not exist' % parentname)
        if childname.lower() not in self.lower_keys:
            raise KeyError('Child block %s does not exist' % childname)
        old_entry = self.child_table[childname.lower()]
        self.child_table[childname.lower()]=self.PC(old_entry.block_id,
               parentname.lower())
        self.scoping = self.scoping #reset visibility

    def SetTemplate(self,template_file):
            """Use `template_file` as a template for all block output"""
            self.master_template = process_template(template_file)
            for b in self.dictionary.values():
                b.formatting_hints = self.master_template

    def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
        """Return the contents of this file as a string, wrapping if possible at `wraplength`
        characters and restricting maximum line length to `maxoutlength`.  Delimiters and
        save frame nesting are controlled by `self.grammar`. If `blockorder` is
        provided, blocks are output in this order unless nested save frames have been
        requested (STAR2). The default block order is the order in which blocks were input.
        `saves_after` inserts all save frames after the given dataname,
        which allows less important items to appear later.  Useful in conjunction with a
        template for dictionary files."""
        if maxoutlength != 0:
            self.SetOutputLength(maxoutlength)
        if not comment:
            comment = self.header_comment
        outstring = StringIO()
        if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
            outstring.write(r"#\#CIF_2.0" + "\n")
        outstring.write(comment)
        # prepare all blocks
        for b in self.dictionary.values():
            b.set_grammar(self.grammar)
            b.formatting_hints = self.master_template
            b.SetOutputLength(wraplength,self.maxoutlength)
        # loop over top-level
        # monitor output
        all_names = list(self.child_table.keys())   #i.e. lower case
        if blockorder is None:
            blockorder = self.block_input_order
        top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
        for blockref,blockname in top_block_names:
            print('Writing %s, ' % blockname + repr(self[blockref]))
            outstring.write('\n' + 'data_' +blockname+'\n')
            all_names.remove(blockref)
            if self.standard == 'Dic':              #put contents before save frames
                outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
            if self.grammar == 'STAR2':  #nested save frames
                child_refs = self.get_immediate_children(blockref)
                for child_ref,child_info in child_refs:
                    child_name = child_info.block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    self.block_to_string_nested(child_ref,child_name,outstring,4)
                    outstring.write('\n' + 'save_'+ '\n')
            elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
                child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
                for child_ref in child_refs:
                    child_name = self.child_table[child_ref].block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    outstring.write(str(self[child_ref]))
                    outstring.write('\n\n' + 'save_' + '\n')
                    all_names.remove(child_ref.lower())
            else:
                raise StarError('Grammar %s is not recognised for output' % self.grammar)
            if self.standard != 'Dic':              #put contents after save frames
                outstring.write(str(self[blockref]))
            else:
                outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
        returnstring =  outstring.getvalue()
        outstring.close()
        if len(all_names)>0:
            print('WARNING: following blocks not output: %s' % repr(all_names))
        else:
            print('All blocks output.')
        return returnstring
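
    # Usage sketch (illustrative):
    #   cf.set_grammar('2.0')
    #   out_text = cf.WriteOut(blockorder=['second_block','first_block'])
    # For STAR2 grammar, child save frames are nested inside their parent block
    # and so cannot be reordered independently of it.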

    def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0):
        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children,
           and syntactically nesting save frames"""
        child_refs = self.get_immediate_children(block_ref)
        self[block_ref].set_grammar(self.grammar)
        if self.standard == 'Dic':
            outstring.write(str(self[block_ref]))
        for child_ref,child_info in child_refs:
            child_name = child_info.block_id
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string_nested(child_ref,child_name,outstring,indentlevel)
            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
        if self.standard != 'Dic':
            outstring.write(str(self[block_ref]))


class StarFile(BlockCollection):
    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
                scoping='instance',grammar='1.1',scantype='standard',
                 permissive=False,**kwargs):
        super(StarFile,self).__init__(datasource=datasource,**kwargs)
        self.my_uri = getattr(datasource,'my_uri','')
        if maxoutlength == 0:
            self.maxoutlength = 2048
        else:
            self.maxoutlength = maxoutlength
        self.scoping = scoping
        if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"):
            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
                     maxlength = maxinlength,permissive=permissive)
        self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file
#               Produced by PySTARRW module
#
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""
    def set_uri(self,my_uri): self.my_uri = my_uri


class CIFStringIO(StringIO):
    def __init__(self,target_width=80,**kwargs):
        StringIO.__init__(self,**kwargs)
        self.currentpos = 0
        self.target_width = target_width
        self.tabwidth = -1
        self.indentlist = [0]
        self.last_char = ""

    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,
                             delimiter=False,startcol=-1):
        """Write a string with correct linebreak, tabs and indents"""
        # do we need to break?
        if delimiter:
            if len(outstring)>1:
                raise ValueError('Delimiter %s is longer than one character' % repr( outstring ))
            output_delimiter = True
        if mustbreak:    #insert a new line and indent
            temp_string = '\n' + ' ' * self.indentlist[-1]
            StringIO.write(self,temp_string)
            self.currentpos = self.indentlist[-1]
            self.last_char = temp_string[-1]
        if self.currentpos+len(outstring)>self.target_width: #try to break
            if not delimiter and outstring[0]!='\n':          #ie <cr>;
                if canbreak:
                    temp_string = '\n' + ' ' * self.indentlist[-1]
                    StringIO.write(self,temp_string)
                    self.currentpos = self.indentlist[-1]
                    self.last_char = temp_string[-1]
            else:        #assume a break will be forced on next value
                output_delimiter = False    #the line break becomes the delimiter
        #try to match requested column
        if startcol > 0:
            if self.currentpos < startcol:
                StringIO.write(self,(startcol - self.currentpos)* ' ')
                self.currentpos = startcol
                self.last_char = ' '
            else:
                print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos))
                startcol = -1   #so that tabbing works as a backup
        #handle tabs
        if self.tabwidth >0 and do_tab and startcol < 0:
            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
            #print('Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop))
            if self.currentpos < next_stop:
                StringIO.write(self,(next_stop-self.currentpos)*' ')
                self.currentpos = next_stop
                self.last_char = ' '
        #calculate indentation after tabs and col setting applied
        if newindent:           #indent by current amount
            if self.indentlist[-1] == 0:    #first time
                self.indentlist.append(self.currentpos)
                # print('Indentlist: ' + repr(self.indentlist))
            else:
                self.indentlist.append(self.indentlist[-1]+2)
        elif unindent:
            if len(self.indentlist)>1:
                self.indentlist.pop()
            else:
                print('Warning: cannot unindent any further')
        #check that we still need a delimiter
        if self.last_char in [' ','\n','\t']:
            output_delimiter = False
        #now output the string - every invocation comes through here
        if (delimiter and output_delimiter) or not delimiter:
            StringIO.write(self,outstring)
        last_line_break = outstring.rfind('\n')
        if last_line_break >=0:
            self.currentpos = len(outstring)-last_line_break
        else:
            self.currentpos = self.currentpos + len(outstring)
        #remember the last character
        if len(outstring)>0:
            self.last_char = outstring[-1]

    def set_tab(self,tabwidth):
        """Set the tab stop position"""
        self.tabwidth = tabwidth

class StarError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar Format error: '+ self.value

class StarLengthError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar length error: ' + self.value

class StarDerivationError(Exception):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed, None returned" % self.fail_name

#
# This is subclassed from AttributeError in order to allow hasattr
# to work.
#
class StarDerivationFailure(AttributeError):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed" % self.fail_name

def ReadStar(filename,prepared = None, maxlength=-1,
             scantype='standard',grammar='STAR2',CBF=False, permissive=False):

    """ Read in a STAR file, returning the contents in the `prepared` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename`
    will be added to.

    * `maxlength` is the maximum allowable line length in the input file. This has been set at
    2048 characters for CIF but is unlimited (-1) for STAR files.

    * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None` or `auto`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files.
    Note that (nested) save frames are read in all grammar variations and then flagged afterwards if
    they do not match the requested grammar.

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines.  Note that running PyCIFRW in Jython uses native Java regular expressions
    to provide a speedup regardless of this argument.

    * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is
    excised from the input data stream before parsing and is not available in the returned object.

    * `permissive` allows non-UTF8 encodings (currently only latin1) in the input file. Such
    encodings are a violation of the CIF standard.

    """

    if prepared is None:        #must come before we consult prepared.scoping
        prepared = StarFile()
    # save desired scoping
    save_scoping = prepared.scoping
    from . import YappsStarParser_1_1 as Y11
    from . import YappsStarParser_1_0 as Y10
    from . import YappsStarParser_2_0 as Y20
    from . import YappsStarParser_STAR2 as YST
    if grammar == "auto" or grammar is None:
        try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)]
    elif grammar == '1.0':
        try_list = [('1.0',Y10)]
    elif grammar == '1.1':
        try_list = [('1.1',Y11)]
    elif grammar == '2.0':
        try_list = [('2.0',Y20)]
    elif grammar == 'STAR2':
        try_list = [('STAR2',YST)]
    else:
        raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar ))
    if isinstance(filename,(unicode,str)):
        # create an absolute URL
        relpath = urlparse(filename)
        if relpath.scheme == "":
            if not os.path.isabs(filename):
                fullpath = os.path.join(os.getcwd(),filename)
            else:
                fullpath = filename
            newrel = list(relpath)
            newrel[0] = "file"
            newrel[2] = fullpath
            my_uri = urlunparse(newrel)
        else:
            my_uri = urlunparse(relpath)
        # print("Full URL is: " + my_uri)
        filestream = urlopen(my_uri)
        raw_text = filestream.read()    #read once only: the stream cannot be rewound
        filestream.close()
        try:
            text = raw_text.decode('utf-8-sig')
        except UnicodeDecodeError:
            if permissive:
                text = raw_text.decode('latin1')
            else:
                raise SyntaxError("%s: bad encoding (must be utf8 or ascii)" % filename)
    else:
        filestream = filename   #already opened for us
        raw_text = filestream.read()
        if not isinstance(raw_text,unicode):
            try:
                text = raw_text.decode('utf-8-sig')  #CIF is always ascii/utf8
            except UnicodeDecodeError:
                if permissive:
                    text = raw_text.decode('latin1')  #do not re-read: the stream is exhausted
                else:
                    raise SyntaxError("Bad input encoding (must be utf8 or ascii)")
        else:
            text = raw_text
        my_uri = ""
    if not text:      # empty file: record the URI and return the empty collection
        prepared.set_uri(my_uri)
        return prepared
    # filter out non-ASCII characters in CBF files if required.  We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
    if CBF:
       text_bits  = text.split("-BINARY-FORMAT-SECTION-")
       text = text_bits[0]
       for section in range(2,len(text_bits),2):
           text = text+" (binary omitted)"+text_bits[section]
    # we recognise ctrl-Z as end of file
    endoffile = text.find(chr(26))
    if endoffile >= 0:
        text = text[:endoffile]
    split = text.split('\n')
    if maxlength > 0:
        toolong = [a for a in split if len(a)>maxlength]
        if toolong:
            pos = split.index(toolong[0])
            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    # honour the header string
    if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list:
        try_list.remove(('2.0',Y20),)
        if not try_list:
            raise StarError('File %s missing CIF2.0 header' % (filename))
    for grammar_name,Y in try_list:
       if scantype == 'standard' or grammar_name in ['2.0','STAR2']:
            parser = Y.StarParser(Y.StarParserScanner(text))
       else:
            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
       # handle encoding switch
       if grammar_name in ['2.0','STAR2']:
           prepared.set_characterset('unicode')
       else:
           prepared.set_characterset('ascii')
       proto_star = None
       try:
           proto_star = getattr(parser,"input")(prepared)
       except Y.yappsrt.YappsSyntaxError as e:
           parser_input = parser._scanner.input
           Y.yappsrt.print_error(parser_input, e, parser._scanner)
       except Y.yappsrt.NoMoreTokens:
           print('Could not complete parsing; stopped around here:',file=sys.stderr)
           print(parser._scanner,file=sys.stderr)
       except ValueError:
           print('Unexpected error:')
           import traceback
           traceback.print_exc()
       if proto_star is not None:
           proto_star.set_grammar(grammar_name)   #remember for output
           break
    if proto_star is None:
        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
        errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner )
        raise StarError( errorstring)
    # set visibility correctly
    proto_star.scoping = 'dictionary'
    proto_star.set_uri(my_uri)
    proto_star.scoping = save_scoping
    return proto_star

def get_dim(dataitem,current=0,packlen=0):
    zerotypes = [int, float, str]
    if type(dataitem) in zerotypes:
        return current, packlen
    if type(dataitem) not in (tuple,list):   #exact types only: subclasses are treated as scalars
       return current, packlen
    elif len(dataitem)>0:
    #    print("Get_dim: %d: %s" % (current,repr(dataitem)))
        return get_dim(dataitem[0],current+1,len(dataitem))
    else: return current+1,0
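
# Examples for get_dim (illustrative):
#   get_dim(5)              -> (0, 0)   scalar
#   get_dim([1,2,3])        -> (1, 3)   one nesting level, packet length 3
#   get_dim([[1,2],[3,4]])  -> (2, 2)   two nesting levels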

def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
    """Insert line folding characters into instring between min/max wraplength"""
    # first check that we need to do this
    lines = instring.split('\n')
    line_len = [len(l) for l in lines]
    if max(line_len) < maxwraplength and re.match(r"\\[ \v\t\f]*\n",instring) is None:
        return instring
    outstring = "\\\n"   #header
    for l in lines:
        if len(l) < maxwraplength:
            outstring = outstring + l
            if len(l) > 0 and l[-1]=='\\': #who'da thunk it?  A line ending with a backslash
                outstring = outstring + "\\\n"  # fold so the backslash is not misread
            outstring = outstring + "\n"  #  put back the split character
        else:
            current_bit = l
            while len(current_bit) > maxwraplength:
                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
                if space_pos is not None and space_pos.start()<maxwraplength-1:
                    outstring = outstring + current_bit[:minwraplength+space_pos.start()] + "\\\n"
                    current_bit = current_bit[minwraplength+space_pos.start():]
                else:    #just blindly insert
                    outstring = outstring + current_bit[:maxwraplength-1] + "\\\n"
                    current_bit = current_bit[maxwraplength-1:]
            outstring = outstring + current_bit
            if current_bit[-1] == '\\':  #a backslash just happens to be here
                outstring = outstring + "\\\n"
            outstring = outstring + '\n'
    outstring = outstring[:-1]  #remove final newline
    return outstring

def remove_line_folding(instring):
    """Remove line folding from instring"""
    if re.match(r"\\[ \v\t\f]*" +"\n",instring) is not None:
        return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M)
    else:
        return instring
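
# Illustrative round-trip (a sketch, not part of the library source):
#   folded = apply_line_folding("a" * 100)       # "\" header plus wrapped segments
#   assert remove_line_folding(folded) == "a" * 100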

def apply_line_prefix(instring,prefix):
    """Prefix every line in instring with prefix"""
    if prefix[0] != ";" and "\\" not in prefix:
        header = re.match(r"(\\[ \v\t\f]*" +"\n)",instring)
        if header is not None:
            print('Found line folded string for prefixing...')
            not_header = instring[header.end():]
            outstring = prefix + "\\\\\n" + prefix
        else:
            print('No folding in input string...')
            not_header = instring
            outstring = prefix + "\\\n" + prefix
        outstring = outstring + not_header.replace("\n","\n"+prefix)
        return outstring
    raise StarError("Requested prefix starts with semicolon or contains a backslash: " + prefix)

def remove_line_prefix(instring):
    """Remove prefix from every line if present"""
    prefix_match = re.match("(?P<prefix>[^;\\\n][^\n\\\\]+)(?P<folding>\\\\{1,2}[ \t\v\f]*\n)",instring)
    if prefix_match is not None:
        prefix_text = prefix_match.group('prefix')
        print('Found prefix %s' % prefix_text)
        prefix_end = prefix_match.end('folding')
        # keep any line folding instructions
        if prefix_match.group('folding')[:2]=='\\\\':  #two backslashes
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return "\\" + outstring  #keep line folding first line
        else:
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return outstring[1:]   #drop first line ending, no longer necessary
    else:
        return instring
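
# Illustrative round-trip (a sketch, not part of the library source):
#   prefixed = apply_line_prefix("line one\nline two", "CIF>")
#   assert remove_line_prefix(prefixed) == "line one\nline two"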


def listify(item):
    if isinstance(item,unicode): return [item]
    else: return item

#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = []
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl
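
# Illustrative usage (a sketch, not part of the library source):
#   transpose([[1,2,3],[4,5,6]])   # -> [[1,4],[2,5],[3,6]]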

# This routine optimised to return as quickly as possible
# as it is called a lot.
def not_none(itemlist):
    """Return true only if no values of None are present"""
    if itemlist is None:
        return False
    if not isinstance(itemlist,(tuple,list)):
        return True
    for x in itemlist:
       if not not_none(x): return False
    return True


def check_stringiness(data):
   """Check that the contents of data are all strings"""
   if not hasattr(data,'dtype'):   #so not Numpy
       from numbers import Number
       if isinstance(data,Number): return False
       elif isinstance(data,(unicode,str)): return True
       elif data is None:return False  #should be data are None :)
       else:
           for one_item in data:
               if not check_stringiness(one_item): return False
           return True   #all must be strings
   else:   #numerical python
       import numpy
       if data.ndim == 0:    #a bare value
           if data.dtype.kind in ['S','U']: return True
           else: return False
       else:
           for one_item in numpy.nditer(data):
               print('numpy data: ' + repr( one_item ))
               if not check_stringiness(one_item): return False
           return True
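
# Illustrative usage (a sketch, not part of the library source):
#   check_stringiness(['a', ('b', 'c')])   # -> True: every leaf is a string
#   check_stringiness(['a', 1.0])          # -> False: a number is present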

def process_template(template_file):
    """Process a template datafile to formatting instructions"""
    template_as_cif = StarFile(template_file,grammar="2.0").first_block()
    if isinstance(template_file,(unicode,str)):
        template_string = open(template_file).read()
    else:   #a StringIO object
        template_file.seek(0)   #reset
        template_string = template_file.read()
    #template_as_lines = template_string.split("\n")
    #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
    #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
    #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
    form_hints = []   #ordered array of hint dictionaries
    find_indent = "^ +"
    for item in template_as_cif.item_order:  #order of input
        if not isinstance(item,int):    #not nested
            hint_dict = {"dataname":item}
            # find the line in the file
            start_pos = re.search("(^[ \t]*(?P<name>" + re.escape(item) + ")[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M)
            if start_pos.group("spec") is not None:
                spec_pos = start_pos.start("spec")-start_pos.start(0)
                spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3]
                if spec_char[0] in '\'";':
                    hint_dict.update({"delimiter":spec_char[0]})
                    if spec_char == '"""' or spec_char == "'''":
                        hint_dict.update({"delimiter":spec_char})
                if spec_char[0] != ";":   #so we need to work out the column number
                    hint_dict.update({"column":spec_pos})
                else:                  #need to put in the carriage return
                    hint_dict.update({"delimiter":"\n;"})
                    # can we format the text?
                    text_val = template_as_cif[item]
                    hint_dict["reformat"] = "\n\t" in text_val or "\n  " in text_val
                    if hint_dict["reformat"]:   #find the indentation
                        p = re.search(find_indent,text_val,re.M)
                        if p.group() is not None:
                            hint_dict["reformat_indent"]=p.end() - p.start()
                if start_pos.group('name') is not None:
                    name_pos = start_pos.start('name') - start_pos.start(0)
                    hint_dict.update({"name_pos":name_pos})
            #print '%s: %s' % (item,`hint_dict`)
            form_hints.append(hint_dict)
        else:           #loop block
            testnames = template_as_cif.loops[item]
            total_items = len(template_as_cif.loops[item])
            testname = testnames[0]
            #find the loop spec line in the file
            loop_regex = "(^[ \t]*(?P<loop>loop_)[ \t\n\r]+(?P<name>" + re.escape(testname) + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1)
            loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
            loop_so_far = loop_line.end()
            packet_text = loop_line.group('packet')
            loop_indent = loop_line.start('loop') - loop_line.start(0)
            form_hints.append({"dataname":'loop','name_pos':loop_indent})
            packet_regex = "[ \t]*(?P<all>(?P<sqqq>'''([^\n\r\f']*)''')|(?P<sq>'([^\n\r\f']*)'+)|(?P<dq>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))"
            packet_pos = re.finditer(packet_regex,packet_text)
            line_end_pos = re.finditer("^",packet_text,re.M)
            next_end = next(line_end_pos).end()
            last_end = next_end
            for loopname in testnames:
                #find the name in the file for name pos
                name_regex = "(^[ \t]*(?P<name>" + re.escape(loopname) + "))"
                name_match = re.search(name_regex,template_string,re.I|re.M|re.S)
                loop_name_indent = name_match.start('name')-name_match.start(0)
                hint_dict = {"dataname":loopname,"name_pos":loop_name_indent}
                #find the value
                thismatch = next(packet_pos)
                while thismatch.start('all') > next_end:
                    try:
                        last_end = next_end
                        next_end = next(line_end_pos).start()
                        print('next end %d' % next_end)
                    except StopIteration:
                        break
                print('Start %d, last_end %d' % (thismatch.start('all'),last_end))
                col_pos = thismatch.start('all') - last_end + 1
                if thismatch.group('none') is None:
                    if thismatch.group('sqqq') is not None:
                        hint_dict.update({'delimiter':"'''"})
                    else:
                        hint_dict.update({'delimiter':thismatch.groups()[0][0]})
                hint_dict.update({'column':col_pos})
                print('%s: %s' % (loopname,repr( hint_dict )))
                form_hints.append(hint_dict)
    return form_hints
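
# Illustrative usage (a sketch, not part of the library source; the exact
# hint dictionaries depend on the template layout).  A CIF2.0 header is
# required because the template is parsed with grammar "2.0":
#   from io import StringIO
#   template = StringIO("#\\#CIF_2.0\ndata_template\n  _demo.value    12.3\n")
#   hints = process_template(template)
#   # e.g. [{'dataname': '_demo.value', 'column': ..., 'name_pos': 2}]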


#No documentation flags

Functions

def ReadStar(filename, prepared=None, maxlength=-1, scantype=u'standard', grammar=u'STAR2', CBF=False, permissive=False)

Read in a STAR file, returning the contents in the prepared object.

  • filename may be a URL, a file path on the local system, or any object with a read method.

  • prepared provides a StarFile or CifFile object that the contents of filename will be added to.

  • maxlength is the maximum allowable line length in the input file. This has been set at 2048 characters for CIF but is unlimited (-1) for STAR files.

  • grammar chooses the STAR grammar variant. 1.0 is the original 1992 CIF/STAR grammar and 1.1 is identical except for the exclusion of square brackets as the first characters in undelimited datanames. 2.0 will read files in the CIF2.0 standard, and STAR2 will read files according to the STAR2 publication. If grammar is None or auto, autodetection will be attempted in the order 2.0, 1.1 and 1.0. This will always succeed for conformant CIF2.0 files. Note that (nested) save frames are read in all grammar variations and then flagged afterwards if they do not match the requested grammar.

  • scantype can be standard or flex. standard provides pure Python parsing at the cost of a factor of 10 or so in speed. flex will tokenise the input CIF file using fast C routines. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument.

  • CBF flags that the input file is in Crystallographic Binary File format. The binary block is excised from the input data stream before parsing and is not available in the returned object.

  • permissive allows non-UTF-8 encodings (currently only latin1) in the input file, even though these are a violation of the standard.
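
A minimal usage sketch (the filename is invented and the import path assumes the standard PyCIFRW package layout):

    from CifFile.StarFile import StarFile, ReadStar

    target = StarFile()                       # container for the parsed blocks
    result = ReadStar('mydata.cif', prepared=target,
                      grammar='auto', maxlength=2048)  # 2048 = CIF line limit
    print(result.keys())                      # data block names, lower case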

def ReadStar(filename,prepared = None, maxlength=-1,
             scantype='standard',grammar='STAR2',CBF=False, permissive=False):

    """ Read in a STAR file, returning the contents in the `prepared` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename`
    will be added to.

    * `maxlength` is the maximum allowable line length in the input file. This has been set at
    2048 characters for CIF but is unlimited (-1) for STAR files.

    * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None` or `auto`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files.
    Note that (nested) save frames are read in all grammar variations and then flagged afterwards if
    they do not match the requested grammar.

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines.  Note that running PyCIFRW in Jython uses native Java regular expressions
    to provide a speedup regardless of this argument.

    * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is
    excised from the input data stream before parsing and is not available in the returned object.

    * `permissive` allows non-UTF-8 encodings (currently only latin1) in the input file. These are a
    violation of the standard.

    """

    if prepared is None:   #must precede use of prepared.scoping below
        prepared = StarFile()
    # save desired scoping
    save_scoping = prepared.scoping
    from . import YappsStarParser_1_1 as Y11
    from . import YappsStarParser_1_0 as Y10
    from . import YappsStarParser_2_0 as Y20
    from . import YappsStarParser_STAR2 as YST
    if grammar == "auto" or grammar is None:
        try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)]
    elif grammar == '1.0':
        try_list = [('1.0',Y10)]
    elif grammar == '1.1':
        try_list = [('1.1',Y11)]
    elif grammar == '2.0':
        try_list = [('2.0',Y20)]
    elif grammar == 'STAR2':
        try_list = [('STAR2',YST)]
    else:
        raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar ))
    if isinstance(filename,(unicode,str)):
        # create an absolute URL
        relpath = urlparse(filename)
        if relpath.scheme == "":
            if not os.path.isabs(filename):
                fullpath = os.path.join(os.getcwd(),filename)
            else:
                fullpath = filename
            newrel = list(relpath)
            newrel[0] = "file"
            newrel[2] = fullpath
            my_uri = urlunparse(newrel)
        else:
            my_uri = urlunparse(relpath)
        # print("Full URL is: " + my_uri)
        filestream = urlopen(my_uri)
        raw = filestream.read()   #read once only: the stream cannot be re-read
        try:
            text = raw.decode('utf-8-sig')
        except UnicodeDecodeError:
            if permissive:
                text = raw.decode('latin1')
            else:
                raise SyntaxError("%s: bad encoding (must be utf8 or ascii)" % filename)
        filestream.close()
    else:
        filestream = filename   #already opened for us
        text = filestream.read()
        if not isinstance(text,unicode):
            try:
                text = text.decode('utf-8-sig')  #CIF is always ascii/utf8
            except UnicodeDecodeError:
                if permissive:
                    text = text.decode('latin1')   #bytes kept from the first read
                else:
                    raise SyntaxError("Bad input encoding (must be utf8 or ascii)")
        my_uri = ""
    if not text:      # empty file, return empty block
        prepared.set_uri(my_uri)
        return prepared   #set_uri need not return the container
    # filter out non-ASCII characters in CBF files if required.  We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
    if CBF:
       text_bits  = text.split("-BINARY-FORMAT-SECTION-")
       text = text_bits[0]
       for section in range(2,len(text_bits),2):
           text = text+" (binary omitted)"+text_bits[section]
    # we recognise ctrl-Z as end of file
    endoffile = text.find(chr(26))
    if endoffile >= 0:
        text = text[:endoffile]
    split = text.split('\n')
    if maxlength > 0:
        toolong = [a for a in split if len(a)>maxlength]
        if toolong:
            pos = split.index(toolong[0])
            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    # honour the header string
    if text[:10] != r"#\#CIF_2.0" and ('2.0',Y20) in try_list:
        try_list.remove(('2.0',Y20))
        if not try_list:
            raise StarError('File %s missing CIF2.0 header' % (filename))
    for grammar_name,Y in try_list:
       if scantype == 'standard' or grammar_name in ['2.0','STAR2']:
            parser = Y.StarParser(Y.StarParserScanner(text))
       else:
            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
       # handle encoding switch
       if grammar_name in ['2.0','STAR2']:
           prepared.set_characterset('unicode')
       else:
           prepared.set_characterset('ascii')
       proto_star = None
       try:
           proto_star = getattr(parser,"input")(prepared)
       except Y.yappsrt.YappsSyntaxError as e:
           input = parser._scanner.input
           Y.yappsrt.print_error(input, e, parser._scanner)
       except Y.yappsrt.NoMoreTokens:
           print('Could not complete parsing; stopped around here:',file=sys.stderr)
           print(parser._scanner,file=sys.stderr)
       except ValueError:
           print('Unexpected error:')
           import traceback
           traceback.print_exc()
       if proto_star is not None:
           proto_star.set_grammar(grammar_name)   #remember for output
           break
    if proto_star is None:
        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
        errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner )
        raise StarError( errorstring)
    # set visibility correctly
    proto_star.scoping = 'dictionary'
    proto_star.set_uri(my_uri)
    proto_star.scoping = save_scoping
    return proto_star

Classes

class BlockCollection

A container for StarBlock objects. The constructor takes one non-keyword argument datasource to set the initial data. If datasource is a Python dictionary, the values must be StarBlock objects and the keys will be blocknames in the new object. Keyword arguments:

standard: CIF or Dic. CIF enforces 75-character blocknames, and will print block contents before that block's save frame.

blocktype: The type of blocks held in this container. Normally StarBlock or CifBlock.

characterset: ascii or unicode. Blocknames and datanames appearing within blocks are restricted to the appropriate characterset. Note that only characters in the basic multilingual plane are accepted. This restriction will be lifted when PyCIFRW is ported to Python3.

scoping: instance or dictionary: instance implies that save frames are hidden from save frames lower in the hierarchy or in sibling hierarchies. dictionary makes all save frames visible everywhere within a data block. This setting is only relevant for STAR2 dictionaries and STAR2 data files, as save frames are currently not used in plain CIF data files.
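
A brief usage sketch (block and data names are invented; imports assume the standard PyCIFRW package layout):

    from CifFile.StarFile import BlockCollection, StarBlock

    bc = BlockCollection(standard='CIF')      # empty collection
    bc['block_one'] = StarBlock()             # shorthand for NewBlock
    bc['block_one']['_demo.value'] = '12.3'   # fill the new block
    print('BLOCK_ONE' in bc)                  # lookup is case-insensitive: True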

class BlockCollection(object):
    """A container for StarBlock objects. The constructor takes
    one non-keyword argument `datasource` to set the initial data.  If
    `datasource` is a Python dictionary, the values must be `StarBlock`
    objects and the keys will be blocknames in the new object. Keyword
    arguments:

    standard:
        `CIF` or `Dic`.  `CIF` enforces 75-character blocknames, and will
        print block contents before that block's save frame.

    blocktype:
        The type of blocks held in this container. Normally `StarBlock`
        or `CifBlock`.

    characterset:
        `ascii` or `unicode`.  Blocknames and datanames appearing within
        blocks are restricted to the appropriate characterset. Note that
        only characters in the basic multilingual plane are accepted. This
        restriction will be lifted when PyCIFRW is ported to Python3.

    scoping:
        `instance` or `dictionary`: `instance` implies that save frames are
        hidden from save frames lower in the hierarchy or in sibling
        hierarchies. `dictionary` makes all save frames visible everywhere
        within a data block.  This setting is only relevant for STAR2 dictionaries and
        STAR2 data files, as save frames are currently not used in plain CIF data
        files.

"""
    def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
                 characterset='ascii',scoping='instance',**kwargs):
        import collections
        self.dictionary = {}
        self.standard = standard
        self.lower_keys = set()           # short_cuts
        self.renamed = {}
        self.PC = collections.namedtuple('PC',['block_id','parent'])
        self.child_table = {}
        self.visible_keys = []            # for efficiency
        self.block_input_order = []       # to output in same order
        self.scoping = scoping  #will trigger setting of child table
        self.blocktype = blocktype
        self.master_template = {}   #for outputting
        self.set_grammar('2.0')
        self.set_characterset(characterset)
        if isinstance(datasource,BlockCollection):
            self.merge_fast(datasource)
            self.scoping = scoping   #reset visibility
        elif isinstance(datasource,dict):
            for key,value in datasource.items():
                 self[key]= value
        self.header_comment = ''

    def set_grammar(self,new_grammar):
        """Set the syntax and grammar for output to `new_grammar`"""
        if new_grammar not in ['1.1','1.0','2.0','STAR2']:
            raise StarError('Unrecognised output grammar %s' % new_grammar)
        self.grammar = new_grammar

    def set_characterset(self,characterset):
        """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. If datanames
        have already been added to any datablocks, they are not checked."""
        self.characterset = characterset
        for one_block in self.lower_keys:
            self[one_block].set_characterset(characterset)

    def unlock(self):
        """Allow overwriting of all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite=True

    def lock(self):
        """Disallow overwriting for all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite = False

    def __str__(self):
        return self.WriteOut()

    def __setitem__(self,key,value):
        self.NewBlock(key,value,parent=None)

    def __getitem__(self,key):
        if isinstance(key,(unicode,str)):
           lowerkey = key.lower()
           if lowerkey in self.lower_keys:
               return self.dictionary[lowerkey]
           #print 'Visible keys:' + `self.visible_keys`
           #print 'All keys' + `self.lower_keys`
           #print 'Child table' + `self.child_table`
           raise KeyError('No such item %s' % key)

    # we have to get an ordered list of the current keys,
    # as we'll have to delete one of them anyway.
    # Deletion will delete any key regardless of visibility

    def __delitem__(self,key):
        dummy = self[key]   #raise error if not present
        lowerkey = key.lower()
        # get rid of all children recursively as well
        children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey]
        for child in children:
            del self[child]   #recursive call
        del self.dictionary[lowerkey]
        del self.child_table[lowerkey]
        try:
            self.visible_keys.remove(lowerkey)
        except ValueError:    #list.remove raises ValueError, not KeyError
            pass
        self.lower_keys.remove(lowerkey)
        self.block_input_order.remove(lowerkey)

    def __len__(self):
        return len(self.visible_keys)

    def __contains__(self,item):
        """Support the 'in' operator"""
        if not isinstance(item,(unicode,str)): return False
        if item.lower() in self.visible_keys:
            return True
        return False

    # We iterate over all visible
    def __iter__(self):
        for one_block in self.keys():
            yield self[one_block]

    # TODO: handle different case
    def keys(self):
        return self.visible_keys

    # Provide has_key for compatibility (dict.has_key was removed in Python 3)
    def has_key(self,key):
        return key in self

    def get(self,key,default=None):
        if key in self:     # take account of case
            return self.__getitem__(key)
        else:
            return default

    def clear(self):
        self.dictionary.clear()
        self.lower_keys = set()
        self.child_table = {}
        self.visible_keys = []
        self.block_input_order = []

    def copy(self):
        newcopy = self.dictionary.copy()  #all blocks
        for k,v in self.dictionary.items():
            newcopy[k] = v.copy()
        newcopy = BlockCollection(newcopy)
        newcopy.child_table = self.child_table.copy()
        newcopy.lower_keys = self.lower_keys.copy()
        newcopy.block_input_order = list(self.block_input_order)  #list.copy() needs Python >= 3.3
        newcopy.characterset = self.characterset
        newcopy.SetTemplate(self.master_template.copy())
        newcopy.scoping = self.scoping  #this sets visible keys
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self[key] = adict[key]

    def items(self):
        return [(a,self[a]) for a in self.keys()]

    def first_block(self):
        """Return the 'first' block.  This is not necessarily the first block in the file."""
        if self.keys():
            return self[self.keys()[0]]

    def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
        """Add a new block named `blockname` with contents `blockcontents`. If `fix`
        is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
        allows a parent block to be set so that block hierarchies can be created.  Depending on
        the output standard, these blocks will be printed out as nested save frames or
        ignored."""
        if blockcontents is None:
            blockcontents = self.blocktype()
        if self.standard == "CIF":
            blockcontents.setmaxnamelength(75)
        if len(blockname)>75:
                 raise StarError('Blockname %s is longer than 75 characters' % blockname)
        if fix:
            newblockname = re.sub('[  \t]','_',blockname)
        else: newblockname = blockname
        new_lowerbn = newblockname.lower()
        if new_lowerbn in self.lower_keys:   #already there
            if self.standard is not None:
               toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
               if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
                  while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
               elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
                  replace_name = new_lowerbn
                  while replace_name in self.lower_keys: replace_name = replace_name + '+'
                  self._rekey(new_lowerbn,replace_name)
                  # now continue on to add in the new block
                  if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                      parent = replace_name
               else:
                  raise StarError( "Attempt to replace existing block " + blockname)
            else:
               del self[new_lowerbn]
        self.dictionary.update({new_lowerbn:blockcontents})
        self.lower_keys.add(new_lowerbn)
        self.block_input_order.append(new_lowerbn)
        if parent is None:
           self.child_table[new_lowerbn]=self.PC(newblockname,None)
           self.visible_keys.append(new_lowerbn)
        else:
           if parent.lower() in self.lower_keys:
              if self.scoping == 'instance':
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
              else:
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
                 self.visible_keys.append(new_lowerbn)
           else:
               print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
        self[new_lowerbn].set_grammar(self.grammar)
        self[new_lowerbn].set_characterset(self.characterset)
        self[new_lowerbn].formatting_hints = self.master_template
        return new_lowerbn  #in case calling routine wants to know

    def _rekey(self,oldname,newname,block_id=''):
        """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
           does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version."""
        move_block = self[oldname]    #old block
        is_visible = oldname in self.visible_keys
        move_block_info = self.child_table[oldname]    #old info
        move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
        # now rewrite the necessary bits
        self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
        oldpos = self.block_input_order.index(oldname)
        del self[oldname]   #do this after updating child table so we don't delete children
        self.dictionary.update({newname:move_block})
        self.lower_keys.add(newname)
        #print 'Block input order was: ' + `self.block_input_order`
        self.block_input_order[oldpos:oldpos]=[newname]
        if block_id == '':
           self.child_table.update({newname:move_block_info})
        else:
           self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
        if is_visible: self.visible_keys += [newname]

    def rename(self,oldname,newname):
        """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
           conformance checks are conducted."""
        realoldname = oldname.lower()
        realnewname = newname.lower()
        if realnewname in self.lower_keys:
            raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname))
        if realoldname not in self.lower_keys:
            raise KeyError('Cannot find old block %s' % realoldname)
        self._rekey(realoldname,realnewname,block_id=newname)

    def makebc(self,namelist,scoping='dictionary'):
        """Make a block collection from a list of block names"""
        newbc = BlockCollection()
        block_lower = [n.lower() for n in namelist]
        proto_child_table = [a for a in self.child_table.items() if a[0] in block_lower]
        newbc.child_table = dict(proto_child_table)
        new_top_level = [(a[0],self.PC(a[1].block_id,None)) for a in newbc.child_table.items() if a[1].parent not in block_lower]
        newbc.child_table.update(dict(new_top_level))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        newbc.scoping = scoping
        newbc.block_input_order = block_lower
        return newbc


    def merge_fast(self,new_bc,parent=None):
        """Do a fast merge. WARNING: this may change one or more of its frame headers in order to
        remove duplicate frames.  Please keep a handle to the block object instead of the text of
        the header."""
        if self.standard is None:
            mode = 'replace'
        else:
            mode = 'strict'
        overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
        if parent is not None:
            parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent]
            if len(parent_name)==0 or len(parent_name)>1:
                raise StarError("Unable to find unique parent block name: have %s" % str(parent_name))
            parent_name = parent_name[0]
        else:
            parent_name = None  #an error will be thrown if we treat as a string
        if overlap_flag and mode != 'replace':
            double_keys = self.lower_keys.intersection(new_bc.lower_keys)
            for dup_key in double_keys:
                  our_parent = self.child_table[dup_key].parent
                  their_parent = new_bc.child_table[dup_key].parent
                  if (our_parent is None and their_parent is not None and parent is None) or\
                      parent is not None:  #rename our block
                    start_key = dup_key
                    while start_key in self.lower_keys: start_key = start_key+'+'
                    self._rekey(dup_key,start_key)
                    if parent_name is not None and parent_name.lower() == dup_key:  #we just renamed the prospective parent!!
                        parent_name = start_key
                  elif our_parent is not None and their_parent is None and parent is None:
                    start_key = dup_key
                    while start_key in new_bc.lower_keys: start_key = start_key+'+'
                    new_bc._rekey(dup_key,start_key)
                  else:
                    raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
        self.dictionary.update(new_bc.dictionary)
        self.lower_keys.update(new_bc.lower_keys)
        self.visible_keys += (list(new_bc.lower_keys))
        self.block_input_order += new_bc.block_input_order
        #print('Block input order now:' + repr(self.block_input_order))
        self.child_table.update(new_bc.child_table)
        if parent_name is not None:     #redo the child_table entries
              reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
              reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
              self.child_table.update(dict(reparent_dict))

    def merge(self,new_bc,mode=None,parent=None,single_block=[],
                   idblock="",match_att=[],match_function=None):
        if mode is None:
            if self.standard is None:
               mode = 'replace'
            else:
               mode = 'strict'
        if single_block:
            self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                                   match_att=match_att,
                                                   match_function=match_function)
            return None
        base_keys = [a[1].block_id for a in self.child_table.items()]
        block_to_item = base_keys   #default
        new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
        if match_att:
            #make a blockname -> item name map
            if match_function:
                block_to_item = [match_function(self[a]) for a in self.keys()]
            else:
                block_to_item = [self[a].get(match_att[0],None) for a in self.keys()]
            #print `block_to_item`
        for key in new_keys:        #run over incoming blocknames
            if key == idblock: continue    #skip dictionary id
            basekey = key           #default value
            if len(match_att)>0:
               attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
            else:
               attval = 0
            for ii in range(len(block_to_item)):  #do this way to get looped names
                thisatt = block_to_item[ii]       #keyname in old block
                #print "Looking for %s in %s" % (attval,thisatt)
                if attval == thisatt or \
                   (isinstance(thisatt,list) and attval in thisatt):
                      basekey = base_keys.pop(ii)
                      block_to_item.remove(thisatt)
                      break
            if not basekey in self or mode=="replace":
                new_parent = new_bc.get_parent(key)
                if parent is not None and new_parent is None:
                   new_parent = parent
                self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
            else:
                if mode=="strict":
                    raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
                elif mode=="overlay":
                    # print "Merging block %s with %s" % (basekey,key)
                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
                else:
                    raise StarError( "Merge called with unknown mode %s" % mode)

    def checknamelengths(self,target_block,maxlength=-1):
        if maxlength < 0:
            return
        else:
            toolong = [a for a in target_block.keys() if len(a)>maxlength]
        outstring = ""
        if toolong:
           outstring = "\n".join(toolong)
           raise StarError( 'Following data names too long:' + outstring)

    def get_all(self,item_name):
        raw_values = [self[a].get(item_name) for a in self.keys()]
        raw_values = [a for a in raw_values if a != None]
        ret_vals = []
        for rv in raw_values:
            if isinstance(rv,list):
                for rvv in rv:
                    if rvv not in ret_vals: ret_vals.append(rvv)
            else:
                if rv not in ret_vals: ret_vals.append(rv)
        return ret_vals

    def __setattr__(self,attr_name,newval):
        if attr_name == 'scoping':
            if newval not in ('dictionary','instance'):
                raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
            if newval == 'dictionary':
                self.visible_keys = [a for a in self.lower_keys]
            else:
                #only top-level datablocks visible
                self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None]
        object.__setattr__(self,attr_name,newval)

    def get_parent(self,blockname):
        """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
        possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
        try:
            first = next(possibles)   #get first one
        except StopIteration:   #no block of that name
            raise StarError('no parent for %s' % blockname)
        try:
           second = next(possibles)
        except StopIteration:
           return first[1].parent
        raise StarError('More than one parent for %s' % blockname)

    def get_roots(self):
        """Get the top-level blocks"""
        return [a for a in self.child_table.items() if a[1].parent==None]

    def get_children(self,blockname,include_parent=False,scoping='dictionary'):
        """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
        True, the parent block will also be included in the block collection as the root."""
        newbc = BlockCollection()
        block_lower = blockname.lower()
        proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
        newbc.child_table = dict(proto_child_table)
        if not include_parent:
           newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        if include_parent:
            newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
            newbc.lower_keys.add(block_lower)
            newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
        newbc.scoping = scoping
        return newbc

    def get_immediate_children(self,parentname):
        """Get the next level of children of the given block as a list, without nested levels"""
        child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
        return child_handles

    # This takes time
    def get_child_list(self,parentname):
        """Get a list of all child categories in alphabetical order"""
        child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
        child_handles.sort()
        return child_handles

    def is_child_of_parent(self,parentname,blockname):
        """Return `True` if `blockname` is a child of `parentname`"""
        checkname = parentname.lower()
        more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
        if blockname.lower() in more_children:
           return True
        else:
           for one_child in more_children:
               if self.is_child_of_parent(one_child,blockname): return True
        return False

    def set_parent(self,parentname,childname):
        """Set the parent block"""
        # first check that both blocks exist
        if parentname.lower() not in self.lower_keys:
            raise KeyError('Parent block %s does not exist' % parentname)
        if childname.lower() not in self.lower_keys:
            raise KeyError('Child block %s does not exist' % childname)
        old_entry = self.child_table[childname.lower()]
        self.child_table[childname.lower()]=self.PC(old_entry.block_id,
               parentname.lower())
        self.scoping = self.scoping #reset visibility

    def SetTemplate(self,template_file):
            """Use `template_file` as a template for all block output"""
            self.master_template = process_template(template_file)
            for b in self.dictionary.values():
                b.formatting_hints = self.master_template

    def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
        """Return the contents of this file as a string, wrapping if possible at `wraplength`
        characters and restricting maximum line length to `maxoutlength`.  Delimiters and
        save frame nesting are controlled by `self.grammar`. If `blockorder` is
        provided, blocks are output in this order unless nested save frames have been
        requested (STAR2). The default block order is the order in which blocks were input.
        `saves_after` inserts all save frames after the given dataname,
        which allows less important items to appear later.  Useful in conjunction with a
        template for dictionary files."""
        if maxoutlength != 0:
            self.SetOutputLength(maxoutlength)
        if not comment:
            comment = self.header_comment
        outstring = StringIO()
        if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
            outstring.write(r"#\#CIF_2.0" + "\n")
        outstring.write(comment)
        # prepare all blocks
        for b in self.dictionary.values():
            b.set_grammar(self.grammar)
            b.formatting_hints = self.master_template
            b.SetOutputLength(wraplength,self.maxoutlength)
        # loop over top-level
        # monitor output
        all_names = list(self.child_table.keys())   #i.e. lower case
        if blockorder is None:
            blockorder = self.block_input_order
        top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
        for blockref,blockname in top_block_names:
            print('Writing %s, ' % blockname + repr(self[blockref]))
            outstring.write('\n' + 'data_' +blockname+'\n')
            all_names.remove(blockref)
            if self.standard == 'Dic':              #put contents before save frames
                outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
            if self.grammar == 'STAR2':  #nested save frames
                child_refs = self.get_immediate_children(blockref)
                for child_ref,child_info in child_refs:
                    child_name = child_info.block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    self.block_to_string_nested(child_ref,child_name,outstring,4)
                    outstring.write('\n' + 'save_'+ '\n')
            elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
                child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
                for child_ref in child_refs:
                    child_name = self.child_table[child_ref].block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    outstring.write(str(self[child_ref]))
                    outstring.write('\n\n' + 'save_' + '\n')
                    all_names.remove(child_ref.lower())
            else:
                raise StarError('Grammar %s is not recognised for output' % self.grammar)
            if self.standard != 'Dic':              #put contents after save frames
                outstring.write(str(self[blockref]))
            else:
                outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
        returnstring =  outstring.getvalue()
        outstring.close()
        if len(all_names)>0:
            print('WARNING: following blocks not output: %s' % repr(all_names))
        else:
            print('All blocks output.')
        return returnstring

    def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0):
        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children,
           and syntactically nesting save frames"""
        child_refs = self.get_immediate_children(block_ref)
        self[block_ref].set_grammar(self.grammar)
        if self.standard == 'Dic':
            outstring.write(str(self[block_ref]))
        for child_ref,child_info in child_refs:
            child_name = child_info.block_id
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string_nested(child_ref,child_name,outstring,indentlevel)
            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
        if self.standard != 'Dic':
            outstring.write(str(self[block_ref]))

Instance variables

var PC

Methods

def NewBlock(self, blockname, blockcontents=None, fix=True, parent=None)

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.
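
A hierarchy-building sketch (names are invented; with the default instance scoping the child block is created but hidden from keys()):

    bc = BlockCollection(standard='CIF')
    bc.NewBlock('dict_top')                      # top-level data block
    bc.NewBlock('defn_one', parent='dict_top')   # becomes a save frame on output
    print(bc.get_immediate_children('dict_top')) # [('defn_one', PC(block_id='defn_one', parent='dict_top'))]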

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning: Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know

def SetTemplate(self, template_file)

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.
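
A serialisation sketch (names are invented; the StarFile subclass is used because it initialises the output-length attributes that WriteOut consults):

    sf = StarFile()
    sf.NewBlock('demo')
    sf['demo']['_demo.item'] = 'a_value'
    sf.set_grammar('1.1')                     # CIF 1.1 delimiters, no CIF2 header
    cif_text = sf.WriteOut(comment='#example output\n')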

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring

class CIFStringIO

class CIFStringIO(StringIO):
    def __init__(self,target_width=80,**kwargs):
        StringIO.__init__(self,**kwargs)
        self.currentpos = 0
        self.target_width = target_width
        self.tabwidth = -1
        self.indentlist = [0]
        self.last_char = ""

    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,
                             delimiter=False,startcol=-1):
        """Write a string with correct linebreak, tabs and indents"""
        # do we need to break?
        if delimiter:
            if len(outstring)>1:
                raise ValueError('Delimiter %s is longer than one character' % repr( outstring ))
            output_delimiter = True
        if mustbreak:    #insert a new line and indent
            temp_string = '\n' + ' ' * self.indentlist[-1]
            StringIO.write(self,temp_string)
            self.currentpos = self.indentlist[-1]
            self.last_char = temp_string[-1]
        if self.currentpos+len(outstring)>self.target_width: #try to break
            if not delimiter and outstring[0]!='\n':          #ie <cr>;
              if canbreak:
                temp_string = '\n' + ' ' * self.indentlist[-1]
                StringIO.write(self,temp_string)
                self.currentpos = self.indentlist[-1]
                self.last_char = temp_string[-1]
            else:        #assume a break will be forced on next value
                output_delimiter = False    #the line break becomes the delimiter
        #try to match requested column
        if startcol > 0:
            if self.currentpos < startcol:
                StringIO.write(self,(startcol - self.currentpos)* ' ')
                self.currentpos = startcol
                self.last_char = ' '
            else:
                print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos))
                startcol = -1   #so that tabbing works as a backup
        #handle tabs
        if self.tabwidth >0 and do_tab and startcol < 0:
            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
            #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
            if self.currentpos < next_stop:
                StringIO.write(self,(next_stop-self.currentpos)*' ')
                self.currentpos = next_stop
                self.last_char = ' '
        #calculate indentation after tabs and col setting applied
        if newindent:           #indent by current amount
            if self.indentlist[-1] == 0:    #first time
                self.indentlist.append(self.currentpos)
                # print 'Indentlist: ' + `self.indentlist`
            else:
                self.indentlist.append(self.indentlist[-1]+2)
        elif unindent:
            if len(self.indentlist)>1:
                self.indentlist.pop()
            else:
                print('Warning: cannot unindent any further')
        #check that we still need a delimiter
        if self.last_char in [' ','\n','\t']:
            output_delimiter = False
        #now output the string - every invocation comes through here
        if (delimiter and output_delimiter) or not delimiter:
            StringIO.write(self,outstring)
        last_line_break = outstring.rfind('\n')
        if last_line_break >=0:
            self.currentpos = len(outstring)-last_line_break
        else:
            self.currentpos = self.currentpos + len(outstring)
        #remember the last character
        if len(outstring)>0:
            self.last_char = outstring[-1]

    def set_tab(self,tabwidth):
        """Set the tab stop position"""
        self.tabwidth = tabwidth

Ancestors (in MRO)

StringIO
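
A minimal usage sketch (assuming CIFStringIO can be imported from
CifFile.StarFile; the dataname and value are invented for illustration):

    from CifFile.StarFile import CIFStringIO   # assumed import path

    out = CIFStringIO(target_width=40)
    out.set_tab(20)                              # values start at column 20
    out.write('_cell.length_a', do_tab=False)    # dataname at column 0
    out.write(' ', canbreak=True, do_tab=False, delimiter=True)
    out.write('5.959', canbreak=True)            # padded out to the tab stop
    print(out.getvalue())                        # '_cell.length_a      5.959'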

class LoopBlock

class LoopBlock(object):
    def __init__(self,parent_block,dataname):
        self.loop_no = parent_block.FindLoop(dataname)
        if self.loop_no < 0:
            raise KeyError('%s is not in a loop structure' % dataname)
        self.parent_block = parent_block

    def keys(self):
        return self.parent_block.loops[self.loop_no]

    def values(self):
        return [self.parent_block[a] for a in self.keys()]

    #Avoid iterator even though that is Python3-esque
    def items(self):
        return list(zip(self.keys(),self.values()))

    def __getitem__(self,dataname):
        if isinstance(dataname,int):   #a packet request
            return self.GetPacket(dataname)
        if dataname in self.keys():
            return self.parent_block[dataname]
        else:
            raise KeyError('%s not in loop block' % dataname)

    def __setitem__(self,dataname,value):
        self.parent_block[dataname] = value
        self.parent_block.AddLoopName(self.keys()[0],dataname)

    def __contains__(self,key):
        return key in self.parent_block.loops[self.loop_no]

    def has_key(self,key):
        return key in self

    def __iter__(self):
        packet_list = zip(*self.values())
        names = self.keys()
        for p in packet_list:
            r = StarPacket(p)
            for n in range(len(names)):
                setattr(r,names[n].lower(),r[n])
            yield r

    # for compatibility
    def __getattr__(self,attname):
        return getattr(self.parent_block,attname)

    def load_iter(self,coords=[]):
        count = 0        #to create packet index
        while not self.popout:
            # ok, we have a new packet:  append a list to our subloops
            for aloop in self.loops:
                aloop.new_enclosing_packet()
            for iname in self.item_order:
                if isinstance(iname,LoopBlock):       #into a nested loop
                    for subitems in iname.load_iter(coords=coords+[count]):
                        # print 'Yielding %s' % `subitems`
                        yield subitems
                    # print 'End of internal loop'
                else:
                    if self.dimension == 0:
                        # print 'Yielding %s' % `self[iname]`
                        yield self,self[iname]
                    else:
                        backval = self.block[iname]
                        for i in range(len(coords)):
                           # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                           backval = backval[coords[i]]
                        yield self,backval
            count = count + 1      # count packets
        self.popout = False        # reinitialise
        # print 'Finished iterating'
        yield self,'###Blank###'     #this value should never be used

    # an experimental fast iterator for level-1 loops (ie CIF)
    def fast_load_iter(self):
        targets = [self.block[a] for a in self.item_order]  #a map() would be lazy under Python 3
        while targets:
            for target in targets:
                yield self,target

    # Add another list of the required shape to take into account a new outer packet
    def new_enclosing_packet(self):
        if self.dimension > 1:      #otherwise have a top-level list
            for iname in self.keys():  #includes lower levels
                target_list = self[iname]
                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of...
                    target_list = target_list[-1]
                target_list.append([])
                # print '%s now %s' % (iname,`self[iname]`)

    def recursive_iter(self,dict_so_far={},coord=[]):
        # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
        my_length = 0
        top_items = list(self.block.items())   #list() so views are indexable under Python 3
        top_values = list(self.block.values())       #same order as items
        drill_values = list(self.block.values())
        for dimup in range(0,self.dimension):  #look higher in the tree
            if len(drill_values)>0:            #this block has values
                drill_values=drill_values[0]   #drill in
            else:
                raise StarError("Malformed loop packet %s" % repr( top_items[0] ))
        my_length = len(drill_values[0])       #length of 'string' entry
        if self.dimension == 0:                #top level
            for aloop in self.loops:
                for apacket in aloop.recursive_iter():
                    # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) )
                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
                    for name,value in top_items + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
        else:                                  #in some loop
            for i in range(my_length):
                kvpairs = [(a,self.coord_to_group(a,coord)[i]) for a in self.block.keys()]
                kvvals = [a[1] for a in kvpairs]   #just values
                # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs ))
                if self.loops:
                  for aloop in self.loops:
                    for apacket in aloop.recursive_iter(coord=coord+[i]):
                        # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) )
                        prep_yield = StarPacket(kvvals+apacket.values())
                        for name,value in kvpairs + apacket.items():
                            setattr(prep_yield,name,value)
                        yield prep_yield
                else:           # we're at the bottom of the tree
                    # print "Recursive yielding %s" % repr( dict(kvpairs) )
                    prep_yield = StarPacket(kvvals)
                    for name,value in kvpairs:
                        setattr(prep_yield,name,value)
                    yield prep_yield

    # small function to use the coordinates.
    def coord_to_group(self,dataname,coords):
          if not isinstance(dataname,unicode):
             return dataname     # flag inner loop processing
          newm = self[dataname]          # newm must be a list or tuple
          for c in coords:
              # print "Coord_to_group: %s ->" % (repr( newm )),
              newm = newm[c]
              # print repr( newm )
          return newm

    def flat_iterator(self):
            my_length = 0
            top_keys = list(self.block.keys())   #list() so we can index under Python 3
            if len(top_keys)>0:
                my_length = len(self.block[top_keys[0]])
            for pack_no in range(my_length):
                yield(self.collapse(pack_no))


    def RemoveItem(self,itemname):
        """Remove `itemname` from the block."""
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey in self:
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)

    def RemoveLoopItem(self,itemname):
        """*Deprecated*. Use `RemoveItem` instead"""
        self.RemoveItem(itemname)

    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)

    def GetPacket(self,index):
        thispack = StarPacket([])
        for myitem in self.parent_block.loops[self.loop_no]:
            thispack.append(self[myitem][index])
            setattr(thispack,myitem,thispack[-1])
        return thispack

    def AddPacket(self,packet):
        for myitem in self.parent_block.loops[self.loop_no]:
            old_values = self.parent_block[myitem]
            old_values.append(packet.__getattribute__(myitem))
            self.parent_block[myitem] = old_values

    def GetItemOrder(self):
        """Return a list of datanames in this `LoopBlock` in the order that they will be
        printed"""
        return self.parent_block.loops[self.loop_no][:]


    def ChangeItemOrder(self,itemname,newpos):
        """Change the position at which `itemname` appears when printing out to `newpos`."""
        self.parent_block.loops[self.loop_no].remove(itemname.lower())
        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

    def GetItemPosition(self,itemname):
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

Ancestors (in MRO)

object
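
A minimal usage sketch (assuming StarBlock and LoopBlock are importable from
CifFile.StarFile; the _atom_site datanames are invented for illustration):

    from CifFile.StarFile import StarBlock

    block = StarBlock()
    block['_atom_site.label'] = ['C1', 'N1']
    block['_atom_site.occupancy'] = ['1.0', '0.5']
    block.CreateLoop(['_atom_site.label', '_atom_site.occupancy'])
    loop = block.GetLoop('_atom_site.label')   # a LoopBlock view of that loop
    for packet in loop:                        # one StarPacket per row
        print(getattr(packet, '_atom_site.label'),
              getattr(packet, '_atom_site.occupancy'))

getattr is needed to read packet attributes because full datanames contain a
period.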

Methods

def AddPacket(self, packet)

def AddPacket(self,packet):
    for myitem in self.parent_block.loops[self.loop_no]:
        old_values = self.parent_block[myitem]
        old_values.append(packet.__getattribute__(myitem))
        self.parent_block[myitem] = old_values

def AddToLoop(self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(self, itemname, newpos)

Change the position at which itemname appears when printing out to newpos.

def ChangeItemOrder(self,itemname,newpos):
    """Change the position at which `itemname` appears when printing out to `newpos`."""
    self.parent_block.loops[self.loop_no].remove(itemname.lower())
    self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

def GetItemOrder(self)

Return a list of datanames in this LoopBlock in the order that they will be printed

def GetItemOrder(self):
    """Return a list of datanames in this `LoopBlock` in the order that they will be
    printed"""
    return self.parent_block.loops[self.loop_no][:]

def GetItemPosition(self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos
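
Continuing the sketch above, the returned coordinates distinguish looped from
un-looped items:

    block.GetItemPosition('_atom_site.occupancy')   # (1, 1): loop 1, second column
    block.GetItemPosition(1)                        # (-1, 0): loop 1 prints first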

def GetLoop(self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

def GetLoopNames(self, keyname)

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetPacket(self, index)

def GetPacket(self,index):
    thispack = StarPacket([])
    for myitem in self.parent_block.loops[self.loop_no]:
        thispack.append(self[myitem][index])
        setattr(thispack,myitem,thispack[-1])
    return thispack
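
Continuing the sketch above, a single row can be fetched by index (indexing
the LoopBlock with an integer calls GetPacket):

    first_row = loop.GetPacket(0)              # same as loop[0]
    getattr(first_row, '_atom_site.label')     # 'C1'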

def RemoveItem(self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveLoopItem(self, itemname)

Deprecated. Use RemoveItem instead.

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

class StarBlock

class StarBlock(object):
    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
                 characterset='ascii',maxnamelength=-1):
        self.block = {}    #the actual data storage (lower case keys)
        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
        self.item_order = []  #lower case, loops referenced by integer
        self.formatting_hints = {}
        self.true_case = {} #transform lower case to supplied case
        self.provide_value = False  #prefer string version always
        self.dictionary = None      #DDLm dictionary
        self.popout = False         #used during load iteration
        self.curitem = -1           #used during iteration
        self.cache_vals = True      #store all calculated values
        self.maxoutlength = maxoutlength
        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
        self.set_characterset(characterset)   #to check input names
        self.wraplength = wraplength
        self.overwrite = overwrite
        self.string_delimiters = ["'",'"',"\n;"]   #universal CIF set
        self.list_delimiter = "  "                 #CIF2 default
        self.wrapper = textwrap.TextWrapper()
        if isinstance(data,(tuple,list)):
            for item in data:
                self.AddLoopItem(item)
        elif isinstance(data,StarBlock):
            self.block = data.block.copy()
            self.item_order = data.item_order[:]
            self.true_case = data.true_case.copy()
            # loops as well
            self.loops = data.loops.copy()

    def setmaxnamelength(self,maxlength):
        """Set the maximum allowable dataname length (-1 for no check)"""
        self.maxnamelength = maxlength
        if maxlength > 0:
            bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
            if len(bad_names)>0:
                raise StarError('Datanames too long: ' + repr( bad_names ))

    def set_characterset(self,characterset):
        """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
        self.characterset = characterset
        if characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
        elif characterset == 'unicode':
            if sys.maxunicode < 1114111:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
            else:
               self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)

    def __str__(self):
        return self.printsection()

    def __setitem__(self,key,value):
        if key == "saves":
            raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
        self.AddItem(key,value)

    def __getitem__(self,key):
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""")
        try:
           rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
           if self.dictionary:
               # send the dictionary the required key and a pointer to us
               try:
                   new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
               except StarDerivationFailure:   #try now with defaults included
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
                   except StarDerivationFailure as s:
                       print("In StarBlock.__getitem__, " + repr(s))
                       raise KeyError('No such item: %s' % key)
               #print('Set %s to derived value %s' % (key, repr(new_value)))
               return new_value
           else:
               raise KeyError('No such item: %s' % key)
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not key in self.dictionary: return rawitem
        #print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
        if is_value:
            if self.provide_value: return rawitem
            else:
               #print('Turning %s into string' % repr( rawitem ))
               return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
                                      (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)):
                return self.dictionary.change_type(key,rawitem)
            elif self.provide_value: # catch the question marks
                do_calculate = False
                if isinstance(rawitem,(list,tuple)):
                    known = [a for a in rawitem if a != '?']
                    if len(known) == 0:   #all questions
                        do_calculate = True
                elif rawitem == '?':
                        do_calculate = True
                if do_calculate:
                   # remove old value
                   del self[key]
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
                   except StarDerivationFailure as s:
                       try:
                           new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                        except StarDerivationFailure as s:
                            print("Could not turn %s into a value: %s" % (key,repr(s)))
                           return rawitem
                   else:
                       #print('Set %s to derived value %s' % (key, repr( new_value )))
                       return new_value
            return rawitem   #can't do anything

    def __delitem__(self,key):
        self.RemoveItem(key)

    def __len__(self):
        blen = len(self.block)
        return blen

    def __nonzero__(self):
        if self.__len__() > 0: return 1
        return 0

    # keys returns all internal keys
    def keys(self):
        return list(self.block.keys())    #always lower case

    def values(self):
        return [self[a] for a in self.keys()]

    def items(self):
        return list(zip(self.keys(),self.values()))

    def __contains__(self,key):
        if isinstance(key,(unicode,str)) and key.lower() in self.keys():
            return True
        return False

    def has_key(self,key):
        return key in self

    def has_key_or_alias(self,key):
        """Check if a dataname or alias is available in the block"""
        initial_test = key in self
        if initial_test: return True
        elif self.dictionary:
            aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)]
            if len(aliases)>0:
               return True
        return False

    def get(self,key,default=None):
        if key in self:
            retval = self.__getitem__(key)
        else:
            retval = default
        return retval

    def clear(self):
        self.block = {}
        self.loops = {}
        self.item_order = []
        self.true_case = {}

    # doesn't appear to work
    def copy(self):
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        newcopy.loops = self.loops.copy()
    #    return self.copy.im_class(newcopy)   #catch inheritance
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self.AddItem(key,adict[key])

    def GetItemPosition(self,itemname):
        """A utility function to get the numerical order in the printout
        of `itemname`.  An item has coordinate `(loop_no,pos)` with
        the top level having a `loop_no` of -1.  If an integer is passed to
        the routine then it will return the position of the loop
        referenced by that number."""
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not itemname in self:
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def ChangeItemOrder(self,itemname,newpos):
        """Move the printout order of `itemname` to `newpos`. If `itemname` is
        in a loop, `newpos` refers to the order within the loop."""
        if isinstance(itemname,(unicode,str)):
            true_name = itemname.lower()
        else:
            true_name = itemname
        loopno = self.FindLoop(true_name)
        if loopno < 0:  #top level
            self.item_order.remove(true_name)
            self.item_order.insert(newpos,true_name)
        else:
            self.loops[loopno].remove(true_name)
            self.loops[loopno].insert(newpos,true_name)

    def GetItemOrder(self):
        """Return a list of datanames in the order in which they will be printed.  Loops are
        referred to by numerical index"""
        return self.item_order[:]

    def AddItem(self,key,value,precheck=False):
        """Add dataname `key` to block with value `value`.  `value` may be
        a single value, a list or a tuple. If `precheck` is False (the default),
        all values will be checked and converted to unicode strings as necessary. If
        `precheck` is True, this checking is bypassed.  No checking is necessary
        when values are read from a CIF file as they are already in correct form."""
        if not isinstance(key,(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
        key = unicode(key)    #everything is unicode internally
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
        # check for overwriting
        if key in self:
             if not self.overwrite:
                 raise StarError( 'Attempt to insert duplicate item name %s' % key)
        if not precheck:   #need to sanitise
            regval,empty_val = self.regularise_data(value)
            pure_string = check_stringiness(regval)
            self.check_item_value(regval)
        else:
            regval,empty_val = value,None
            pure_string = True
        # update ancillary information first
        lower_key = key.lower()
        if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # always remove from our case table in case the case is different
        try:
            del self.true_case[lower_key]
        except KeyError:
            pass
        self.true_case[lower_key] = key
        if pure_string:
            self.block.update({lower_key:[regval,empty_val]})
        else:
            self.block.update({lower_key:[empty_val,regval]})
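
    # Usage sketch: AddItem stores values under the lower-cased dataname
    # while remembering the original case for printing (the dataname below
    # is invented for illustration).
    #
    #   block = StarBlock()
    #   block.AddItem('_Cell.Length_a', '5.959(1)')
    #   block['_cell.length_a']              # '5.959(1)' (case-insensitive lookup)
    #   block.true_case['_cell.length_a']    # '_Cell.Length_a'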

    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
        necessary."""
        # print "Received data %s" % `incomingdata`
        # we accept tuples, strings, lists and dicts!!
        # Direct insertion: we have a string-valued key, with an array
        # of values -> single-item into our loop
        if isinstance(incomingdata[0],(tuple,list)):
           # a whole loop
           keyvallist = zip(incomingdata[0],incomingdata[1])
           for key,value in keyvallist:
               self.AddItem(key,value)
           self.CreateLoop(incomingdata[0])
        elif not isinstance(incomingdata[0],(unicode,str)):
             raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
        else:
            self.AddItem(incomingdata[0],incomingdata[1])

    def check_data_name(self,dataname,maxlength=-1):
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print 'Checking %s for unicode characterset conformance' % dataname
            if len ([a for a in dataname if ord(a) < 33]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xFFFE) == 0xFFFE]) > 0:
                print('%s fails' % dataname)
                for a in dataname: print('%x' % ord(a),end="")
                print()
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')
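
    # Behaviour sketch for the checks above:
    #
    #   block.check_data_name('_atom_site.label')   # passes silently
    #   block.check_data_name('atom_site.label')    # StarError: no leading underscore
    #   block.check_data_name('_bad name')          # StarError: space is forbidden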

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
           test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,unicode):
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    print("Fail value check: %s" % it)
                    raise StarError('Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                        raise StarError('Data item "' + repr( it ) +  u'"... contains forbidden characters')
        [check_one(a) for a in test_item]

    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary"""
        from numbers import Number
        if isinstance(dataitem,str):
            return unicode(dataitem),None
        if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
            return dataitem,None  #assume StarList/StarDict contain unicode if necessary
        if isinstance(dataitem,(tuple,list)):
            v,s = zip(*list([self.regularise_data(a) for a in dataitem]))
            return list(v),list(s)
            #return dataitem,[None]*len(dataitem)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        v,s = zip(*list([self.regularise_data(a) for a in regval]))
        return list(v),list(s)
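
    # Behaviour sketch: strings and numbers pass through with a None in the
    # "value" slot, while sequences are regularised elementwise.
    #
    #   block.regularise_data('abc')        # ('abc', None)
    #   block.regularise_data(1.5)          # (1.5, None)
    #   block.regularise_data(['a', 'b'])   # (['a', 'b'], [None, None])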

    def RemoveItem(self,itemname):
        """Remove `itemname` from the block."""
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if testkey in self:
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)

    def RemoveLoopItem(self,itemname):
        """*Deprecated*. Use `RemoveItem` instead"""
        self.RemoveItem(itemname)

    def GetItemValue(self,itemname):
        """Return value of `itemname`.  If `itemname` is looped, a list
        of all values will be returned."""
        return self.GetFullItemValue(itemname)[0]

    def GetFullItemValue(self,itemname):
        """Return the value associated with `itemname`, and a boolean flagging whether
        (True) or not (False) it is in a form suitable for calculation.  False is
        always returned for strings and `StarList` objects."""
        try:
            s,v = self.block[itemname.lower()]
        except KeyError:
            raise KeyError('Itemname %s not in datablock' % itemname)
        # prefer string value unless all are None
        # are we a looped value?
        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
            if not_none(s):
                return s,False    #a string value
            else:
                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
        elif not_none(s):
            return s,False         #a list of string values
        else:
            if len(v)>0:
                return v,not isinstance(v[0],StarList)
            return v,True

    def CreateLoop(self,datanames,order=-1,length_check=True):
           """Create a loop in the datablock. `datanames` is a list of datanames that
           together form a loop.  If length_check is True, they should have been initialised in the block
           to have the same number of elements (possibly 0). If `order` is given,
           the loop will appear at this position in the block when printing
           out. A loop counts as a single position."""

           if length_check:
               # check lengths: these datanames should exist
               listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
               if len(listed_values) == len(datanames):
                   len_set = set([len(self[a]) for a in datanames])
                   if len(len_set)>1:
                       raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
               elif len(listed_values) != 0:
                   raise ValueError('Request to loop datanames where some are single values and some are not')
           # store as lower case
           lc_datanames = [d.lower() for d in datanames]
           # remove these datanames from all other loops
           [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
           # remove empty loops
           empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
           for a in empty_loops:
               self.item_order.remove(a)
               del self.loops[a]
           if len(self.loops)>0:
               loopno = max(self.loops.keys()) + 1
           else:
               loopno = 1
           self.loops[loopno] = list(lc_datanames)
           if order >= 0:
               self.item_order.insert(order,loopno)
           else:
               self.item_order.append(loopno)
           # remove these datanames from item ordering
           self.item_order = [a for a in self.item_order if a not in lc_datanames]
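
    # Usage sketch: the datanames must already be present in the block with
    # equal-length values before the loop is created.
    #
    #   block['_atom_site.label'] = ['C1', 'N1']
    #   block['_atom_site.occupancy'] = ['1.0', '0.5']
    #   block.CreateLoop(['_atom_site.label', '_atom_site.occupancy'])
    #   block.FindLoop('_atom_site.label')   # 1, the index of the new loop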

    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised.  If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised."""
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length
        old_provides = self.provide_value
        self.provide_value = False
        loop_len = len(self[oldname])
        self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass
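
    # Usage sketch: extend the loop created above with a third column of
    # matching length.
    #
    #   block['_atom_site.type_symbol'] = ['C', 'N']
    #   block.AddLoopName('_atom_site.label', '_atom_site.type_symbol')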

    def FindLoop(self,keyname):
        """Find the loop that contains `keyname` and return its numerical index or
        -1 if not present. The numerical index can be used to refer to the loop in
        other routines."""
        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
        if len(loop_no)>0:
            return loop_no[0]
        else:
            return -1

    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and `value`
        is a list of values for that dataname"""
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr( bad_vals ),loop_len))
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Remove the packet for which dataname `keyname` takes
        value `keyvalue`.  Only the first such occurrence is
        removed."""
        packet_coord = list(self[keyname]).index(keyvalue)
        loopnames = self.GetLoopNames(keyname)
        for dataname in loopnames:
            self.block[dataname][0] = list(self.block[dataname][0])
            del self.block[dataname][0][packet_coord]
            self.block[dataname][1] = list(self.block[dataname][1])
            del self.block[dataname][1][packet_coord]
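
    # Usage sketch: remove the loop row in which _atom_site.label is 'N1'.
    #
    #   block.RemoveKeyedPacket('_atom_site.label', 'N1')
    #   block['_atom_site.label']   # ['C1']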

    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet (a `StarPacket` object) where `keyname` has value
        `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
        is raised if no packet is found or more than one packet is found."""
        my_loop = self.GetLoop(keyname)
        #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
        #print('Packet check on:' + keyname)
        #[print(repr(getattr(a,keyname))) for a in my_loop]
        if no_case:
           one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
           one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
        if len(one_pack)!=1:
            raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
        #print("Keyed packet: %s" % one_pack[0])
        return one_pack[0]
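
    # Usage sketch, continuing the _atom_site loop above:
    #
    #   pack = block.GetKeyedPacket('_atom_site.label', 'C1')
    #   getattr(pack, '_atom_site.occupancy')   # '1.0'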

    def GetCompoundKeyedPacket(self,keydict):
        """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
        in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
        True.  `ValueError` is raised if no packet is found or more than one packet is found."""
        #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
        keynames = list(keydict.keys())
        my_loop = self.GetLoop(keynames[0])
        for one_key in keynames:
            keyval,no_case = keydict[one_key]
            if no_case:
               my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
            else:
               my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
        if len(my_loop)!=1:
            raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
        #print("Compound keyed packet: %s" % my_loop[0])
        return my_loop[0]

    def GetKeyedSemanticPacket(self,keyvalue,cat_id):
        """Return a complete packet for category `cat_id` where the
        category key for the category equals `keyvalue`.  This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        both categories."""
        target_keys = self.dictionary.cat_key_table[cat_id]
        target_keys = [k[0] for k in target_keys] #one only in each list
        p = StarPacket()
        # set case-sensitivity flag
        lcase = False
        if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
            lcase = True
        for cat_key in target_keys:
            try:
                extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except KeyError:        #missing key
                try:
                    test_key = self[cat_key]  #generate key if possible
                    #print('Test key is %s' % repr( test_key ))
                    if test_key is not None and\
                    not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                        #print('Getting packet for key %s' % repr( keyvalue ))
                        extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
                except Exception:   #cannot be generated
                    continue
            except ValueError:      #none/more than one, assume none
                continue
                #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        for keyname in target_keys:
            if hasattr(p,keyname):
                p.key = [keyname]
                break
        if not hasattr(p,"key"):
            raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p

    def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
        """Return a complete packet for category `cat_id` where the keyvalues are
        provided as a dictionary of key:(value,caseless) pairs
        This routine
        will understand any joined loops, so if separate loops in the
        datafile belong to the
        same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
        the returned `StarPacket` object will contain datanames from
        the requested category and any children."""
        #if len(keyvalues)==1:   #simplification
        #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
        target_keys = self.dictionary.cat_key_table[cat_id]
        # update the dictionary passed to us with all equivalents, for
        # simplicity.
        parallel_keys = list(zip(*target_keys))  #transpose
        #print('Parallel keys:' + repr(parallel_keys))
        #print('Keydict:' + repr(keydict))
        start_keys = list(keydict.keys())
        for one_name in start_keys:
            key_set = [a for a in parallel_keys if one_name in a]
            for one_key in key_set:
                keydict[one_key] = keydict[one_name]
        # target_keys is a list of lists, each of which is a compound key
        p = StarPacket()
        # a little function to return the dataname for a key
        def find_key(key):
            for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
                if self.has_key(one_key):
                    return one_key
            return None
        for one_set in target_keys: #loop down the categories
            true_keys = [find_key(k) for k in one_set]
            true_keys = [k for k in true_keys if k is not None]
            if len(true_keys)==len(one_set):
                truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
                try:
                    extra_packet = self.GetCompoundKeyedPacket(truekeydict)
                except KeyError:     #one or more are missing
                    continue         #should try harder?
                except ValueError:
                    continue
            else:
                continue
            #print('Merging packet for keys ' + repr(one_set))
            p.merge_packet(extra_packet)
        # the following attributes used to calculate missing values
        p.key = true_keys
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p


    def set_grammar(self,new_grammar):
        self.string_delimiters = ["'",'"',"\n;",None]
        if new_grammar in ['STAR2','2.0']:
            self.string_delimiters += ['"""',"'''"]
        if new_grammar == '2.0':
            self.list_delimiter = "  "
        elif new_grammar == 'STAR2':
            self.list_delimiter = ", "
        elif new_grammar not in ['1.0','1.1']:
            raise StarError('Request to set unknown grammar %s' % new_grammar)
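
    # Usage sketch: switching to CIF2 conventions adds the triple-quoted
    # string delimiters and keeps the default list delimiter.
    #
    #   block.set_grammar('2.0')
    #   block.string_delimiters   # ["'", '"', '\n;', None, '"""', "'''"]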

    def SetOutputLength(self,wraplength=80,maxoutlength=2048):
        """Set the maximum output line length (`maxoutlength`) and the line length to
        wrap at (`wraplength`).  The wrap length is a target only and may not always be
        possible."""
        if wraplength > maxoutlength:
            raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
        self.wraplength = wraplength
        self.maxoutlength = maxoutlength

    def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''):
        self.provide_value = False
        # first make an ordering
        self.create_ordering(finish_at,start_from)  #create self.output_order
        # now do it...
        if not instring:
            outstring = CIFStringIO(target_width=80)       # the returned string
        else:
            outstring = instring
        # print block delimiter
        outstring.write(blockstart,canbreak=True)
        while len(self.output_order)>0:
           #print "Remaining to output " + `self.output_order`
           itemname = self.output_order.pop(0)
           if not isinstance(itemname,int):  #no loop
                   item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()]
                   if len(item_spec)>0:
                       item_spec = item_spec[0]
                       col_pos = item_spec.get('column',-1)
                       name_pos = item_spec.get('name_pos',-1)
                   else:
                       col_pos = -1
                       item_spec = {}
                       name_pos = -1
                   if col_pos < 0: col_pos = 40
                   outstring.set_tab(col_pos)
                   itemvalue = self[itemname]
                   outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos)
                   outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)    #space after itemname
                   self.format_value(itemvalue,outstring,hints=item_spec)
           else:# we are asked to print a loop block
                    outstring.set_tab(10)       #guess this is OK?
                    loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop']
                    if loop_spec:
                        loop_indent = max(loop_spec[0],0)
                    else:
                        loop_indent = indent
                    outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent)
                    self.format_names(outstring,indent+2,loop_no=itemname)
                    self.format_packets(outstring,indent+2,loop_no=itemname)
        returnstring = outstring.getvalue()
        outstring.close()
        return returnstring

    def format_names(self,outstring,indent=0,loop_no=-1):
        """Print datanames from `loop_no` one per line"""
        temp_order = self.loops[loop_no][:]   #copy
        format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order])
        while len(temp_order)>0:
            itemname = temp_order.pop(0)
            req_indent = format_hints.get(itemname,{}).get('name_pos',indent)
            outstring.write(' ' * req_indent,do_tab=False)
            outstring.write(self.true_case[itemname],do_tab=False)
            outstring.write("\n",do_tab=False)

    def format_packets(self,outstring,indent=0,loop_no=-1):
       alldata = [self[a] for a in self.loops[loop_no]]
       loopnames = self.loops[loop_no]
       #print 'Alldata: %s' % `alldata`
       packet_data = list(zip(*alldata))
       #print 'Packet data: %s' % `packet_data`
       #create a dictionary for quick lookup of formatting requirements
       format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames])
       for position in range(len(packet_data)):
           if position > 0:
               outstring.write("\n")    #new line each packet except first
           for point in range(len(packet_data[position])):
               datapoint = packet_data[position][point]
               format_hint = format_hints.get(loopnames[point],{})
               self.format_packet_item(datapoint,indent,outstring,format_hint)
               outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)

    def format_packet_item(self,pack_item,indent,outstring,format_hint):
           # print 'Formatting %s' % `pack_item`
           # temporary check for any non-unicode items
           if isinstance(pack_item,str) and not isinstance(pack_item,unicode):
               raise StarError("Item {0!r} is not unicode".format(pack_item))
           if isinstance(pack_item,unicode):
               delimiter = format_hint.get('delimiter',None)
               startcol = format_hint.get('column',-1)
               outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol)
           else:
               self.format_value(pack_item,outstring,hints = format_hint)

    def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}):
        if hints.get("reformat",False) and "\n" in instring:
            instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"])
        allowed_delimiters = set(self.string_delimiters)
        if len(instring)==0: allowed_delimiters.difference_update([None])
        if len(instring) > (self.maxoutlength-2) or '\n' in instring:
                allowed_delimiters.intersection_update(["\n;","'''",'"""'])
        if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring:
                allowed_delimiters.difference_update([None])
        if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'):
                allowed_delimiters.difference_update([None])
        if len(instring)>5 and instring[:6].lower()=='global':
                allowed_delimiters.difference_update([None])
        if '"' in instring: allowed_delimiters.difference_update(['"',None])
        if "'" in instring: allowed_delimiters.difference_update(["'",None])
        out_delimiter = "\n;"  #default (most conservative)
        if delimiter in allowed_delimiters:
            out_delimiter = delimiter
        elif "'" in allowed_delimiters: out_delimiter = "'"
        elif '"' in allowed_delimiters: out_delimiter = '"'
        if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
        elif out_delimiter is None: return instring
        # we are left with semicolon strings
        # use our protocols:
        maxlinelength = max([len(a) for a in instring.split('\n')])
        if maxlinelength > self.maxoutlength:
            protocol_string = apply_line_folding(instring)
        else:
            protocol_string = instring
        # now check for embedded delimiters
        if "\n;" in protocol_string:
            prefix = "CIF:"
            while prefix in protocol_string: prefix = prefix + ":"
            protocol_string = apply_line_prefix(protocol_string,prefix+"> ")
        return "\n;" + protocol_string + "\n;"

    def format_value(self,itemvalue,stringsink,compound=False,hints={}):
        """Format a Star data value"""
        global have_numpy
        delimiter = hints.get('delimiter',None)
        startcol = hints.get('column',-1)
        if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed
            raise StarError("Non-unicode value {0} found in block".format(itemvalue))
        if isinstance(itemvalue,unicode):  #need to sanitize
            stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol)
        elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy
           stringsink.set_tab(0)
           stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol)
           if len(itemvalue)>0:
               self.format_value(itemvalue[0],stringsink)
               for listval in itemvalue[1:]:
                  # print 'Formatting %s' % `listval`
                  stringsink.write(self.list_delimiter,do_tab=False)
                  self.format_value(listval,stringsink,compound=True)
           stringsink.write(']',unindent=True)
        elif isinstance(itemvalue,dict):
           stringsink.set_tab(0)
           stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol)  #start a new line inside
           items = list(itemvalue.items())
           if len(items)>0:
               stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
               self.format_value(items[0][1],stringsink)
               for key,value in items[1:]:
                   stringsink.write(self.list_delimiter)
                   stringsink.write("'"+key+"'"+":",canbreak=True)
                   self.format_value(value,stringsink)   #never break between key and value
           stringsink.write('}',unindent=True)
        elif isinstance(itemvalue,(float,int)) or \
             (have_numpy and isinstance(itemvalue,(numpy.number))):  #TODO - handle uncertainties
           stringsink.write(str(itemvalue),canbreak=True,startcol=startcol)   #numbers
        else:
           raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue ))

    def create_ordering(self,finish_at,start_from):
        """Create a canonical ordering that includes loops using our formatting hints dictionary"""
        requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop'])
        new_order = []
        for item in requested_order:
           if isinstance(item,unicode) and item.lower() in self.item_order:
               new_order.append(item.lower())
           elif item in self:    #in a loop somewhere
               target_loop = self.FindLoop(item)
               if target_loop not in new_order:
                   new_order.append(target_loop)
                   # adjust loop name order
                   loopnames = self.loops[target_loop]
                   loop_order = [i for i in requested_order if i in loopnames]
                   unordered = [i for i in loopnames if i not in loop_order]
                   self.loops[target_loop] = loop_order + unordered
        extras = list([i for i in self.item_order if i not in new_order])
        self.output_order = new_order + extras
        # now handle partial output
        if start_from != '':
            if start_from in requested_order:
                sfi = requested_order.index(start_from)
                loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0]
                candidates = list([k for k in self.output_order if k in requested_order[sfi:]])
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from))
                    self.output_order = self.output_order[cand_pos:]
                else:
                    print('Start is beyond end of output list')
                    self.output_order = []
            elif start_from in extras:
               self.output_order = self.output_order[self.output_order.index(start_from):]
            else:
               self.output_order = []
        if finish_at != '':
            if finish_at in requested_order:
                fai = requested_order.index(finish_at)
                loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0])
                candidates = list([k for k in self.output_order if k in requested_order[fai:]])
                cand_pos = len(new_order)
                if len(candidates)>0:
                    cand_pos = self.output_order.index(candidates[0])
                if len(loop_order)>0:
                    cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
                if cand_pos < len(self.output_order):
                    print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at))
                    self.output_order = self.output_order[:cand_pos]
                else:
                    print('All of block output')
            elif finish_at in extras:
               self.output_order = self.output_order[:self.output_order.index(finish_at)]
        #print('Final order: ' + repr(self.output_order))

    def convert_to_string(self,dataname):
        """Convert values held in dataname value fork to string version"""
        v,is_value = self.GetFullItemValue(dataname)
        if not is_value:
            return v
        if check_stringiness(v): return v   #already strings
        # TODO...something else
        return v

    def do_wrapping(self,instring,indent=3):
        """Wrap the provided string"""
        if "   " in instring:   #already formatted
            return instring
        self.wrapper.initial_indent = ' '*indent
        self.wrapper.subsequent_indent = ' '*indent
        # remove leading and trailing space
        instring = instring.strip()
        # split into paragraphs
        paras = instring.split("\n\n")
        wrapped_paras = [self.wrapper.fill(p) for p in paras]
        return "\n".join(wrapped_paras)


    def merge(self,new_block,mode="strict",match_att=[],match_function=None,
                   rel_keys = []):
        if mode == 'strict':
           for key in new_block.keys():
               if key in self and key not in match_att:
                  raise StarError( "Identical keys %s in strict merge mode" % key)
               elif key not in match_att:           #a new dataname
                   self[key] = new_block[key]
           # we get here if there are no keys in common, so we can now copy
           # the loops and not worry about overlaps
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'replace':
           newkeys = list(new_block.keys())
           for ma in match_att:
              try:
                   newkeys.remove(ma)        #don't touch the special ones
              except ValueError:
                   pass
           for key in new_block.keys():
                  if isinstance(key,unicode):
                      self[key] = new_block[key]
           # creating the loop will remove items from other loops
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'overlay':
           print('Overlay mode, current overwrite is %s' % self.overwrite)
           raise StarError('Overlay block merge mode not implemented')
           # Note: the code below this `raise` is unreachable legacy code kept
           # from an earlier implementation; `newkeys` and `overlaps` are never
           # defined in this branch.
           save_overwrite = self.overwrite
           self.overwrite = True
           for attribute in new_block.keys():
               if attribute in match_att: continue      #ignore this one
               new_value = new_block[attribute]
               #non-looped items
               if new_block.FindLoop(attribute)<0:     #not looped
                  self[attribute] = new_value
           my_loops = self.loops.values()
           perfect_overlaps = [a for a in new_block.loops if a in my_loops]
           for po in perfect_overlaps:
              loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
              try:
                  newkeypos = map(lambda a:newkeys.index(a),loop_keys)
                  newkeypos = newkeypos[0]      #one key per loop for now
                  loop_keys = loop_keys[0]
              except (ValueError,IndexError):
                  newkeypos = []
                  overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
                  new_data = map(lambda a:new_block[a],overlaps) #new packet data
                  packet_data = transpose(overlap_data)
                  new_p_data = transpose(new_data)
                  # remove any packets for which the keys match between old and new; we
                  # make the arbitrary choice that the old data stays
                  if newkeypos:
                      # get matching values in new list
                      print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos])))
                      key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
                      # filter out any new data with these key values
                      new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
                      if new_p_data:
                          new_data = transpose(new_p_data)
                      else: new_data = []
                  # wipe out the old data and enter the new stuff
                  byebyeloop = self.GetLoop(overlaps[0])
                  # print("Removing '%r' with overlaps '%r'" % (byebyeloop, overlaps))
                  # Note that if, in the original dictionary, overlaps are not
                  # looped, GetLoop will return the block itself.  So we check
                  # for this case...
                  if byebyeloop != self:
                      self.remove_loop(byebyeloop)
                  self.AddLoopItem((overlaps,overlap_data))  #adding old packets
                  for pd in new_p_data:                             #adding new packets
                     if pd not in packet_data:
                        for i in range(len(overlaps)):
                            #don't do this at home; we are appending
                            #to something in place
                            self[overlaps[i]].append(pd[i])
           self.overwrite = save_overwrite

    def assign_dictionary(self,dic):
        if not dic.diclang=="DDLm":
            print("Warning: ignoring dictionary %s" % dic.my_uri)
            return
        self.dictionary = dic

    def unassign_dictionary(self):
        """Remove dictionary-dependent behaviour"""
        self.dictionary = None

Methods

def AddItem(self, key, value, precheck=False)

Add dataname key to the block with the given value, which may be a single value, a list or a tuple. If precheck is False (the default), all values will be checked and converted to unicode strings as necessary. If precheck is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.

def AddItem(self,key,value,precheck=False):
    """Add dataname `key` to block with value `value`.  `value` may be
    a single value, a list or a tuple. If `precheck` is False (the default),
    all values will be checked and converted to unicode strings as necessary. If
    `precheck` is True, this checking is bypassed.  No checking is necessary
    when values are read from a CIF file as they are already in correct form."""
    if not isinstance(key,(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
    key = unicode(key)    #everything is unicode internally
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
    # check for overwriting
    if key in self:
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    if not precheck:   #need to sanitise
        regval,empty_val = self.regularise_data(value)
        pure_string = check_stringiness(regval)
        self.check_item_value(regval)
    else:
        regval,empty_val = value,None
        pure_string = True
    # update ancillary information first
    lower_key = key.lower()
    if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    try:
        del self.true_case[lower_key]
    except KeyError:
        pass
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})
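
A minimal usage sketch (assuming a directly-created CifBlock; these methods are equally available on blocks obtained by reading a file):

from CifFile import CifBlock

blk = CifBlock()
blk.AddItem('_cell_length_a', '5.959')          # single value, checked and stored as unicode
blk.AddItem('_atom_site_label', ['C1', 'O1'])   # a list of values, ready to be looped later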

def AddLoopItem(self, incomingdata, precheck=False, maxlength=-1)

Deprecated. Use AddItem followed by CreateLoop if necessary.

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

def AddLoopName(self, oldname, newname)

Add newname to the loop containing oldname. If it is already in the new loop, no error is raised. If newname is in a different loop, it is removed from that loop. The number of values associated with newname must match the number of values associated with all other columns of the new loop or a ValueError will be raised.

def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise ValueError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass
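
A hedged sketch of growing a loop one column at a time, using CreateLoop (documented below):

from CifFile import CifBlock

blk = CifBlock()
blk['_atom_site_label'] = ['C1', 'O1']
blk['_atom_site_occupancy'] = ['1.0', '0.5']      # same length as the label column
blk.CreateLoop(['_atom_site_label'])
blk.AddLoopName('_atom_site_label', '_atom_site_occupancy')  # occupancy joins the label loop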

def AddToLoop(self, dataname, loopdata)

Deprecated. Use AddItem followed by calls to AddLoopName.

Add multiple columns to the loop containing dataname. loopdata is a collection of (key,value) pairs, where key is the new dataname and value is a list of values for that dataname

def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.
    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
           % (repr( bad_vals ),loop_len))
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(self, itemname, newpos)

Move the printout order of itemname to newpos. If itemname is in a loop, newpos refers to the order within the loop.

def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

def CreateLoop(self, datanames, order=-1, length_check=True)

Create a loop in the datablock. datanames is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If order is given, the loop will appear at this position in the block when printing out. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""
       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]
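
For example (a sketch, reusing the CifBlock import from the sketches above; the datanames must already hold equal-length lists):

blk = CifBlock()
blk['_atom_site_label'] = ['C1', 'O1', 'N1']
blk['_atom_site_type_symbol'] = ['C', 'O', 'N']
blk.CreateLoop(['_atom_site_label', '_atom_site_type_symbol'])  # both columns have length 3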

def FindLoop(self, keyname)

Find the loop that contains keyname and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.

def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1

def GetCompoundKeyedPacket(self, keydict)

Return the loop packet (a StarPacket object) where the {key:(value,caseless)} pairs in keydict take the appropriate values. Ignore case for a given key if caseless is True. ValueError is raised if no packet is found or more than one packet is found.

def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]
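
A sketch of the expected keydict shape, using the looped block from the CreateLoop example above:

keydict = {'_atom_site_label': ('C1', False),        # second element False: exact match
           '_atom_site_type_symbol': ('c', True)}    # True: compare without regard to case
packet = blk.GetCompoundKeyedPacket(keydict)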

def GetFullItemValue(self, itemname)

Return the value associated with itemname, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and StarList objects.

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True
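
Illustration of the two-valued return, assuming the block from the AddItem sketch above:

value, is_calc = blk.GetFullItemValue('_cell_length_a')
# value is u'5.959'; is_calc is False, as strings are never calculation-ready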

def GetItemOrder(self)

Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index

def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

def GetItemPosition(self, itemname)

A utility function to get the numerical order in the printout of itemname. An item has coordinate (loop_no,pos) with the top level having a loop_no of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.

def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos
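
For example, with the looped block built above (loop numbering starts at 1):

blk.GetItemPosition('_atom_site_label')   # e.g. (1, 0): first column of loop 1
blk.GetItemPosition(1)                    # (-1, n): printout position of loop 1 itself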

def GetItemValue(self, itemname)

Return value of itemname. If itemname is looped, a list of all values will be returned.

def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(self, keyname, keyvalue, no_case=False)

Return the loop packet (a StarPacket object) where keyname has value keyvalue. Ignore case in keyvalue if no_case is True. ValueError is raised if no packet is found or more than one packet is found.

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]
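
Continuing the looped block above: packet values are retrieved as attributes named after the datanames, for example:

packet = blk.GetKeyedPacket('_atom_site_label', 'C1')
packet._atom_site_type_symbol    # -> 'C', taken from the same loop row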

def GetKeyedSemanticPacket(self, keyvalue, cat_id)

Return a complete packet for category cat_id where the category key for the category equals keyvalue. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from both categories.

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def GetLoop(self, keyname)

Return a StarFile.LoopBlock object constructed from the loop containing keyname. keyname is only significant as a way to specify the loop.

def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

def GetLoopNames(self, keyname)

Return all datanames appearing together with keyname

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetMultiKeyedSemanticPacket(self, keydict, cat_id)

Return a complete packet for category cat_id where the keyvalues are provided as a dictionary of key:(value,caseless) pairs. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. _atom_site and _atom_site_aniso), the returned StarPacket object will contain datanames from the requested category and any children.

def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    print('Parallel keys:' + repr(parallel_keys))
    print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def RemoveItem(self, itemname)

Remove itemname from the block.

def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(self, keyname, keyvalue)

Remove the packet for which dataname keyname takes value keyvalue. Only the first such occurrence is removed.

def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]
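
For example, with the looped block built above:

blk.RemoveKeyedPacket('_atom_site_label', 'O1')   # deletes the whole row keyed by 'O1'
# blk['_atom_site_type_symbol'] is now ['C', 'N']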

def RemoveLoopItem(self, itemname)

Deprecated. Use RemoveItem instead

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

def SetOutputLength(self, wraplength=80, maxoutlength=2048)

Set the maximum output line length (maxoutlength) and the line length to wrap at (wraplength). The wrap length is a target only and may not always be possible.

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength

class StarDerivationError

class StarDerivationError(Exception):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed, None returned" % self.fail_name

class StarDerivationFailure

class StarDerivationFailure(AttributeError):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed" % self.fail_name

Ancestors (in MRO)

  • StarDerivationFailure
  • exceptions.AttributeError
  • exceptions.StandardError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class StarDict

class StarDict(dict):
    pass

Ancestors (in MRO)

  • StarDict
  • __builtin__.dict
  • __builtin__.object

class StarError

class StarError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar Format error: '+ self.value

Ancestors (in MRO)

  • StarError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class StarFile

class StarFile(BlockCollection):
    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
                scoping='instance',grammar='1.1',scantype='standard',
                 permissive=False,**kwargs):
        super(StarFile,self).__init__(datasource=datasource,**kwargs)
        self.my_uri = getattr(datasource,'my_uri','')
        if maxoutlength == 0:
            self.maxoutlength = 2048
        else:
            self.maxoutlength = maxoutlength
        self.scoping = scoping
        if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"):
            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
                     maxlength = maxinlength,permissive=permissive)
        self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file
#               Produced by PySTARRW module
#
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""
    def set_uri(self,my_uri): self.my_uri = my_uri

Instance variables

var PC

Inheritance: BlockCollection.PC

Methods

def NewBlock(self, blockname, blockcontents=None, fix=True, parent=None)

Inheritance: BlockCollection.NewBlock

Add a new block named blockname with contents blockcontents. If fix is True, blockname will have spaces and tabs replaced by underscores. parent allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know
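
A hedged sketch of building a block hierarchy (child blocks are printed as save frames, depending on the grammar):

from CifFile.StarFile import StarFile

sf = StarFile()
sf.NewBlock('experiment')
sf.NewBlock('frame_1', parent='experiment')   # printed as a save frame inside data_experiment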

def SetTemplate(self, template_file)

Inheritance: BlockCollection.SetTemplate

Use template_file as a template for all block output

def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

def WriteOut(self, comment=u'', wraplength=80, maxoutlength=0, blockorder=None, saves_after=None)

Inheritance: BlockCollection.WriteOut

Return the contents of this file as a string, wrapping if possible at wraplength characters and restricting maximum line length to maxoutlength. Delimiters and save frame nesting are controlled by self.grammar. If blockorder is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. saves_after inserts all save frames after the given dataname, which allows less important items to appear later. Useful in conjunction with a template for dictionary files.

def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
    """Return the contents of this file as a string, wrapping if possible at `wraplength`
    characters and restricting maximum line length to `maxoutlength`.  Delimiters and
    save frame nesting are controlled by `self.grammar`. If `blockorder` is
    provided, blocks are output in this order unless nested save frames have been
    requested (STAR2). The default block order is the order in which blocks were input.
    `saves_after` inserts all save frames after the given dataname,
    which allows less important items to appear later.  Useful in conjunction with a
    template for dictionary files."""
    if maxoutlength != 0:
        self.SetOutputLength(maxoutlength)
    if not comment:
        comment = self.header_comment
    outstring = StringIO()
    if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
        outstring.write(r"#\#CIF_2.0" + "\n")
    outstring.write(comment)
    # prepare all blocks
    for b in self.dictionary.values():
        b.set_grammar(self.grammar)
        b.formatting_hints = self.master_template
        b.SetOutputLength(wraplength,self.maxoutlength)
    # loop over top-level
    # monitor output
    all_names = list(self.child_table.keys())   #i.e. lower case
    if blockorder is None:
        blockorder = self.block_input_order
    top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
    for blockref,blockname in top_block_names:
        print('Writing %s, ' % blockname + repr(self[blockref]))
        outstring.write('\n' + 'data_' +blockname+'\n')
        all_names.remove(blockref)
        if self.standard == 'Dic':              #put contents before save frames
            outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
        if self.grammar == 'STAR2':  #nested save frames
            child_refs = self.get_immediate_children(blockref)
            for child_ref,child_info in child_refs:
                child_name = child_info.block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                self.block_to_string_nested(child_ref,child_name,outstring,4)
                outstring.write('\n' + 'save_'+ '\n')
        elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
            child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
            for child_ref in child_refs:
                child_name = self.child_table[child_ref].block_id
                outstring.write('\n\n' + 'save_' + child_name + '\n')
                outstring.write(str(self[child_ref]))
                outstring.write('\n\n' + 'save_' + '\n')
                all_names.remove(child_ref.lower())
        else:
            raise StarError('Grammar %s is not recognised for output' % self.grammar)
        if self.standard != 'Dic':              #put contents after save frames
            outstring.write(str(self[blockref]))
        else:
            outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
    returnstring =  outstring.getvalue()
    outstring.close()
    if len(all_names)>0:
        print('WARNING: following blocks not output: %s' % repr(all_names))
    else:
        print('All blocks output.')
    return returnstring
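
A round-trip sketch, assuming a hypothetical input file 'input.star' (reading from disk ensures the grammar is set before output):

from CifFile.StarFile import StarFile

sf = StarFile('input.star', grammar='1.1')
contents = sf.WriteOut(wraplength=78)
open('roundtrip.star', 'w').write(contents)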

class StarLengthError

class StarLengthError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar length error: ' + self.value

Ancestors (in MRO)

  • StarLengthError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

class StarList

class StarList(list):
    def __getitem__(self,args):
        if isinstance(args,(int,slice)):
            return super(StarList,self).__getitem__(args)
        elif isinstance(args,tuple) and len(args)>1:   #extended comma notation
            return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:])
        else:
            return super(StarList,self).__getitem__(args[0])

    def __str__(self):
        return "SL("+super(StarList,self).__str__() + ")"

Ancestors (in MRO)

  • StarList
  • __builtin__.list
  • __builtin__.object
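
The extended comma notation in __getitem__ allows multi-dimensional indexing when the nested elements are themselves StarList objects; a small sketch:

from CifFile.StarFile import StarList

sl = StarList([StarList([1, 2]), StarList([3, 4])])
sl[1, 0]     # -> 3, equivalent to sl[1][0]
str(sl)      # -> 'SL([[1, 2], [3, 4]])'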

class StarPacket

class StarPacket(list):
    def merge_packet(self,incoming):
        """Merge contents of incoming packet with this packet"""
        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
        self.extend(incoming)
        for na in new_attrs:
            setattr(self,na,getattr(incoming,na))

    def __getattr__(self,att_name):
        """Derive a missing attribute"""
        if att_name.lower() in self.__dict__:
            return getattr(self,att_name.lower())
        if att_name in ('cif_dictionary','fulldata','key'):
            raise AttributeError('Programming error: can only assign value of %s' % att_name)
        d = self.cif_dictionary
        c = self.fulldata
        k = self.key
        assert isinstance(k,list)
        d.derive_item(att_name,c,store_value=True)
        #
        # now pick out the new value
        # self.key is a list of the key values
        keydict = dict([(v,(getattr(self,v),True)) for v in k])
        full_pack = c.GetCompoundKeyedPacket(keydict)
        return getattr(full_pack,att_name)


CifFile.StarScan module

A tokeniser for Star files


CifFile.TestDrel module

# Test suite for the dRel parser
#

import unittest
from drel import drel_lex,drel_ast_yacc,py_from_ast,drel_runtime
import numpy
import CifFile
from CifFile import StarFile

class dRELRuntimeTestCase(unittest.TestCase):
    def setUp(self):
        pass

    def testListAppend(self):
        a = [[1,2],[3,4]]
        b = drel_runtime.aug_append(a,1)
        c = drel_runtime.aug_append(a,[3])
        d = drel_runtime.aug_append(a,[[4,5,6]])
        self.failUnless(b == [[1,2],[3,4],1])
        self.failUnless(c == [[1,2],[3,4],3])
        self.failUnless(d == [[1,2],[3,4],[4,5,6]])

    def testListAdd(self):
        a = [[1,2],[3,4]]
        aa = 5
        b = drel_runtime.aug_add(a,1)
        c = drel_runtime.aug_add(a,[[1,2],[7,6]])
        d = drel_runtime.aug_add(5,2)
        self.failUnless((c == numpy.array([[2,4],[10,10]])).all())
        self.failUnless((b == numpy.array([[2,3],[4,5]])).all())
        self.failUnless(d == 7)

    def testListUnappend(self):
        a = [[1,2],[3,4]]
        c = drel_runtime.aug_remove(a,[1,2])
        self.failUnless(c == [[3,4]])

    def testListSubtract(self):
        a = [[1,2],[3,4]]
        aa = 5
        b = drel_runtime.aug_sub(a,1)
        c = drel_runtime.aug_sub(a,[[1,2],[7,6]])
        d = drel_runtime.aug_sub(5,2)
        self.failUnless((c == numpy.array([[0,0],[-4,-2]])).all())
        self.failUnless((b == numpy.array([[0,1],[2,3]])).all())
        self.failUnless(d == 3)

    def testDotProduct(self):
        """Test that multiplication works correctly"""
        a = numpy.array([1,2,3])
        b = numpy.array([4,5,6])
        d = drel_runtime.drel_dot(a,b)
        self.failUnless(d == 32)

    def testMatrixMultiply(self):
        """Test that matrix * matrix works"""
        a = numpy.matrix([[1,0,0],[0,1,0],[0,0,1]])
        b = numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])
        c = drel_runtime.drel_dot(a,b)
        self.failUnless((c == numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])).any())

    def testMatVecMultiply(self):
        """Test that matrix * vec works"""
        a = numpy.array([0,1,0])
        b = numpy.matrix([[3,4,5],[6,7,8],[9,10,11]])
        c = drel_runtime.drel_dot(a,b)
        d = drel_runtime.drel_dot(b,a)
        self.failUnless((d == numpy.matrix([4,7,10])).any())
        self.failUnless((c == numpy.matrix([6,7,8])).any())

    def testScalarVecMult(self):
        """Test that multiplying by a scalar works"""
        a = [1,2,3]
        b = 4
        c = drel_runtime.drel_dot(b,a)
        d = drel_runtime.drel_dot(a,b)
        self.failUnless((c == numpy.matrix([4,8,12])).any())
        self.failUnless((d == numpy.matrix([4,8,12])).any())


# Test simple statements

class SingleSimpleStatementTestCase(unittest.TestCase):
    def setUp(self):
        #create our lexer and parser
        self.lexer = drel_lex.lexer
        self.parser = drel_ast_yacc.parser
        self.dic = CifFile.CifDic("dic_for_tests.dic",grammar="STAR2")

    def create_test(self,instring,right_value,debug=False,array=False):
        """Given a string, create and call a function then check result"""
        if instring[-1]!="\n":
           instring += '\n'
        res = self.parser.parse(instring,debug=debug,lexer=self.lexer)
        if debug: print "%s\n -> \n%s \n" % (instring,`res`)
        realfunc = py_from_ast.make_python_function(res,"myfunc",'_a.b',have_sn=False,
                                                    cif_dic=self.dic)
        if debug: print "-> %s" % realfunc
        exec realfunc
        answer = myfunc(self)
        if debug: print " -> %s" % `answer`
        if not array:
            self.failUnless(answer == right_value)
        else:
            try:
                self.failUnless((answer == right_value).all())
            except:
                self.failUnless(answer == right_value)

# as we disallow simple expressions on a separate line to avoid a 
# reduce/reduce conflict for identifiers, we need at least an 
# assignment statement

    def testrealnum(self):
        """test parsing of real numbers"""
        self.create_test('_a.b=5.45',5.45)
        self.create_test('_a.b=.45e-24',.45e-24)

    def testinteger(self):
        """test parsing an integer"""
        resm = [0,0,0,0]
        checkm = [1230,77,5,473]
        self.create_test('_a.b = 1230',1230)
        self.create_test('_a.b = 0x4D',77)
        self.create_test('_a.b = 0B0101',5)
        self.create_test('_a.b = 0o731',473)

    def testcomplex(self):
        """test parsing a complex number"""
        self.create_test('_a.b = 13.45j',13.45j)

    def testList(self):
        """test parsing a list over two lines"""
        self.create_test('_a.b = [1,2,\n 3,4,\n 5,6]',StarFile.StarList([1,2,3,4,5,6]))

    def testparenth(self):
        """test parsing a parenthesis over two lines"""
        self.create_test('_a.b = (1,2,\n3,4)',(1,2,3,4))

    def testshortstring(self):
        """test parsing a one-line string"""
        jk = "_a.b = \"my pink pony's mane\""
        jl = "_a.b = 'my pink pony\"s mane'"
        self.create_test(jk,jk[8:-1])
        self.create_test(jl,jl[8:-1])
#
# This fails due to extra indentation introduced when constructing the
# enclosing function
#
    def testlongstring(self):
        """test parsing multi-line strings"""
        jk = '''_a.b = """  a  long string la la la '"'
                  some more
          end""" '''
        jl = """_a.b = '''  a  long string la la la '"'
                  some more
          end''' """
        self.create_test(jk,jk[7:-3])
        self.create_test(jl,jl[7:-3])

    def testmathexpr(self):
        """test simple maths expressions """
        testexpr = (("_a.b = 5.45 + 23.6e05",5.45+23.6e05), 
                    ("_a.b = 11 - 45",11-45),
                    ("_a.b = 45.6 / 22.2",45.6/22.2))
        for test,check in testexpr:
            self.create_test(test,check)

    def testexprlist(self):
        """test comma-separated expressions"""
        test = "_a.b = 5,6,7+8.5e2"
        self.create_test(test,(5,6,7+8.5e2))

    def testparen(self):
        """test parentheses"""
        test = "_a.b = ('once', 'upon', 6,7j +.5e2)"
        self.create_test(test,('once' , 'upon' , 6 , 7j + .5e2 ))

    def testlists(self):
        """test list parsing"""
        test = "_a.b = ['once', 'upon', 6,7j +.5e2]"
        self.create_test(test,StarFile.StarList(['once' , 'upon' , 6 , 7j + .5e2 ]))

    def test_multistatements(self):
        """test multiple statements"""
        test = "_a.b = 1.2\nb = 'abc'\nqrs = 4.4\n"
        self.create_test(test,1.2)

    def test_semicolon_sep(self):
        """test multiple statements between semicolons"""
        test = "_a.b = 1.2;b = 'abc';qrs = 4.4"
        self.create_test(test,1.2)

    def test_slicing(self):
        """Test that our slicing is parsed correctly"""
        test = "b = array([[1,2],[3,4],[5,6]]);_a.b=b[0,1]"
        self.create_test(test,2)

    def test_slice_2(self):
        """Test that first/last slicing works"""
        test = "b = 'abcdef';_a.b=b[1:3]"
        self.create_test(test,'bc')

    def test_paren_balance(self):
        """Test that multi-line parentheses work """
        test = """b = (
                       (1,2,(
                             3,4
                            )
                       ,5),6
                     ,7)\n _a.b=b[0][2][0]"""
        self.create_test(test,3)
    
    def test_list_constructor(self):
        """Test that the list constructor works"""
        test = """_a.b = List(1,2)"""
        self.create_test(test,[1,2])

    def test_non_python_ops(self):
        """Test operators that have no direct Python equivalents"""
        test_expr = (("b = [1,2]; _a.b = [3,4]; _a.b++=b",StarFile.StarList([3,4,1,2])),
        ("b = [1,2]; _a.b = [3,4]; _a.b+=b",[4,6]),
        ("b = 3; _a.b = [3,4]; _a.b-=b",[0,1]),
        ("b = [1,2]; _a.b = [[1,2],[3,4]]; _a.b--=b",[[3,4]]))
        for one_expr in test_expr:
            self.create_test(one_expr[0],one_expr[1],array=True)

    def test_tables(self):
       """Test that tables are parsed correctly"""
       teststrg = """
       c = Table()
       c['bx'] = 25
       _a.b = c
       """
       print "Table test:"
       res = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False,
                                                   cif_dic=self.dic)
       print realfunc
       exec realfunc
       b = myfunc(self)
       self.failUnless(b['bx']==25)

    def test_Tables_2(self):
       """Test that brace-delimited tables are parsed correctly"""
       teststrg = """
       c = {'hello':1,'goodbye':2}
       _a.b = c['hello']
       """
       print "Table test:"
       res = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False,
                                                   cif_dic=self.dic)
       print realfunc
       exec realfunc
       b = myfunc(self)
       self.failUnless(b==1)

    def test_subscription(self):
       """Test proper list of dependencies is returned"""
       teststrg = """
       m   = [15,25,35]
       _a.b = m [1]
       """
       self.create_test(teststrg,25)

class SimpleCompoundStatementTestCase(unittest.TestCase):
   def setUp(self):
       #create our lexer and parser
       self.lexer = drel_lex.lexer
       self.lexer.lineno = 0
       self.parser = drel_ast_yacc.parser
       self.dic = CifFile.CifDic("dic_for_tests.dic",grammar="STAR2")

   def create_test(self,instring,right_value,varname="_a.b",debug=False):
       """Given a string, create and call a function then check result"""
       if instring[-1]!="\n":
           instring += "\n"   # correct termination
       res = self.parser.parse(instring,debug=debug,lexer=self.lexer)
       if debug: print "%s\n -> \n%s \n" % (instring,`res`)
       realfunc = py_from_ast.make_python_function(res,"myfunc",varname,have_sn=False,
                                                   cif_dic=self.dic)
       if debug: print "-> %s" % realfunc
       exec realfunc
       self.failUnless(myfunc(self) == right_value)

   def test_do_stmt(self):
       """Test how a do statement comes out"""
       teststrg = """
       _a.b = 0
       dummy = 1
       do jkl = 0,20,2 {
          if (dummy == 1) print 'dummy is 1'
          _a.b = _a.b + jkl
          }
       do emm = 1,5 {
          _a.b = _a.b + emm
          }
       """
       self.create_test(teststrg,125)

   def test_do_stmt_2(self):
       """Test how another do statement comes out with long suite"""
       teststrg = """
       _a.b = 0
       geom_hbond = [(1,2),(2,3),(3,4)]
       do i= 0,1 {
          l,s = geom_hbond [i] 
          a = 'hello'
          c = int(4.5)
          bb = [1,c,a]
          _a.b += s
          }
       """
       self.create_test(teststrg,5)

   def test_if_stmt(self):
       """test parsing of if statement"""
       teststrg = """
       dmin = 5.0
       d1 = 4.0
       rad1 = 2.2
       radius_bond = 2.0
       If (d1<dmin or d1>(rad1+radius_bond)) _a.b = 5 
       """
       self.create_test(teststrg,5)

   def test_double_if_stmt(self):
       """test parsing of if statement"""
       teststrg = """
       dmin = 5.0
       d1 = 4.0
       rad1 = 2.2
       radius_bond = 2.0
       If (d1<dmin or d1>(rad1+radius_bond)) _a.b = 5 

       if (d1>dmin or d1<(rad1+radius_bond)) _a.b = 11
       if (5 > 6 and 6 < 4) _a.b = -2
       """
       self.create_test(teststrg,11)

   def test_if_else(self):
       """Test that else is properly handled"""
       teststrg = """drp = 'electron'
                     If (drp == "neutron")  _a.b =  "femtometres"
                     Else If (drp == "electron") _a.b =  "volts"
                     Else      _a.b =  "electrons" """
       self.create_test(teststrg,'volts')

   def test_for_statement(self):
       """Test for statement with list"""
       teststrg = """
       _a.b = 0
       for [c,d] in [[1,2],[3,4],[5,6]] {
           _a.b += c + 2*d
       }"""
       self.create_test(teststrg,33)

   def test_funcdef(self):
       """Test function conversion"""
       teststrg = """
       function Closest( v :[Array, Real],   # coord vector to be cell translated
                       w :[Array, Real]) { # target vector

            d  =  v - w
            t  =  Int( Mod( 99.5 + d, 1.0 ) - d )
            q = 1 + 1
            Closest = [ v+t, t ]
       } """
       res = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc",None, func_def = True)
       # print "Function -> \n" + realfunc
       exec realfunc
       retval = Closest(0.2,0.8)
       print 'Closest 0.2,0.8 returns ' + ",".join([`retval[0]`,`retval[1]`])
       self.failUnless(retval == StarFile.StarList([1.2,1]))

class MoreComplexTestCase(unittest.TestCase):
   def setUp(self):
       #create our lexer and parser
       self.lexer = drel_lex.lexer
       self.lexer.lineno = 0
       self.parser = drel_ast_yacc.parser
       self.dic = CifFile.CifDic("dic_for_tests.dic",grammar="STAR2")

   def test_nested_stmt(self):
       """Test how a nested do statement executes"""
       teststrg = """
       total = 0
       _a.b = 0
       do jkl = 0,20,2 { total = total + jkl 
          do emm = 1,5 { _a.b = _a.b + 1
          } 
          }
       end_of_loop = -25.6
       """
       res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False,
                                                   cif_dic = self.dic)
       exec realfunc
       othertotal = myfunc(self)
       self.failUnless(othertotal==55)

   def test_complex_if(self):
       """Test if with single-statement suite"""
       teststrg = """
       setting = 'triclinic'
       a   = 20.0
       b   = 20.0
       c   = 20.0
       d   = 0.01
       alp = 90.0
       bet = 90.0
       gam = 90.0
       warn_len = 'Possible mismatch between cell lengths and cell setting'
       warn_ang = 'Possible mismatch between cell angles and cell setting'
 
       If(setting == 'triclinic') {
         If( Abs(a-b)<d || Abs(a-c)<d || Abs(b-c)<d )          _a.b = ('B', warn_len)
         If( Abs(alp-90)<d || Abs(bet-90)<d || Abs(gam-90)<d ) _a.b = ('B', warn_ang)
       } else _a.b = ('None',"")
       """
       res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","_a.b",have_sn=False,
                                                   cif_dic = self.dic)
       exec realfunc
       b = myfunc(self)
       print "if returns " + `b` 
       self.failUnless(b==('B', 'Possible mismatch between cell angles and cell setting'))


# We don't test the return value until we have a way to actually access it!
   def test_fancy_assign(self):
       """Test fancy assignment"""
       teststrg = """
       a = [2,3,4] 
       b = 3
       c= 4
       do jkl = 1,5,1 {
          geom_angle( .id = [a,b,c],
                      .distances = [b,c],
                      .value = jkl)
                      }
       """
       res = self.parser.parse(teststrg + "\n", lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","geom_angle",cat_meth = True,have_sn=False,
                                                   cif_dic = testdic)
       print "Fancy assign: %s" % res[0]
       exec realfunc
       b = myfunc(self)
       print "Geom_angle.angle = %s" % b['_geom_angle.value']
       self.failUnless(b['_geom_angle.value']==[1,2,3,4,5])

class WithDictTestCase(unittest.TestCase):
   """Now test flow control which requires a dictionary present"""
   #Dictionaries are required whenever a calculation is performed on a
   #datafile-derived object in order to use the correct types.
   def setUp(self):
       #create our lexer and parser
       self.lexer = drel_lex.lexer
       self.parser = drel_ast_yacc.parser
       self.lexer.lineno = 0
       # use a real data block from the test files for item values
       self.testblock = CifFile.CifFile("drel/nick1.cif",grammar="STAR2")["saly2_all_aniso"]
       self.testblock.assign_dictionary(testdic)
       self.testblock.provide_value = True  #get values back
       self.testdic = testdic
       #create the global namespace
       self.namespace = self.testblock.keys()
       self.namespace = dict(map(None,self.namespace,self.namespace))
       self.special_ids = [self.namespace]

   def testLists(self):
       """Test case found in Cif dictionary """
       teststrg = """# Store unique sites as a local list
 
     atomlist  = List()
     Loop  a  as  atom_site  {
        axyz       =    a.fract_xyz
        cxyz       =   _atom_sites_Cartn_transform.matrix * axyz
        radb       =   _atom_type[a.type_symbol].radius_bond
        radc       =   _atom_type[a.type_symbol].radius_contact
        ls         =   List ( a.label, "1_555" )
        atomlist ++=   [ls, axyz, cxyz, radb, radc, 0]
     }
     _geom_bond.id = atomlist
"""    
       loop_cats = {"atom_site":["label",["fract_xyz","type_symbol","label"]],
                    "atom_type":["id",["id","radius_bond","radius_contact"]]}
       res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
       realfunc,dependencies = py_from_ast.make_python_function(res,"myfunc","_geom_bond.id",cat_meth=True,
                   loopable=loop_cats,have_sn=False,depends=True,cif_dic=testdic)
       print 'Simple function becomes:'
       print realfunc
       print 'Depends on: ' + `dependencies`
       exec realfunc
       # Add drel functions for deriving items
       testdic.initialise_drel()
       b = myfunc(self.testblock)
       print "subscription returns " + `b` 

   def test_with_stmt(self):
       """Test what comes out of a simple flow statement, including
          multiple with statements"""
       teststrg = """
       with e as exptl
       with c as cell_length {
           x = 22
           j = 25
           jj = e.crystals_number
           px = c.a
           _exptl.method = "single-crystal diffraction"
           }"""
       loopable_cats = {}   #none looped
       res = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(res,"myfunc","_exptl.method",cif_dic=testdic)
       print "With statement -> \n" + realfunc
       exec realfunc
       # attach dictionary  
       self.testblock.assign_dictionary(self.testdic)
       newmeth = myfunc(self.testblock)
       print 'exptl method now %s' % newmeth 
       self.failUnless(newmeth == "single-crystal diffraction")

   def test_loop_with_statement(self):
       """Test with statement on a looped category"""
       teststrg = """ 
       with t as atom_type
       {
       t.analytical_mass_percent = t.number_in_cell * 10
       }
       """
       loopable_cats = {'atom_type':["id",["id","number_in_cell"]]}   #
       ast = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(ast,"myfunc","_atom_type.analytical_mass_percent",
                                                   cif_dic=testdic,loopable=loopable_cats)
       print "With statement for looped category -> \n" + realfunc
       exec realfunc
       #  
       atmass = myfunc(self.testblock)
       print 'test value now %s' % `atmass`  
       self.failUnless(atmass == [120,280,240])
       
   def test_loop_with_stmt_2(self):
       """Test with statement on a looped category, no aliasing"""
       teststrg = """ 
       _atom_type.analytical_mass_percent = _atom_type.number_in_cell * 10
       """
       loopable_cats = {'atom_type':["id",["id",'number_in_cell','test']]}   #
       ast = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(ast,"myfunc","_atom_type.analytical_mass_percent",
                                                   loopable=loopable_cats,
                                                   cif_dic=testdic)
       print "With statement for looped category -> \n" + realfunc
       exec realfunc
       atmass = myfunc(self.testblock)
       print 'test value now %s' % `atmass`  
       self.failUnless(atmass == [120,280,240])

   def test_subscription(self):
       """Test proper list of dependencies is returned"""
       teststrg = """
       _model_site.symop = _model_site.id [1]
       """
       loopable_cats = {"model_site":["id",["id","symop"]]}
       res = self.parser.parse(teststrg,lexer=self.lexer)
       print `res`
       realfunc,dependencies = py_from_ast.make_python_function(res,"myfunc","_model_site.symop",
                                                                loopable=loopable_cats,depends=True,
                                                                cif_dic=testdic)
       print realfunc, `dependencies`
       self.failUnless(dependencies == set(['_model_site.id']))

   def test_current_row(self):
       """Test that methods using Current_Row work properly"""
       teststrg = """
       _atom_type.description = Current_Row() + 1
       """
       loopable_cats = {'atom_type':["id",['number_in_cell','atomic_mass','num']]}   #
       ast = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(ast,"myfunc","_atom_type.description",loopable=loopable_cats,
                                                   cif_dic=testdic)
       print "Current row statement -> \n" + realfunc
       exec realfunc
       rownums = myfunc(self.testblock)
       print 'row id now %s' % `rownums`
       self.failUnless(rownums == [1,2,3])
 
   def test_loop_statement(self):
       """Test proper processing of loop statements"""
       teststrg = """
       mass = 0.
       Loop t as atom_type  {
                   mass += t.number_in_cell * t.atomic_mass
       }
       _cell.atomic_mass = mass
            """
       loopable_cats = {'atom_type':["id",['number_in_cell','atomic_mass']]}   #
       ast = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(ast,"myfunc","_cell.atomic_mass",loopable=loopable_cats,
                                                   cif_dic=testdic)
       print "Loop statement -> \n" + realfunc
       exec realfunc
       atmass = myfunc(self.testblock)
       print 'atomic mass now %f' % atmass  
       self.failUnless(atmass == 552.488)
       
   def test_complex_f(self):
       """This calculation failed during testing"""
       teststrg = """
   With r  as  refln
 
      fc  =   Complex (0., 0.)
      h   =   r.hkl
 
   Loop a  as  atom_site  {
 
          f  =   a.site_symmetry_multiplicity * a.occupancy * (
                 r.form_factor_table [a.type_symbol]      +
                        _atom_type_scat[a.type_symbol].dispersion  )
 
      Loop s  as  symmetry_equiv  {
 
          t   =  Exp(-h * s.R * a.tensor_beta * s.RT * h)
 
          fc +=  f * t * ExpImag(TwoPi *( h *( s.R * a.fract_xyz + s.T)))
   }  }
          _refln.F_complex  =   fc / _symmetry.multiplicity
       """
       loopable_cats = {'symmetry_equiv':["id",["id","R","RT","T"]],
                        'atom_site':["id",["id","type_symbol","occupancy","site_symmetry_multiplicity",
                                           "tensor_beta","fract_xyz"]],
                        'atom_type_scat':["id",["id","dispersion"]],
                        'refln':["hkl",["hkl","form_factor_table"]]}   #
       ast = self.parser.parse(teststrg+"\n",lexer=self.lexer)
       realfunc = py_from_ast.make_python_function(ast,"myfunc","_refln.F_complex",loopable=loopable_cats,
                                                   cif_dic=testdic)
       print "Incoming AST: " + `ast`
       print "F_complex statement -> \n" + realfunc
       exec realfunc

       # This one also doesn't return anything sensible yet, just a generation check
   def test_fancy_packets(self):
       """Test that full packets can be dealt with properly"""
       teststrg = """[label,symop] =   _model_site.id
 
     a = atom_site[label]
     s = symmetry_equiv[SymKey(symop)]
 
     _model_site.adp_matrix_beta =  s.R * a.tensor_beta * s.RT"""
       loopable = {"model_site":["id",["id"]],
                   "atom_site":["label",["tensor_beta","label"]],
                   "symmetry_equiv":["id",["id","RT","R"]]}
       res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
       realfunc,deps = py_from_ast.make_python_function(res,"myfunc","_model_site.adp_matrix_beta",
                                                   depends = True,have_sn=False,
                                                        loopable=loopable,cif_dic=testdic)
       print 'model_site.adp_matrix_beta becomes...'
       print realfunc
       print deps
       self.failUnless('_symmetry_equiv.RT' in deps)

   def test_array_access(self):
       """Test that arrays are converted and returned correctly"""
       teststrg = """
      _model_site.symop = _model_site.id[1]
      """
       loopable = {"model_site":["id",["id","symop","adp_eigen_system"]],
                   "atom_site":["label",["tensor_beta","label"]],
                   "symmetry_equiv":["id",["id","RT","R"]]}
       res = self.parser.parse(teststrg + "\n",lexer=self.lexer)
       realfunc,deps = py_from_ast.make_python_function(res,"myfunc","_model_site.symop",
                                                   depends = True,have_sn=False,
                                                        loopable=loopable,cif_dic=testdic)
       print realfunc
       exec realfunc
       self.testblock.assign_dictionary(testdic)
       b = myfunc(self.testblock)
       print 'symops are now ' + `b`
       self.failUnless(b[1] == '1_555')
      
if __name__=='__main__':
    global testdic
    testdic = CifFile.CifDic("drel/testing/cif_core.dic",grammar="STAR2",do_minimum=True)
    unittest.main()
    #suite = unittest.TestLoader().loadTestsFromTestCase(WithDictTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(SimpleCompoundStatementTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(SingleSimpleStatementTestCase)
    #suite = unittest.TestLoader().loadTestsFromTestCase(MoreComplexTestCase) 
    #suite = unittest.TestLoader().loadTestsFromTestCase(dRELRuntimeTestCase)
    #unittest.TextTestRunner(verbosity=2).run(suite)

pycifrw-4.4/docs/CifFile/TestPyCifRW.m.html000066400000000000000000016031451345362224200205560ustar00rootroot00000000000000

CifFile.TestPyCifRW module

# Testing of the PyCif module using the PyUnit framework
# 
import sys
sys.path[0] = '.'
print sys.path
import unittest, CifFile
import StarFile
import re

# Test general string and number manipulation functions
class BasicUtilitiesTestCase(unittest.TestCase):
    def testPlainLineFolding(self):
       """Test that we can fold a line correctly"""
       test_string = "1234567890123456789012"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       out_lines = outstring.split('\n')
       #print outstring
       self.failUnless(out_lines[0]=="\\")
       self.failUnless(len(out_lines[1])==10)

    def testPreWrappedFolding(self):
       """Test that pre-wrapped lines are untouched"""
       test_string = "123456789\n012345678\n9012"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring == test_string)

    def testManyLineEndings(self):
       """Test that empty lines are handled OK"""
       test_string = "123456789\n\n012345678\n\n9012\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring == test_string)

    def testOptionalBreak(self):
       """Test that internal whitespace is used to break"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       #print "\n;" + outstring + "\n;"
       out_lines = outstring.split('\n')
       self.failUnless(len(out_lines[1]) == 7)

    def testCorrectEnding(self):
       """Make sure that no line feeds are added/removed"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring[-4:] == "12\n\n")

    def testFoldingRemoval(self):
       """Test that we round-trip correctly"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testTrickyFoldingRemoval(self):
       """Try to produce a tough string for unfolding"""
       test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testTrailingBackslash(self):
       """Make sure that a trailing backslash is not removed"""
       test_string = "\n123\\\n 456\\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testFinalBackslash(self):
        """Make sure that a single final backslash is removed when unfolding"""
        test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
        folded_string = StarFile.apply_line_folding(test_string,5,10)
        folded_string = folded_string + "\ "
        old_string = StarFile.remove_line_folding(folded_string)
        self.failUnless(old_string == test_string)

    def testAddIndent(self):
        """Test insertion of a line prefix"""
        test_string = "\n12345\n678910\n\n"
        outstring = StarFile.apply_line_prefix(test_string,"abc>")
        print "Converted %s to %s " %(test_string,outstring)
        self.failUnless(outstring == "abc>\\\nabc>\nabc>12345\nabc>678910\nabc>\nabc>")

    def testRemoveIndent(self):
        """Test removal of a line prefix"""
        test_string = "abc>\\\nabc>12345\nabc>678910\nabc>\nabc>"
        outstring = StarFile.remove_line_prefix(test_string)
        print "Removed indent: " + `outstring`
        self.failUnless(outstring == "12345\n678910\n\n")

    def testReverseIndent(self):
        """Test reversible indentation of line"""
        test_string = "12345\n678910\n\n"
        outstring = StarFile.apply_line_prefix(test_string,"cif><")
        newtest = StarFile.remove_line_prefix(outstring)
        print 'Before indenting: ' + `test_string`
        print 'After indenting: ' + `outstring`
        print 'After unindent: ' + `newtest`
        self.failUnless(newtest == test_string)

    def testPrefixAndFold(self):
        """Test reversible folding and indenting"""
        test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
        outstring = StarFile.apply_line_folding(test_string,5,10)
        indoutstring = StarFile.apply_line_prefix(outstring,"CIF>")
        newoutstring = StarFile.remove_line_prefix(indoutstring)
        newtest_string = StarFile.remove_line_folding(newoutstring)
        print "%s -> %s -> %s -> %s -> %s" % (`test_string`,`outstring`,`indoutstring`,`newoutstring`,`newtest_string`)
        self.failUnless(newtest_string == test_string)
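
# Composition sketch of the utilities tested above: fold first, then prefix;
# removal proceeds in the reverse order and must restore the original text.
#
#   folded   = StarFile.apply_line_folding(text, 5, 10)
#   prefixed = StarFile.apply_line_prefix(folded, "CIF>")
#   assert StarFile.remove_line_folding(
#              StarFile.remove_line_prefix(prefixed)) == text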

# Test basic setting and reading of the CifBlock

class BlockRWTestCase(unittest.TestCase):
    def setUp(self):
    	# we want to get a datablock ready so that the test
	# case will be able to write a single item
	self.cf = CifFile.CifBlock()

    def tearDown(self):
        # get rid of our test object
	del self.cf
	
    def testTupleNumberSet(self):
        """Test tuple setting with numbers"""
        self.cf['_test_tuple'] = (11,13.5,-5.6)
        self.failUnless(map(float,
             self.cf['_test_tuple']) == [11,13.5,-5.6])

    def testTupleComplexSet(self):
        """Test setting multiple names in loop"""
	names = (('_item_name_1','_item_name#2','_item_%$#3'),)
	values = (((1,2,3,4),('hello','good_bye','a space','# 4'),
	          (15.462, -99.34,10804,0.0001)),)
        self.cf.AddCifItem((names,values))
	self.failUnless(tuple(map(float, self.cf[names[0][0]])) == values[0][0])
	self.failUnless(tuple(self.cf[names[0][1]]) == values[0][1])
	self.failUnless(tuple(map(float, self.cf[names[0][2]])) == values[0][2])

    def testStringSet(self):
        """test string setting"""
        self.cf['_test_string_'] = 'A short string'
	self.failUnless(self.cf['_test_string_'] == 'A short string')

    def testTooLongSet(self):
        """test setting overlong data names"""
        dataname = '_a_long_long_'*7
        try:
            self.cf[dataname] = 1.0
        except (StarFile.StarError,CifFile.CifError): pass
        else: self.fail()

    def testTooLongLoopSet(self):
        """test setting overlong data names in a loop"""
        dataname = '_a_long_long_'*7
        try:
            self.cf[dataname] = (1.0,2.0,3.0)
        except (StarFile.StarError,CifFile.CifError): pass
        else: self.fail()

    def testBadStringSet(self):
        """test setting values with bad characters"""
        dataname = '_name_is_ok'
        try:
            self.cf[dataname] = "eca234\f\vaqkadlf"
        except StarFile.StarError: pass
        else: self.fail()

    def testBadNameSet(self):
        """test setting names with bad characters"""
        dataname = "_this_is_not ok"
        try:
            self.cf[dataname] = "nnn"
        except StarFile.StarError: pass
        else: self.fail()

    def testMoreBadStrings(self):
        dataname = "_name_is_ok"
        val = u"so far, ok, but now we have a " + unichr(128)
        try:
            self.cf[dataname] = val
        except StarFile.StarError: pass
        else: self.fail()

    def testEmptyString(self):
        """An empty string is, in fact, legal"""
        self.cf['_an_empty_string'] = ''
        
    def testStarList(self):
        """Test that a StarList is treated as a primitive item"""
        self.cf['_a_star_list'] = StarFile.StarList([1,2,3,4])
        jj = self.cf.GetLoop('_a_star_list')
        self.failUnless(jj.dimension==0)
       
# Now test operations which require a preexisting block
#

class BlockChangeTestCase(unittest.TestCase):
   def setUp(self):
        self.cf = CifFile.CifBlock()
	self.names = (('_item_name_1','_item_name#2','_item_%$#3'),)
	self.values = (((1,2,3,4),('hello','good_bye','a space','# 4'),
	          (15.462, -99.34,10804,0.0001)),)
        self.cf.AddCifItem((self.names,self.values))
	self.cf['_non_loop_item'] = 'Non loop string item'
	self.cf['_number_item'] = 15.65
        self.cf['_planet'] = 'Saturn'
        self.cf['_satellite'] = 'Titan'
        self.cf['_rings']  = 'True'
       
   def tearDown(self):
       del self.cf

   def testFromBlockSet(self):
        """Test that we can use a CifBlock to set a CifBlock"""
        df = CifFile.CifFile()
        df.NewBlock('testname',self.cf)

   def testLoop(self):
        """Check GetLoop returns values and names in matching order"""
   	results = self.cf.GetLoop(self.names[0][2])
	for key in results.keys():
	    self.failUnless(key in self.names[0])
	    self.failUnless(tuple(results[key]) == self.values[0][list(self.names[0]).index(key)])
	
   def testSimpleRemove(self):
       """Check item deletion outside loop"""
       self.cf.RemoveCifItem('_non_loop_item')
       try:
           a = self.cf['_non_loop_item']
       except KeyError: pass
       else: self.fail()

   def testLoopRemove(self):
       """Check item deletion inside loop"""
       print "Before:\n"
       print self.cf.printsection()
       self.cf.RemoveCifItem(self.names[0][1])
       print "After:\n"
       print self.cf.printsection()
       try:
           a = self.cf[self.names[0][1]]
       except KeyError: pass
       else: self.fail()

   def testFullLoopRemove(self):
       """Check removal of all loop items"""
       for name in self.names[0]: self.cf.RemoveCifItem(name)
       self.failUnless(len(self.cf.loops)==0, `self.cf.loops`)

# test adding data to a loop.  We test straight addition, then make sure the errors
# happen at the right time
#
   def testAddToLoop(self):
       """Test adding to a loop"""
       adddict = {'_address':['1 high street','2 high street','3 high street','4 high st'],
                  '_address2':['Ecuador','Bolivia','Colombia','Mehico']}
       self.cf.AddToLoop('_item_name#2',adddict)
       newkeys = self.cf.GetLoop('_item_name#2').keys()
       self.failUnless(adddict.keys()[0] in newkeys)
       self.failUnless(len(self.cf.GetLoop('_item_name#2'))==len(self.values[0])+2)
       
   def testBadAddToLoop(self):
       """Test incorrect loop addition"""
       adddict = {'_address':['1 high street','2 high street','3 high street'],
                  '_address2':['Ecuador','Bolivia','Colombia']}
       try:
           self.cf.AddToLoop('_no_item',adddict)
       except KeyError: pass
       else: self.fail()
       try:
           self.cf.AddToLoop('_item_name#2',adddict)
       except StarFile.StarLengthError:
           pass 
       else: self.fail()

   def testChangeLoop(self):
       """Test changing pre-existing item in loop"""
       # Items should be silently replaced, but if an
       # item exists in a loop already, it should be
       # deleted from that loop first
       self.cf["_item_name_1"] = (5,6,7,8)

   def testLoopify(self):
       """Test changing unlooped data to looped data"""
       self.cf.Loopify(["_planet","_satellite","_rings"])
       newloop = self.cf.GetLoop("_rings")
       self.assertFalse(newloop.has_key("_number_item"))
       
   def testLoopifyCif(self):
       """Test changing unlooped data to looped data does 
          not touch already looped data for a CIF file"""
#      from IPython.Debugger import Tracer; debug_here = Tracer()
#      debug_here()
       self.cf.Loopify(["_planet","_satellite","_rings"])
       newloop = self.cf.GetLoop("_rings")
       newloop.Loopify(["_planet","_rings"])
       innerloop = newloop.GetLoop("_planet")
       self.assertTrue(innerloop.has_key("_satellite"))
       
#
#  Test the mapping type implementation
#
   def testGetOperation(self):
       """Test the get mapping call"""
       self.cf.get("_item_name_1")
       self.cf.get("_item_name_nonexist")

#
#  Test case insensitivity
#
   def testDataNameCase(self):
       """Test same name, different case causes error"""
       self.assertEqual(self.cf["_Item_Name_1"],self.cf["_item_name_1"])
       self.cf["_Item_NaMe_1"] = "the quick pewse fox"
       self.assertEqual(self.cf["_Item_NaMe_1"],self.cf["_item_name_1"])

#  Test iteration
#
   def testIteration(self):
       """We create an iterator and iterate"""
       testloop = self.cf.GetLoop("_item_name_1")
       i = 0
       for test_pack in testloop:
           self.assertEqual(test_pack._item_name_1,self.values[0][0][i]) 
           self.assertEqual(getattr(test_pack,"_item_name#2"),self.values[0][1][i]) 
           i += 1

   def testPacketContents(self):
       """Test that body of packet is filled in as well"""
       testloop = self.cf.GetLoop("_item_name_1")
       it_order = testloop.GetItemOrder()
       itn_pos = it_order.index("_item_name_1")
       for test_pack in testloop:
           print 'Test pack: ' + `test_pack`
           self.assertEqual(test_pack._item_name_1,test_pack[itn_pos])

   def testPacketAttr(self):
       """Test that packets have attributes"""
       testloop = self.cf.GetLoop("_item_name_1")
       self.assertEqual(testloop[1]._item_name_1,2)

   def testKeyPacket(self):
       """Test that a packet can be returned by key value"""
       testloop = self.cf.GetLoop("_item_name_1")
       testpack = testloop.GetKeyedPacket("_item_name_1",2)
       self.assertEqual("good_bye",getattr(testpack,"_item_name#2"))

   def testRemovePacket(self):
       """Test that removing a packet works properly"""
       print 'Before packet removal'
       print str(self.cf)
       testloop = self.cf.GetLoop("_item_name_1")
       testloop.RemoveKeyedPacket("_item_name_1",3)
       jj = testloop.GetKeyedPacket("_item_name_1",2)
       kk = testloop.GetKeyedPacket("_item_name_1",4)
       self.assertEqual(getattr(jj,"_item_name#2"),"good_bye")
       self.assertEqual(getattr(kk,"_item_name#2"),"# 4")
       self.assertRaises(KeyError,testloop.GetKeyedPacket,"_item_name_1",3)
       print 'After packet removal:'
       print str(self.cf)

   def testAddPacket(self):
       """Test that we can add a packet"""
       import copy
       testloop = self.cf.GetLoop("_item_name_1")
       workingpacket = copy.copy(testloop.GetPacket(0))
       workingpacket._item_name_1 = 5
       workingpacket.__setattr__("_item_name#2", 'new' )
       testloop.AddPacket(workingpacket)
       # note we assume that this adds on to the end, which is not 
       # a CIF requirement
       self.assertEqual(testloop["_item_name_1"][4],5)
       self.assertEqual(testloop["_item_name#2"][4],'new')

#
#  Test changing item order
#
   def testChangeOrder(self):
       """We move some stuff around"""
       testloop = self.cf.GetLoop("_item_name_1")
       self.cf.ChangeItemOrder("_Number_Item",0)
       testloop.ChangeItemOrder("_Item_Name_1",2)
       self.assertEqual(testloop.GetItemOrder()[2],"_Item_Name_1")
       self.assertEqual(self.cf.GetItemOrder()[0],"_Number_Item")
       
   def testGetOrder(self):
       """Test that the correct order value is returned"""
       self.assertEqual(self.cf.GetItemPosition("_Number_Item"),2)

   def testReplaceOrder(self):
       """Test that a replaced item is at the same position it
	  previously held"""
       testloop = self.cf.GetLoop("_item_name_1")
       oldpos = testloop.GetItemPosition('_item_name#2')
       testloop['_item_name#2'] = ("I'm",' a ','little','teapot')
       self.assertEqual(testloop.GetItemPosition('_item_name#2'),oldpos)
#
#  Test setting of block names
#

class BlockNameTestCase(unittest.TestCase):
   def testBlockName(self):
       """Make sure long block names cause errors"""
       df = CifFile.CifBlock()
       cf = CifFile.CifFile()
       try:
           cf['a_very_long_block_name_which_should_be_rejected_out_of_hand123456789012345678']=df
       except StarFile.StarError: pass
       else: self.fail()

   def testBlockOverwrite(self):
       """Upper/lower case should be seen as identical"""
       df = CifFile.CifBlock()
       ef = CifFile.CifBlock()
       cf = CifFile.CifFile(standard=None)
       df['_random_1'] = 'oldval'
       ef['_random_1'] = 'newval'
       print 'cf.standard is ' + `cf.standard`
       cf['_lowercaseblock'] = df
       cf['_LowerCaseBlock'] = ef
       assert(cf['_Lowercaseblock']['_random_1'] == 'newval')
       assert(len(cf) == 1)

   def testEmptyBlock(self):
       """Test that empty blocks are not the same object"""
       cf = CifFile.CifFile()
       cf.NewBlock('first_block')
       cf.NewBlock('second_block')
       cf['first_block']['_test1'] = 'abc'
       cf['second_block']['_test1'] = 'def'
       self.failUnless(cf['first_block']['_test1']=='abc')

#
#   Test reading cases
#
class FileWriteTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again. Non alphabetic ordering to
          check order preservation and mixed case."""
       # fill up the block with stuff
       items = (('_item_1','Some data'),
             ('_item_3','34.2332'),
             ('_item_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'),
             ('_item_2','Some_underline_data'),
             ('_item_empty',''),
             ('_item_quote',"'ABC"),
             ('_item_apost','"def'),
             ('_item_sws'," \n "),
             (('_item_5','_item_7','_item_6'),
             ([1,2,3,4],
              ['a','b','c','d'],
              [5,6,7,8])),
             (('_string_1','_string_2'),
              ([';this string begins with a semicolon',
               'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough',
               ';just_any_old_semicolon-starting-string'],
               ['a string with a final quote"',
               'a string with a " and a safe\';',
               'a string with a final \''])))
       # save block items as well
       s_items = (('_sitem_1','Some save data'),
             ('_sitem_2','Some_underline_data'),
             ('_sitem_3','34.2332'),
             ('_sitem_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'),
             (('_sitem_5','_sitem_6','_sitem_7'),
             ([1,2,3,4],
              [5,6,7,8],
              ['a','b','c','d'])),
             (('_string_1','_string_2'),
              ([';this string begins with a semicolon',
               'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough',
               ';just_any_old_semicolon-starting-string'],
               ['a string with a final quote"',
               'a string with a " and a safe\';',
               'a string with a final \''])))
       self.cf = CifFile.CifBlock(items)
       cif = CifFile.CifFile(scoping='dictionary')
       cif['Testblock'] = self.cf
       # Add some comments
       self.cf.AddComment('_item_empty',"Test of an empty string")
       self.cf.AddComment('_item_apost',"Test of a trailing apostrophe")
       self.save_block = CifFile.CifBlock(s_items)
       cif.NewBlock("test_Save_frame",self.save_block,parent='testblock')
       self.cfs = cif["test_save_frame"]
       outfile = open('test.cif','w')
       outfile.write(str(cif))
       outfile.close()
       self.ef = CifFile.CifFile('test.cif',scoping='dictionary')
       self.df = self.ef['testblock']
       self.dfs = self.ef["test_save_frame"]
       flfile = CifFile.ReadCif('test.cif',scantype="flex",scoping='dictionary')
       # test passing a stream directly
       tstream = open('test.cif')
       CifFile.CifFile(tstream,scantype="flex")
       self.flf = flfile['testblock']
       self.flfs = flfile["Test_save_frame"]

   def tearDown(self):
       import os
       #os.remove('test.cif')
       del self.dfs
       del self.df
       del self.cf
       del self.ef
       del self.flf
       del self.flfs

   def testStringInOut(self):
       """Test writing short strings in and out"""
       self.failUnless(self.cf['_item_1']==self.df['_item_1'])
       self.failUnless(self.cf['_item_2']==self.df['_item_2'])
       self.failUnless(self.cfs['_sitem_1']==self.dfs['_sitem_1'])
       self.failUnless(self.cfs['_sitem_2']==self.dfs['_sitem_2'])
       self.failUnless(self.cfs['_sitem_1']==self.flfs['_sitem_1'])
       self.failUnless(self.cfs['_sitem_2']==self.flfs['_sitem_2'])

   def testApostropheInOut(self):
       """Test correct behaviour for values starting with apostrophes
       or quotation marks"""
       self.failUnless(self.cf['_item_quote']==self.df['_item_quote'])
       self.failUnless(self.cf['_item_apost']==self.df['_item_apost'])
       self.failUnless(self.cf['_item_quote']==self.flf['_item_quote'])
       self.failUnless(self.cf['_item_apost']==self.flf['_item_apost'])
       
   def testNumberInOut(self):
       """Test writing number in and out"""
       self.failUnless(self.cf['_item_3']==(self.df['_item_3']))
       self.failUnless(self.cfs['_sitem_3']==(self.dfs['_sitem_3']))
       self.failUnless(self.cf['_item_3']==(self.flf['_item_3']))
       self.failUnless(self.cfs['_sitem_3']==(self.flfs['_sitem_3']))

   def testLongStringInOut(self):
       """Test writing long string in and out
          Note that whitespace may vary due to carriage returns,
          so we remove all returns before comparing"""
       import re
       compstring = re.sub('\n','',self.df['_item_4'])
       self.failUnless(compstring == self.cf['_item_4'])
       compstring = re.sub('\n','',self.dfs['_sitem_4'])
       self.failUnless(compstring == self.cfs['_sitem_4'])
       compstring = re.sub('\n','',self.flf['_item_4'])
       self.failUnless(compstring == self.cf['_item_4'])
       compstring = re.sub('\n','',self.flfs['_sitem_4'])
       self.failUnless(compstring == self.cfs['_sitem_4'])

   def testEmptyStringInOut(self):
       """An empty string is in fact kosher""" 
       self.failUnless(self.cf['_item_empty']=='')
       self.failUnless(self.flf['_item_empty']=='')

   def testSemiWhiteSpace(self):
       """Test that white space in a semicolon string is preserved"""
       self.failUnless(self.cf['_item_sws']==self.df['_item_sws'])
       self.failUnless(self.cf['_item_sws']==self.flf['_item_sws'])

   def testLoopDataInOut(self):
       """Test writing in and out loop data"""
       olditems = self.cf.GetLoop('_item_5')
       for key,value in olditems.items():
           self.failUnless(tuple(map(str,value))==tuple(self.df[key]))
           self.failUnless(tuple(map(str,value))==tuple(self.flf[key]))
       # save frame test
       olditems = self.cfs.GetLoop('_sitem_5').items()
       for key,value in olditems:
           self.failUnless(tuple(map(str,value))==tuple(self.dfs[key]))
           self.failUnless(tuple(map(str,value))==tuple(self.flfs[key]))

   def testLoopStringInOut(self):
       """Test writing in and out string loop data"""
       olditems = self.cf.GetLoop('_string_1')
       newitems = self.df.GetLoop('_string_1')
       flexnewitems = self.flf.GetLoop('_string_1')
       for key,value in olditems.items():
           compstringa = map(lambda a:re.sub('\n','',a),value)
           compstringb = map(lambda a:re.sub('\n','',a),self.df[key])
           compstringc = map(lambda a:re.sub('\n','',a),self.flf[key])
           self.failUnless(compstringa==compstringb and compstringa==compstringc)

   def testGetLoopData(self):
       """Test the get method for looped data"""
       newvals = self.df.get('_string_1')
       self.failUnless(len(newvals)==3)

   def testCopySaveFrame(self):
       """Early implementations didn't copy the save frame properly"""
       jj = CifFile.CifFile(self.ef,scoping='dictionary')  #this will trigger a copy
       self.failUnless(len(jj["test_save_frame"])>0)

   def testFirstBlock(self):
       """Test that first_block returns a block"""
       self.ef.scoping = 'instance'  #otherwise all blocks are available
       jj = self.ef.first_block()
       self.failUnless(jj==self.df)

   def testDupName(self):
       """Test that duplicate blocknames are allowed in non-standard mode"""
       outstr = """data_block1 _data_1 b save_ab1 _data_2 c
                  save_
                  save_ab1 _data_3 d save_"""
       b = open("test2.cif","w")
       b.write(outstr)
       b.close()
       testin = CifFile.CifFile("test2.cif",standard=None)
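
# Illustrative sketch (not part of the test suite): the minimal write/read
# round trip that setUp above performs in full. The filename is arbitrary.
def _example_round_trip(filename='example.cif'):
    """Sketch: serialise a block with str() and read it back in."""
    block = CifFile.CifBlock()
    block['_example_item'] = 'a value'
    cif = CifFile.CifFile()
    cif['example_block'] = block
    outfile = open(filename,'w')
    outfile.write(str(cif))
    outfile.close()
    return CifFile.CifFile(filename)['example_block']['_example_item']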

class TemplateTestCase(unittest.TestCase):
   def setUp(self):
       """Create a template"""
       self.template = """#
# Template
#
data_TEST_DIC
 
    _dictionary.title            DDL_DIC
    _definition.update           2011-07-27
    _description.text
;
     This dictionary specifies through its layout how we desire to
     format datanames.  It is not a valid dictionary, but it must 
     be a valid CIF file.
;

    _name.object_id              ALIAS
    _category.key_id           '_alias.definition_id'
    _category.key_list        ['_alias.definition_id']
    _type.purpose                Key     
    _type.dimension              [*]
    _import.get    [{"file":'templ_enum.cif',"save":'units_code'}]
     loop_
    _enumeration_set.state
    _enumeration_set.detail
          Dictionary        "applies to all defined items in the dictionary"
          Category          "applies to all defined items in the category"
          Item              "applies to a single item definition"
    _enumeration.default        Item   
"""  

   def testTemplateInput(self):
       """Test that an output template is successfully input"""
       p = CifFile.CifBlock()
       p.process_template(self.template)
       self.failUnless(p.form_hints[0]['dataname']=='_dictionary.title')
       self.failUnless(p.form_hints[4]['column']==31)
       self.failUnless(p.form_hints[2]['delimiter']==';')
       self.failUnless(p.form_hints[9]['column']==10)
       self.failUnless(p.form_hints[10]['delimiter']=='"')
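
# Illustrative sketch (not part of the test suite): applying a layout
# template to a block. form_hints is the parsed result that the test above
# inspects; treat this as a sketch of that interface only.
def _example_template_hints(template_string):
    """Sketch: parse a template and return the per-dataname layout hints."""
    block = CifFile.CifBlock()
    block.process_template(template_string)
    return block.form_hints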

###### template tests #####
##############################################################
#
#   Test alternative grammars (1.0, DDLm)
#
##############################################################
class GrammarTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again."""
       teststr1_0 = """
       #A test CIF file, grammar version 1.0 conformant
       data_Test
         _item_1 'A simple item'
         _item_2 '(Bracket always ok in quotes)'
         _item_3 (can_have_bracket_here_if_1.0)
       """
       f = open("test_1.0","w")
       f.write(teststr1_0)
       f.close()

   def tearDown(self):
       pass

   def testold(self):
       """Read in 1.0 conformant file; should not fail"""
       f = CifFile.ReadCif("test_1.0",grammar="1.0")  
       print f["test"]["_item_3"]
      
   def testNew(self):
       """Read in a 1.0 conformant file with 1.1 grammar; should fail"""
       try:
           f = CifFile.ReadCif("test_1.0",grammar="1.1")  
       except StarFile.StarError:
           pass
       else: self.fail()

   def testObject(self):
       """Test use of grammar keyword when initialising object"""
       try:
           f = CifFile.CifFile("test_1.0",grammar="1.0")
       except StarFile.StarError:
           pass
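
# Illustrative sketch (not part of the test suite): selecting the CIF
# grammar at read time, as the tests above do. The filename is hypothetical.
def _example_grammar_read(filename):
    """Sketch: a 1.0-only construct parses under grammar="1.0" but is
    expected to raise StarFile.StarError under the stricter "1.1"."""
    old = CifFile.ReadCif(filename, grammar="1.0")
    try:
        CifFile.ReadCif(filename, grammar="1.1")
    except StarFile.StarError:
        pass
    return old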

class ParentChildTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a multi-save-frame file, read in again"""
       outstring = """
data_Toplevel
 _item_1         a
 save_1
   _s1_item1     b
   save_12
   _s12_item1    c
   save_
   save_13
   _s13_item1    d
   save_
 save_
 _item_2         e
 save_2
   _s2_item1     f
   save_21
   _s21_item1    g
     save_211
     _s211_item1 h
     save_
     save_212
     _s212_item1 i
     save_
    save_
   save_22
    _s22_item1   j
   save_
 save_
 save_toplevel
   _item_1       k
 save_
"""
       f = open('save_test.cif','w')
       f.write(outstring)
       f.close()
       self.testcif = CifFile.CifFile('save_test.cif',scoping='dictionary')

   def testGoodRead(self):
       """Check that there is a top level block"""
       self.failUnless('toplevel+' in [a[0] for a in self.testcif.child_table.items() if a[1].parent is None])
       self.failUnless(self.testcif.child_table['toplevel'].parent == 'toplevel+')

   def testGetParent(self):
       """Check that parent is correctly identified"""
       self.failUnless(self.testcif.get_parent('212')=='21')
       self.failUnless(self.testcif.get_parent('12')=='1')

   def testGetChildren(self):
       """Test that our child blocks are constructed correctly"""
       p = self.testcif.get_children('1')
       self.failUnless(p.has_key('13'))
       self.failUnless(not p.has_key('1'))
       self.failUnless(p.get_parent('13')==None)
       self.failUnless(p['12']['_s12_item1']=='c')

   def testGetChildrenwithParent(self):
       """Test that the parent is included if necessary"""
       p = self.testcif.get_children('1',include_parent=True)
       self.failUnless(p.has_key('1')) 
       self.failUnless(p.get_parent('13')=='1')
  
   def testSetParent(self):
       """Test that the parent is correctly set"""
       self.testcif.set_parent('1','211')
       q = self.testcif.get_children('1')
       self.failUnless('211' in q.keys())

   def testChangeParent(self):
       """Test that a duplicated save frame is OK if the duplicate name is a data block"""
       self.failUnless('toplevel+' in self.testcif.keys())
       self.failUnless(self.testcif.get_parent('1')=='toplevel+')

   def testRename1(self):
       """Test that re-identifying a datablock works"""
       self.testcif._rekey('2','timey-wimey')
       self.failUnless(self.testcif.get_parent('21')=='timey-wimey')
       self.failUnless(self.testcif.has_key('timey-wimey'))
       self.failUnless(self.testcif['timey-wimey']['_s2_item1']=='f')
       print str(self.testcif)
 
   def testRename2(self):
       """Test that renamng a block works"""
       self.testcif.rename('2','Timey-wimey')
       self.failUnless(self.testcif.has_key('timey-wimey'))
       self.failUnless(self.testcif.child_table['timey-wimey'].block_id=='Timey-wimey')
   
   def testUnlock(self):
       """Test that unlocking will change overwrite flag"""
       self.testcif['2'].overwrite = False
       self.testcif.unlock()
       self.failUnless(self.testcif['2'].overwrite is True)
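
# Illustrative sketch (not part of the test suite): navigating nested save
# frames with dictionary scoping, using the calls tested above.
def _example_frame_navigation(filename):
    """Sketch: read a multi-save-frame file and walk its hierarchy."""
    cif = CifFile.CifFile(filename, scoping='dictionary')
    children = cif.get_children('1')   # frames nested inside frame '1'
    parent = cif.get_parent('12')      # name of the frame enclosing '12'
    return list(children.keys()), parent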

class DDLmTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again."""
       teststr1_2 = """
       #A test CIF file, grammar version 1.2 nonconformant
       data_Test
         _item_1 'A simple item'
         _item_2 '(Bracket always ok in quotes)'
         _item_3 (can_have_bracket_here_if_1.2)
         _item_4 This_is_so_wrong?*~
       """
       goodstr1_2 = """
       #A test CIF file, grammar version 1.2 conformant with nested save frames
       data_Test
          _name.category_id           CIF_DIC
          _name.object_id             CIF_CORE
          _import.get       
        [{"save":'EXPERIMENTAL', "file":'core_exptl.dic', "mode":'full' },
         {"save":'DIFFRACTION',  "file":'core_diffr.dic', "mode":'full' },
         {"save":'STRUCTURE',    "file":'core_struc.dic', "mode":'full' },
         {"save":'MODEL',        "file":'core_model.dic', "mode":'full' },
         {"save":'PUBLICATION',  "file":'core_publn.dic', "mode":'full' },
         {"save":'FUNCTION',     "file":'core_funct.dic', "mode":'full' }]
        save_Savelevel1
         _item_in_save [1,2,3,4]
         save_saveLevel2
            _item_in_inside_save {"hello":"goodbye","e":"mc2"}
         save_
        save_
         _test.1 {"piffle":poffle,"wiffle":3,'''woffle''':9.2}
         _test_2 {"ping":[1,2,3,4],"pong":[a,b,c,d]}
         _test_3 {"ppp":{'qqq':2,'poke':{'joke':[5,6,7],'jike':[{'aa':bb,'cc':dd},{'ee':ff,"gg":100}]}},"rrr":[11,12,13]}
         _triple_quote_test '''The comma is ok if, the quotes
                                are ok'''
         _underscore_test underscores_are_allowed_inside_text
       """
       f = open("test_1.2","w")
       f.write(teststr1_2)
       f.close()
       f = open("goodtest_1.2","w")
       f.write(goodstr1_2)
       f.close()

   def tearDown(self):
       pass

   def testold(self):
       """Read in 1.2 nonconformant file; should fail"""
       try:
           f = CifFile.ReadCif("test_1.2",grammar="DDLm")  
       except StarFile.StarError:
           pass
       else: self.fail()
      
   def testgood(self):
       """Read in 1.2 conformant file: should succeed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       
   def testTables(self):
       """Test that DDLm tables are properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_test.1"]["wiffle"] == '3')

   def testTables2(self):
       """Test that a plain table is properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_import.get"][0]["file"] == 'core_exptl.dic')

   def testTables3(self):
       """Test that a nested structure is properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_test_3"]["ppp"]["poke"]["jike"][1]["gg"]=='100')

   def testTripleQuote(self):
       """Test that triple quoted values are treated correctly"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       print f["test"]["_triple_quote_test"]
       self.failUnless(f["test"]["_triple_quote_test"][:9] == 'The comma')

   def testRoundTrip(self):
       """Test that a DDLm file can be read in, written out and read in again"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       g = open("newgoodtest_1.2.cif","w")
       g.write(str(f))
       g.close()
       h = CifFile.ReadCif("newgoodtest_1.2.cif",grammar="DDLm")
       #print h['Test']
       #print h['Test']['_import.get']
       #print h['Test']['_import.get'][2]
       #print h['Test']['_import.get'][2]['file']
       self.failUnless(h['Test']['_import.get'][2]['file']=='core_struc.dic')
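
# Illustrative sketch (not part of the test suite): DDLm compound values
# come back as subscriptable structures, as the table tests above check.
def _example_ddlm_values(filename):
    """Sketch: list and table access for a DDLm-grammar file."""
    f = CifFile.ReadCif(filename, grammar="DDLm")
    table_entry = f["test"]["_test.1"]["wiffle"]         # table lookup
    import_file = f["test"]["_import.get"][0]["file"]    # list of tables
    return table_entry, import_file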

##########
#
# Test DDLm imports
#
##########
class DDLmImportCase(unittest.TestCase):
    def setUp(self):
        pass

##############################################################
#
# Test dictionary type
#
##############################################################
#ddl1dic = CifFile.CifDic("dictionaries/cif_core.dic",do_minimum=True)

class DictTestCase(unittest.TestCase):
    def setUp(self):
        self.ddldic = CifFile.CifDic("tests/ddl.dic",grammar='DDLm',scoping='dictionary',do_minimum=True)  #small DDLm dictionary

    def tearDown(self):
        pass

    def testnum_and_esd(self):
        """Test conversion of numbers with esds"""
        testnums = ["5.65","-76.24(3)","8(2)","6.24(3)e3","55.2(2)d4"]
        res = map(CifFile.get_number_with_esd,testnums)
        print `res`
        self.failUnless(res[0]==(5.65,None))
        self.failUnless(res[1]==(-76.24,0.03))
        self.failUnless(res[2]==(8,2))
        self.failUnless(res[3]==(6240,30))
        self.failUnless(res[4]==(552000,2000))
         
    def testdot(self):
        """Make sure a single dot is skipped"""
        res1,res2 = CifFile.get_number_with_esd(".")
        self.failUnless(res1==None)

    def testCategoryRename(self):
        """Test that renaming a category works correctly"""
        self.ddldic.change_category_name('Description','Opisanie')
        self.failUnless(self.ddldic.has_key('opisanie'))
        self.failUnless(self.ddldic['opisanie']['_name.object_id']=='Opisanie')
        self.failUnless(self.ddldic.has_key('opisanie.text'))
        self.failUnless(self.ddldic['opisanie.text']['_name.category_id']=='Opisanie')
        self.failUnless(self.ddldic['opisanie.text']['_definition.id']=='_Opisanie.text')
        self.failUnless(self.ddldic.has_key('description_example'))

    def testChangeItemCategory(self):
        """Test that changing an item's category works"""
        self.ddldic.change_category('_description.common','type')
        self.failUnless('_type.common' in self.ddldic)
        self.failUnless('_description.common' not in self.ddldic)
        self.failUnless(self.ddldic['_type.common']['_name.category_id'].lower()=='type')
        self.failUnless(self.ddldic.get_parent('_type.common')=='type')

    def testChangeCategoryCategory(self):
        """Test that changing a category's category works"""
        self.ddldic.change_category('description_example','attributes')
        self.failUnless(self.ddldic['description_example']['_name.category_id'].lower()=='attributes')
        self.failUnless(self.ddldic.get_parent('description_example')=='attributes')

    def testChangeName(self):
        """Test that changing the object_id works"""
        self.ddldic.change_name('_description.common','uncommon')
        self.failUnless('_description.uncommon' in self.ddldic)
        self.failUnless('_description.common' not in self.ddldic)
        self.failUnless(self.ddldic['_description.uncommon']['_name.object_id']=='uncommon')
        self.failUnless(self.ddldic['_description.uncommon']['_definition.id']=='_description.uncommon')

    def testNewCategory(self):
        """Test that we can add a new category"""
        self.ddldic.add_category('brand-new')
        self.failUnless('brand-new' in self.ddldic)
        self.failUnless(self.ddldic['brand-new']['_name.object_id']=='brand-new')
        self.failUnless(self.ddldic.get_parent('brand-new').lower()=='attributes')
        self.failUnless(self.ddldic['brand-new']['_name.category_id'].lower()=='attributes')

    def testNewDefinition(self):
        """Test that we can add a new definition"""
        self.ddldic.add_definition('_junkety._junkjunk_','description')
        self.failUnless('_description.junkjunk' in self.ddldic)
        self.failUnless(self.ddldic['_description.junkjunk']['_name.category_id'].lower()=='description')
        self.failUnless(self.ddldic['_description.junkjunk']['_name.object_id']=='junkjunk')
        self.failUnless(self.ddldic['_description.junkjunk']['_definition.id']=='_description.junkjunk')

    def testDeleteDefinition(self):
        """Test that we can delete a definition"""
        self.ddldic.remove_definition('_alias.deprecation_date')
        self.failUnless('_alias.deprecation_date' not in self.ddldic)

    def testDeleteCategory(self):
        """test that we can delete whole categories"""
        self.ddldic.remove_definition('description')
        self.failUnless('description' not in self.ddldic)
        self.failUnless('description_example' not in self.ddldic)
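
# Illustrative sketch (not part of the test suite): get_number_with_esd,
# checked above, splits a CIF number into (value, esd), with esd None
# when absent.
def _example_esd_parsing():
    """Sketch: "-76.24(3)" -> (-76.24, 0.03); "5.65" -> (5.65, None)."""
    with_esd = CifFile.get_number_with_esd("-76.24(3)")
    without_esd = CifFile.get_number_with_esd("5.65")
    return with_esd, without_esd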

# now for some value testing
class DDLmValueTestCase(unittest.TestCase):
    def setUp(self):
        filedata = """
data_testblock
_float.value 4.2
_hex.value 0xA2
_list1.value [1.2, 2.3, 4.5]
_list2.value [['i',4.2],['j',1.5],['lmnop',-4.5]]
_matrix.value [[1,2,3],[4,5,6],[7,8,9]]
"""
        p = open('ddlm_testdata','w')
        p.write(filedata)
        p.close()
        self.testblock = CifFile.CifFile('ddlm_testdata',grammar="DDLm")['testblock']
    
    def testTypeInterpretation(self):
        """Test that we decode DDLm type.contents correctly"""
        import TypeContentsParser as t
        p = t.TypeParser(t.TypeParserScanner('List(Real,Real,Real)'))
        q = getattr(p,"input")()
        print `q`
        self.failUnless(q == ['Real','Real','Real'])
        p = t.TypeParser(t.TypeParserScanner('List(Real,List(Integer,Real),Real)'))
        q = getattr(p,"input")()
        print `q`
        self.failUnless(q == ['Real',['Integer','Real'],'Real'])

    def testSingleConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'Single'
        namedef['_type.contents'] = 'Real'
        result = CifFile.convert_type(namedef)(self.testblock['_float.value'])
        self.failUnless(result == 4.2)

    def testListConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'List'
        namedef['_type.contents'] = 'List(Text,Real)'
        result = CifFile.convert_type(namedef)(self.testblock['_list2.value'])
        print 'Result: ' + `result`
        self.failUnless(result ==  [['i',4.2],['j',1.5],['lmnop',-4.5]])

    def testSimpleListConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'List'
        namedef['_type.contents'] = 'Real'
        result = CifFile.convert_type(namedef)(self.testblock['_list1.value'])
        self.failUnless(result ==  [1.2, 2.3, 4.5])

    def testMatrixConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'Matrix'
        namedef['_type.contents'] = 'Integer'
        result = CifFile.convert_type(namedef)(self.testblock['_matrix.value'])
        self.failUnless(result[1][2] == 6)

    def testValuesReturned(self):
        """Test that values are returned transparently converted when a dictionary is supplied"""
        pass
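
# Illustrative sketch (not part of the test suite): convert_type builds a
# converter function from a DDLm _type definition, as the conversions
# above demonstrate.
def _example_type_conversion(raw_list):
    """Sketch: convert a raw list of strings to floats via a List/Real spec."""
    namedef = CifFile.CifBlock()
    namedef['_type.container'] = 'List'
    namedef['_type.contents'] = 'Real'
    return CifFile.convert_type(namedef)(raw_list)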

##############################################################
#
#  Validation testing
#
##############################################################

# We first test single item checking
class DDL1TestCase(unittest.TestCase):

    def setUp(self):
        # self.ddl1dic = CifFile.CifFile("dictionaries/cif_core.dic")
        #items = (("_atom_site_label","S1"),
        #         ("_atom_site_fract_x","0.74799(9)"),
        #         ("_atom_site_adp_type","Umpe"),
        #         ("_this_is_not_in_dict","not here"))
        bl = CifFile.CifBlock()
        self.cf = CifFile.ValidCifFile(dic=ddl1dic)
        self.cf["test_block"] = bl
        self.cf["test_block"].AddCifItem(("_atom_site_label",
              ["C1","Cr2","H3","U4"]))

    def tearDown(self):
        del self.cf

    def testItemType(self):
        """Test that types are correctly checked and reported"""
        #numbers
        self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75"
        try:
            self.cf["test_block"]["_diffrn_radiation_wavelength"] = "moly"
        except CifFile.ValidCifError: pass

    def testItemEsd(self):
        """Test that non-esd items are not allowed with esds"""
        #numbers
        try:
            self.cf["test_block"]["_chemical_melting_point_gt"] = "1325(6)"
        except CifFile.ValidCifError: pass

    def testItemEnum(self):
        """Test that enumerations are understood"""
        self.cf["test_block"]["_diffrn_source_target"]="Cr"
        try:
            self.cf["test_block"]["_diffrn_source_target"]="2.5"
        except CifFile.ValidCifError: pass 
        else: self.fail()

    def testItemRange(self):
        """Test that ranges are correctly handled"""
        self.cf["test_block"]["_diffrn_source_power"] = "0.0"
        self.cf["test_block"]["_diffrn_standards_decay_%"] = "98"

    def testItemLooping(self):
        """test that list yes/no/both works"""
        pass

    def testListReference(self):
        """Test that _list_reference is handled correctly"""
        #can be both looped and unlooped; if unlooped, no need for ref.
        self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75"
        try:
            self.cf["test_block"].AddCifItem(((
                "_diffrn_radiation_wavelength",
                "_diffrn_radiation_wavelength_wt"),(("0.75","0.71"),("0.5","0.1"))))
        except CifFile.ValidCifError: pass
        else: self.fail()
        
    def testUniqueness(self):
        """Test that non-unique values are found"""
        # in cif_core.dic only one set is available
        try:
            self.cf["test_block"].AddCifItem(((
                "_publ_body_label",
                "_publ_body_element"),
                  (
                   ("1.1","1.2","1.3","1.2"),
                   ("section","section","section","section") 
                     )))
        except CifFile.ValidCifError: pass
        else: self.fail()

    def testParentChild(self):
	"""Test that non-matching values are reported"""
        self.assertRaises(CifFile.ValidCifError,
	    self.cf["test_block"].AddCifItem,
	    (("_geom_bond_atom_site_label_1","_geom_bond_atom_site_label_2"),
	      [["C1","C2","H3","U4"],
	      ["C1","Cr2","H3","U4"]]))	
	# now we test that a missing parent is flagged
        # self.assertRaises(CifFile.ValidCifError,
	#     self.cf["test_block"].AddCifItem,
	#     (("_atom_site_type_symbol","_atom_site_label"),
	#       [["C","C","N"],["C1","C2","N1"]]))

    def testReport(self):
        CifFile.validate_report(CifFile.validate("tests/C13H2203_with_errors.cif",dic=ddl1dic))
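
# Illustrative sketch (not part of the test suite): validating a data file
# against a loaded dictionary and reporting, as testReport above does.
# Both arguments are hypothetical; the dictionary must already be read in.
def _example_validation(datafile, dictionary):
    """Sketch: run validation and emit a human-readable report."""
    results = CifFile.validate(datafile, dic=dictionary)
    CifFile.validate_report(results)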

class FakeDicTestCase(unittest.TestCase):
# we test stuff that hasn't been used in official dictionaries to date.
    def setUp(self):
        self.testcif = CifFile.CifFile("dictionaries/novel_test.cif")

    def testTypeConstruct(self):
        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
                           diclist=["dictionaries/novel.dic"],datasource=self.testcif)
          
class DicMergeTestCase(unittest.TestCase):
    def setUp(self):
        self.offdic = CifFile.CifFile("dictionaries/dict_official",standard=None)
        self.adic = CifFile.CifFile("dictionaries/dict_A",standard=None)
        self.bdic = CifFile.CifFile("dictionaries/dict_B",standard=None)
        self.cdic = CifFile.CifFile("dictionaries/dict_C",standard=None)
        self.cvdica = CifFile.CifFile("dictionaries/cvdica.dic",standard=None)
        self.cvdicb = CifFile.CifFile("dictionaries/cvdicb.dic",standard=None)
        self.cvdicc = CifFile.CifFile("dictionaries/cvdicc.dic",standard=None)
        self.cvdicd = CifFile.CifFile("dictionaries/cvdicd.dic",standard=None)
        self.testcif = CifFile.CifFile("dictionaries/merge_test.cif",standard=None)
       
    def testAStrict(self):
        self.assertRaises(StarFile.StarError,CifFile.merge_dic,[self.offdic,self.adic],mergemode="strict")
        
    def testAOverlay(self):
        newdic = CifFile.merge_dic([self.offdic,self.adic],mergemode='overlay')
        # print newdic.__str__()
        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
                                  datasource="dictionaries/merge_test.cif",
                                  dic=newdic)
        
#    def testAReverseO(self):
#        # the reverse should be OK!
#        newdic = CifFile.merge_dic([self.adic,self.offdic],mergemode='overlay')
#        jj = CifFile.ValidCifFile(datasource="dictionaries/merge_test.cif",
#                                 dic = newdic)

#    def testCOverlay(self):
#        self.offdic = CifFile.merge_dic([self.offdic,self.cdic],mergemode='replace') 
#        print "New dic..."
#        print self.offdic.__str__()
#        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
#                          datasource="dictionaries/merge_test.cif",
#                          dic = self.offdic)

    # Now for the final example in "maintenance.html"
    def testCVOverlay(self):
        """Test the chained overlay example from the maintenance docs"""
        jj = open("merge_debug","w")
        newdic = CifFile.merge_dic([self.cvdica,self.cvdicb,self.cvdicc,self.cvdicd],mergemode='overlay')
        jj.write(newdic.__str__())
        jj.close()

#    def testKeyOverlay(self):
#        """Test that duplicate key values are not overlayed in loops"""
#        ff = CifFile.CifFile("dictionaries/merge_test_2.cif")["block_a"]
#        gg = CifFile.CifFile("dictionaries/merge_test_2.cif")["block_b"]
#        ff.merge(gg,mode="overlay",rel_keys = ["_loop_key"])
#        target_loop = ff.GetLoop("_loop_key")
#        print ff.__str__()

    def tearDown(self):
        pass
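
# Illustrative sketch (not part of the test suite): merging dictionaries
# with an explicit mode, as the cases above exercise: 'strict' raises
# StarFile.StarError on a definition clash, while 'overlay' lets later
# dictionaries extend earlier ones.
def _example_merge(dic_list):
    """Sketch: overlay-merge a list of already-read dictionary CifFiles."""
    return CifFile.merge_dic(dic_list, mergemode='overlay')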

if __name__=='__main__':
#     suite = unittest.TestLoader().loadTestsFromTestCase(BlockRWTestCase)
#     unittest.TextTestRunner(verbosity=2).run(suite)
     unittest.main()

Classes

class BasicUtilitiesTestCase

class BasicUtilitiesTestCase(unittest.TestCase):
    def testPlainLineFolding(self):
       """Test that we can fold a line correctly"""
       test_string = "1234567890123456789012"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       out_lines = outstring.split('\n')
       #print outstring
       self.failUnless(out_lines[0]=="\\")
       self.failUnless(len(out_lines[1])==10)

    def testPreWrappedFolding(self):
       """Test that pre-wrapped lines are untouched"""
       test_string = "123456789\n012345678\n9012"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring == test_string)

    def testManyLineEndings(self):
       """Test that empty lines are handled OK"""
       test_string = "123456789\n\n012345678\n\n9012\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring == test_string)

    def testOptionalBreak(self):
       """Test that internal whitespace is used to break"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       #print "\n;" + outstring + "\n;"
       out_lines = outstring.split('\n')
       self.failUnless(len(out_lines[1]) == 7)

    def testCorrectEnding(self):
       """Make sure that no line feeds are added/removed"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       self.failUnless(outstring[-4:] == "12\n\n")

    def testFoldingRemoval(self):
       """Test that we round-trip correctly"""
       test_string = "123456  7890123  45678\n90 12\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testTrickyFoldingRemoval(self):
       """Try to produce a tough string for unfolding"""
       test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testTrailingBackslash(self):
       """Make sure that a trailing backslash is not removed"""
       test_string = "\n123\\\n 456\\n\n"
       outstring = StarFile.apply_line_folding(test_string,5,10)
       old_string = StarFile.remove_line_folding(outstring)
       #print "Test:" + `test_string`
       #print "Fold:" + `outstring`
       #print "UnFo:" + `old_string`
       self.failUnless(old_string == test_string)

    def testFinalBackslash(self):
        """Make sure that a single final backslash is removed when unfolding"""
        test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
        folded_string = StarFile.apply_line_folding(test_string,5,10)
        folded_string = folded_string + "\ "
        old_string = StarFile.remove_line_folding(folded_string)
        self.failUnless(old_string == test_string)

    def testAddIndent(self):
        """Test insertion of a line prefix"""
        test_string = "\n12345\n678910\n\n"
        outstring = StarFile.apply_line_prefix(test_string,"abc>")
        print "Converted %s to %s " %(test_string,outstring)
        self.failUnless(outstring == "abc>\\\nabc>\nabc>12345\nabc>678910\nabc>\nabc>")

    def testRemoveIndent(self):
        """Test removal of a line prefix"""
        test_string = "abc>\\\nabc>12345\nabc>678910\nabc>\nabc>"
        outstring = StarFile.remove_line_prefix(test_string)
        print "Removed indent: " + `outstring`
        self.failUnless(outstring == "12345\n678910\n\n")

    def testReverseIndent(self):
        """Test reversible indentation of line"""
        test_string = "12345\n678910\n\n"
        outstring = StarFile.apply_line_prefix(test_string,"cif><")
        newtest = StarFile.remove_line_prefix(outstring)
        print 'Before indenting: ' + `test_string`
        print 'After indenting: ' + `outstring`
        print 'After unindent: ' + `newtest`
        self.failUnless(newtest == test_string)

    def testPrefixAndFold(self):
        """Test reversible folding and indenting"""
        test_string = "\n1234567890\\\n r t s 345 19\n\nlife don't talk to me about life"
        outstring = StarFile.apply_line_folding(test_string,5,10)
        indoutstring = StarFile.apply_line_prefix(outstring,"CIF>")
        newoutstring = StarFile.remove_line_prefix(indoutstring)
        newtest_string = StarFile.remove_line_folding(newoutstring)
        print "%s -> %s -> %s -> %s -> %s" % (`test_string`,`outstring`,`indoutstring`,`newoutstring`,`newtest_string`)
        self.failUnless(newtest_string == test_string)

Ancestors (in MRO)

class BlockChangeTestCase

class BlockChangeTestCase(unittest.TestCase):
   def setUp(self):
        self.cf = CifFile.CifBlock()
	self.names = (('_item_name_1','_item_name#2','_item_%$#3'),)
	self.values = (((1,2,3,4),('hello','good_bye','a space','# 4'),
	          (15.462, -99.34,10804,0.0001)),)
        self.cf.AddCifItem((self.names,self.values))
	self.cf['_non_loop_item'] = 'Non loop string item'
	self.cf['_number_item'] = 15.65
        self.cf['_planet'] = 'Saturn'
        self.cf['_satellite'] = 'Titan'
        self.cf['_rings']  = 'True'
       
   def tearDown(self):
       del self.cf

   def testFromBlockSet(self):
        """Test that we can use a CifBlock to set a CifBlock"""
        df = CifFile.CifFile()
        df.NewBlock('testname',self.cf)

   def testLoop(self):
        """Check GetLoop returns values and names in matching order"""
   	results = self.cf.GetLoop(self.names[0][2])
	for key in results.keys():
	    self.failUnless(key in self.names[0])
	    self.failUnless(tuple(results[key]) == self.values[0][list(self.names[0]).index(key)])
	
   def testSimpleRemove(self):
       """Check item deletion outside loop"""
       self.cf.RemoveCifItem('_non_loop_item')
       try:
           a = self.cf['_non_loop_item']
       except KeyError: pass
       else: self.fail()

   def testLoopRemove(self):
       """Check item deletion inside loop"""
       print "Before:\n"
       print self.cf.printsection()
       self.cf.RemoveCifItem(self.names[0][1])
       print "After:\n"
       print self.cf.printsection()
       try:
           a = self.cf[self.names[0][1]]
       except KeyError: pass
       else: self.fail()

   def testFullLoopRemove(self):
       """Check removal of all loop items"""
       for name in self.names[0]: self.cf.RemoveCifItem(name)
       self.failUnless(len(self.cf.loops)==0, `self.cf.loops`)

# test adding data to a loop.  We test straight addition, then make sure the errors
# happen at the right time
#
   def testAddToLoop(self):
       """Test adding to a loop"""
       adddict = {'_address':['1 high street','2 high street','3 high street','4 high st'],
                  '_address2':['Ecuador','Bolivia','Colombia','Mehico']}
       self.cf.AddToLoop('_item_name#2',adddict)
       newkeys = self.cf.GetLoop('_item_name#2').keys()
       self.failUnless(adddict.keys()[0] in newkeys)
       self.failUnless(len(self.cf.GetLoop('_item_name#2'))==len(self.values[0])+2)
       
   def testBadAddToLoop(self):
       """Test incorrect loop addition"""
       adddict = {'_address':['1 high street','2 high street','3 high street'],
                  '_address2':['Ecuador','Bolivia','Colombia']}
       try:
           self.cf.AddToLoop('_no_item',adddict)
       except KeyError: pass
       else: self.fail()
       try:
           self.cf.AddToLoop('_item_name#2',adddict)
       except StarFile.StarLengthError:
           pass 
       else: self.fail()

   def testChangeLoop(self):
       """Test changing pre-existing item in loop"""
       # Items should be silently replaced, but if an
       # item exists in a loop already, it should be
       # deleted from that loop first
       self.cf["_item_name_1"] = (5,6,7,8)

   def testLoopify(self):
       """Test changing unlooped data to looped data"""
       self.cf.Loopify(["_planet","_satellite","_rings"])
       newloop = self.cf.GetLoop("_rings")
       self.assertFalse(newloop.has_key("_number_item"))
       
   def testLoopifyCif(self):
       """Test changing unlooped data to looped data does 
          not touch already looped data for a CIF file"""
#      from IPython.Debugger import Tracer; debug_here = Tracer()
#      debug_here()
       self.cf.Loopify(["_planet","_satellite","_rings"])
       newloop = self.cf.GetLoop("_rings")
       newloop.Loopify(["_planet","_rings"])
       innerloop = newloop.GetLoop("_planet")
       self.assertTrue(innerloop.has_key("_satellite"))
       
#
#  Test the mapping type implementation
#
   def testGetOperation(self):
       """Test the get mapping call"""
       self.cf.get("_item_name_1")
       self.cf.get("_item_name_nonexist")

#
#  Test case insensitivity
#
   def testDataNameCase(self):
       """Test same name, different case causes error"""
       self.assertEqual(self.cf["_Item_Name_1"],self.cf["_item_name_1"])
       self.cf["_Item_NaMe_1"] = "the quick pewse fox"
       self.assertEqual(self.cf["_Item_NaMe_1"],self.cf["_item_name_1"])

#  Test iteration
#
   def testIteration(self):
       """We create an iterator and iterate"""
       testloop = self.cf.GetLoop("_item_name_1")
       i = 0
       for test_pack in testloop:
           self.assertEqual(test_pack._item_name_1,self.values[0][0][i]) 
           self.assertEqual(getattr(test_pack,"_item_name#2"),self.values[0][1][i]) 
           i += 1

   def testPacketContents(self):
       """Test that body of packet is filled in as well"""
       testloop = self.cf.GetLoop("_item_name_1")
       it_order = testloop.GetItemOrder()
       itn_pos = it_order.index("_item_name_1")
       for test_pack in testloop:
           print 'Test pack: ' + `test_pack`
           self.assertEqual(test_pack._item_name_1,test_pack[itn_pos])

   def testPacketAttr(self):
       """Test that packets have attributes"""
       testloop = self.cf.GetLoop("_item_name_1")
       self.assertEqual(testloop[1]._item_name_1,2)

   def testKeyPacket(self):
       """Test that a packet can be returned by key value"""
       testloop = self.cf.GetLoop("_item_name_1")
       testpack = testloop.GetKeyedPacket("_item_name_1",2)
       self.assertEqual("good_bye",getattr(testpack,"_item_name#2"))

   def testRemovePacket(self):
       """Test that removing a packet works properly"""
       print 'Before packet removal'
       print str(self.cf)
       testloop = self.cf.GetLoop("_item_name_1")
       testloop.RemoveKeyedPacket("_item_name_1",3)
       jj = testloop.GetKeyedPacket("_item_name_1",2)
       kk = testloop.GetKeyedPacket("_item_name_1",4)
       self.assertEqual(getattr(jj,"_item_name#2"),"good_bye")
       self.assertEqual(getattr(kk,"_item_name#2"),"# 4")
       self.assertRaises(KeyError,testloop.GetKeyedPacket,"_item_name_1",3)
       print 'After packet removal:'
       print str(self.cf)

   def testAddPacket(self):
       """Test that we can add a packet"""
       import copy
       testloop = self.cf.GetLoop("_item_name_1")
       workingpacket = copy.copy(testloop.GetPacket(0))
       workingpacket._item_name_1 = 5
       workingpacket.__setattr__("_item_name#2", 'new' )
       testloop.AddPacket(workingpacket)
       # note we assume that this adds on to the end, which is not 
       # a CIF requirement
       self.assertEqual(testloop["_item_name_1"][4],5)
       self.assertEqual(testloop["_item_name#2"][4],'new')

#
#  Test changing item order
#
   def testChangeOrder(self):
       """We move some stuff around"""
       testloop = self.cf.GetLoop("_item_name_1")
       self.cf.ChangeItemOrder("_Number_Item",0)
       testloop.ChangeItemOrder("_Item_Name_1",2)
       self.assertEqual(testloop.GetItemOrder()[2],"_Item_Name_1")
       self.assertEqual(self.cf.GetItemOrder()[0],"_Number_Item")
       
   def testGetOrder(self):
       """Test that the correct order value is returned"""
       self.assertEqual(self.cf.GetItemPosition("_Number_Item"),2)

   def testReplaceOrder(self):
       """Test that a replaced item is at the same position it
	  previously held"""
       testloop = self.cf.GetLoop("_item_name_1")
       oldpos = testloop.GetItemPosition('_item_name#2')
       testloop['_item_name#2'] = ("I'm",' a ','little','teapot')
       self.assertEqual(testloop.GetItemPosition('_item_name#2'),oldpos)

Ancestors (in MRO)

class BlockNameTestCase

class BlockNameTestCase(unittest.TestCase):
   def testBlockName(self):
       """Make sure long block names cause errors"""
       df = CifFile.CifBlock()
       cf = CifFile.CifFile()
       try:
           cf['a_very_long_block_name_which_should_be_rejected_out_of_hand123456789012345678']=df
       except StarFile.StarError: pass
       else: self.fail()

   def testBlockOverwrite(self):
       """Upper/lower case should be seen as identical"""
       df = CifFile.CifBlock()
       ef = CifFile.CifBlock()
       cf = CifFile.CifFile(standard=None)
       df['_random_1'] = 'oldval'
       ef['_random_1'] = 'newval'
       print 'cf.standard is ' + `cf.standard`
       cf['_lowercaseblock'] = df
       cf['_LowerCaseBlock'] = ef
       assert(cf['_Lowercaseblock']['_random_1'] == 'newval')
       assert(len(cf) == 1)

   def testEmptyBlock(self):
       """Test that empty blocks are not the same object"""
       cf = CifFile.CifFile()
       cf.NewBlock('first_block')
       cf.NewBlock('second_block')
       cf['first_block']['_test1'] = 'abc'
       cf['second_block']['_test1'] = 'def'
       self.failUnless(cf['first_block']['_test1']=='abc')

Ancestors (in MRO)

class BlockRWTestCase

class BlockRWTestCase(unittest.TestCase):
    def setUp(self):
    	# we want to get a datablock ready so that the test
	# case will be able to write a single item
	self.cf = CifFile.CifBlock()

    def tearDown(self):
        # get rid of our test object
	del self.cf
	
    def testTupleNumberSet(self):
        """Test tuple setting with numbers"""
        self.cf['_test_tuple'] = (11,13.5,-5.6)
        self.failUnless(map(float,
	     self.cf['_test_tuple']))== [11,13.5,-5.6]

    def testTupleComplexSet(self):
        """Test setting multiple names in loop"""
	names = (('_item_name_1','_item_name#2','_item_%$#3'),)
	values = (((1,2,3,4),('hello','good_bye','a space','# 4'),
	          (15.462, -99.34,10804,0.0001)),)
        self.cf.AddCifItem((names,values))
	self.failUnless(tuple(map(float, self.cf[names[0][0]])) == values[0][0])
	self.failUnless(tuple(self.cf[names[0][1]]) == values[0][1])
	self.failUnless(tuple(map(float, self.cf[names[0][2]])) == values[0][2])

    def testStringSet(self):
        """test string setting"""
        self.cf['_test_string_'] = 'A short string'
	self.failUnless(self.cf['_test_string_'] == 'A short string')

    def testTooLongSet(self):
        """test setting overlong data names"""
        dataname = '_a_long_long_'*7
        try:
            self.cf[dataname] = 1.0
        except (StarFile.StarError,CifFile.CifError): pass
        else: self.fail()

    def testTooLongLoopSet(self):
        """test setting overlong data names in a loop"""
        dataname = '_a_long_long_'*7
        try:
            self.cf[dataname] = (1.0,2.0,3.0)
        except (StarFile.StarError,CifFile.CifError): pass
        else: self.fail()

    def testBadStringSet(self):
        """test setting values with bad characters"""
        dataname = '_name_is_ok'
        try:
            self.cf[dataname] = "eca234\f\vaqkadlf"
        except StarFile.StarError: pass
        else: self.fail()

    def testBadNameSet(self):
        """test setting names with bad characters"""
        dataname = "_this_is_not ok"
        try:
            self.cf[dataname] = "nnn"
        except StarFile.StarError: pass
        else: self.fail()

    def testMoreBadStrings(self):
        dataname = "_name_is_ok"
        val = u"so far, ok, but now we have a " + unichr(128)
        try:
            self.cf[dataname] = val
        except StarFile.StarError: pass
        else: self.fail()

    def testEmptyString(self):
        """An empty string is, in fact, legal"""
        self.cf['_an_empty_string'] = ''
        
    def testStarList(self):
        """Test that a StarList is treated as a primitive item"""
        self.cf['_a_star_list'] = StarFile.StarList([1,2,3,4])
        jj = self.cf.GetLoop('_a_star_list')
        self.failUnless(jj.dimension==0)

Ancestors (in MRO)

class DDL1TestCase

class DDL1TestCase(unittest.TestCase):

    def setUp(self):
	# self.ddl1dic = CifFile.CifFile("dictionaries/cif_core.dic")
	#items = (("_atom_site_label","S1"),
	#	 ("_atom_site_fract_x","0.74799(9)"),
        #         ("_atom_site_adp_type","Umpe"),
	#	 ("_this_is_not_in_dict","not here"))
	bl = CifFile.CifBlock()
	self.cf = CifFile.ValidCifFile(dic=ddl1dic)
	self.cf["test_block"] = bl
	self.cf["test_block"].AddCifItem(("_atom_site_label",
	      ["C1","Cr2","H3","U4"]))	

    def tearDown(self):
        del self.cf

    def testItemType(self):
        """Test that types are correctly checked and reported"""
        #numbers
        self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75"
        try:
            self.cf["test_block"]["_diffrn_radiation_wavelength"] = "moly"
        except CifFile.ValidCifError: pass

    def testItemEsd(self):
        """Test that non-esd items are not allowed with esds"""
        #numbers
        try:
            self.cf["test_block"]["_chemical_melting_point_gt"] = "1325(6)"
        except CifFile.ValidCifError: pass

    def testItemEnum(self):
        """Test that enumerations are understood"""
        self.cf["test_block"]["_diffrn_source_target"]="Cr"
        try:
            self.cf["test_block"]["_diffrn_source_target"]="2.5"
        except CifFile.ValidCifError: pass 
        else: self.fail()

    def testItemRange(self):
        """Test that ranges are correctly handled"""
        self.cf["test_block"]["_diffrn_source_power"] = "0.0"
        self.cf["test_block"]["_diffrn_standards_decay_%"] = "98"

    def testItemLooping(self):
        """test that list yes/no/both works"""
        pass

    def testListReference(self):
        """Test that _list_reference is handled correctly"""
        #can be both looped and unlooped; if unlooped, no need for ref.
        self.cf["test_block"]["_diffrn_radiation_wavelength"] = "0.75"
        try:
            self.cf["test_block"].AddCifItem(((
                "_diffrn_radiation_wavelength",
                "_diffrn_radiation_wavelength_wt"),(("0.75","0.71"),("0.5","0.1"))))
        except CifFile.ValidCifError: pass
        else: self.fail()
        
    def testUniqueness(self):
        """Test that non-unique values are found"""
        # in cif_core.dic only one set is available
        try:
            self.cf["test_block"].AddCifItem(((
                "_publ_body_label",
                "_publ_body_element"),
                  (
                   ("1.1","1.2","1.3","1.2"),
                   ("section","section","section","section") 
                     )))
        except CifFile.ValidCifError: pass
        else: self.fail()

    def testParentChild(self):
	"""Test that non-matching values are reported"""
        self.assertRaises(CifFile.ValidCifError,
	    self.cf["test_block"].AddCifItem,
	    (("_geom_bond_atom_site_label_1","_geom_bond_atom_site_label_2"),
	      [["C1","C2","H3","U4"],
	      ["C1","Cr2","H3","U4"]]))	
	# now we test that a missing parent is flagged
        # self.assertRaises(CifFile.ValidCifError,
	#     self.cf["test_block"].AddCifItem,
	#     (("_atom_site_type_symbol","_atom_site_label"),
	#       [["C","C","N"],["C1","C2","N1"]]))

    def testReport(self):
        CifFile.validate_report(CifFile.validate("tests/C13H2203_with_errors.cif",dic=ddl1dic))

Ancestors (in MRO)

class DDLmImportCase

class DDLmImportCase(unittest.TestCase):
    def setUp(self):
        pass

Ancestors (in MRO)

class DDLmTestCase

class DDLmTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again."""
       teststr1_2 = """
       #A test CIF file, grammar version 1.2 nonconformant
       data_Test
         _item_1 'A simple item'
         _item_2 '(Bracket always ok in quotes)'
         _item_3 (can_have_bracket_here_if_1.2)
         _item_4 This_is_so_wrong?*~
       """
       goodstr1_2 = """
       #A test CIF file, grammar version 1.2 conformant with nested save frames
       data_Test
          _name.category_id           CIF_DIC
          _name.object_id             CIF_CORE
          _import.get       
        [{"save":'EXPERIMENTAL', "file":'core_exptl.dic', "mode":'full' },
         {"save":'DIFFRACTION',  "file":'core_diffr.dic', "mode":'full' },
         {"save":'STRUCTURE',    "file":'core_struc.dic', "mode":'full' },
         {"save":'MODEL',        "file":'core_model.dic', "mode":'full' },
         {"save":'PUBLICATION',  "file":'core_publn.dic', "mode":'full' },
         {"save":'FUNCTION',     "file":'core_funct.dic', "mode":'full' }]
        save_Savelevel1
         _item_in_save [1,2,3,4]
         save_saveLevel2
            _item_in_inside_save {"hello":"goodbye","e":"mc2"}
         save_
        save_
         _test.1 {"piffle":poffle,"wiffle":3,'''woffle''':9.2}
         _test_2 {"ping":[1,2,3,4],"pong":[a,b,c,d]}
         _test_3 {"ppp":{'qqq':2,'poke':{'joke':[5,6,7],'jike':[{'aa':bb,'cc':dd},{'ee':ff,"gg":100}]}},"rrr":[11,12,13]}
         _triple_quote_test '''The comma is ok if, the quotes
                                are ok'''
         _underscore_test underscores_are_allowed_inside_text
       """
       f = open("test_1.2","w")
       f.write(teststr1_2)
       f.close()
       f = open("goodtest_1.2","w")
       f.write(goodstr1_2)
       f.close()

   def tearDown(self):
	pass

   def testold(self):
       """Read in 1.2 nonconformant file; should fail"""
       try:
           f = CifFile.ReadCif("test_1.2",grammar="DDLm")  
       except StarFile.StarError:
           pass
      
   def testgood(self):
       """Read in 1.2 conformant file: should succeed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       
   def testTables(self):
       """Test that DDLm tables are properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_test.1"]["wiffle"] == '3')

   def testTables2(self):
       """Test that a plain table is properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_import.get"][0]["file"] == 'core_exptl.dic')

   def testTables3(self):
       """Test that a nested structure is properly parsed"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       self.failUnless(f["test"]["_test_3"]["ppp"]["poke"]["jike"][1]["gg"]=='100')

   def testTripleQuote(self):
       """Test that triple quoted values are treated correctly"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       print f["test"]["_triple_quote_test"]
       self.failUnless(f["test"]["_triple_quote_test"][:9] == 'The comma')

   def testRoundTrip(self):
       """Test that a DDLm file can be read in, written out and read in again"""
       f = CifFile.ReadCif("goodtest_1.2",grammar="DDLm")
       g = open("newgoodtest_1.2.cif","w")
       g.write(str(f))
       g.close()
       h = CifFile.ReadCif("newgoodtest_1.2.cif",grammar="DDLm")
       #print h['Test']
       #print h['Test']['_import.get']
       #print h['Test']['_import.get'][2]
       #print h['Test']['_import.get'][2]['file']
       self.failUnless(h['Test']['_import.get'][2]['file']=='core_struc.dic')

Ancestors (in MRO)

class DDLmValueTestCase

class DDLmValueTestCase(unittest.TestCase):
    def setUp(self):
        filedata = """
data_testblock
_float.value 4.2
_hex.value 0xA2
_list1.value [1.2, 2.3, 4.5]
_list2.value [['i',4.2],['j',1.5],['lmnop',-4.5]]
_matrix.value [[1,2,3],[4,5,6],[7,8,9]]
"""
        p = open('ddlm_testdata','w')
        p.write(filedata)
        p.close()
        self.testblock = CifFile.CifFile('ddlm_testdata',grammar="DDLm")['testblock']
    
    def testTypeInterpretation(self):
        """Test that we decode DDLm type.contents correctly"""
        import TypeContentsParser as t
        p = t.TypeParser(t.TypeParserScanner('List(Real,Real,Real)'))
        q = getattr(p,"input")()
        print `q`
        self.failUnless(q == ['Real','Real','Real'])
        p = t.TypeParser(t.TypeParserScanner('List(Real,List(Integer,Real),Real)'))
        q = getattr(p,"input")()
        print `q`
        self.failUnless(q == ['Real',['Integer','Real'],'Real'])

    def testSingleConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'Single'
        namedef['_type.contents'] = 'Real'
        result = CifFile.convert_type(namedef)(self.testblock['_float.value'])
        self.failUnless(result == 4.2)

    def testListConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'List'
        namedef['_type.contents'] = 'List(Text,Real)'
        result = CifFile.convert_type(namedef)(self.testblock['_list2.value'])
        print 'Result: ' + `result`
        self.failUnless(result ==  [['i',4.2],['j',1.5],['lmnop',-4.5]])

    def testSimpleListConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'List'
        namedef['_type.contents'] = 'Real'
        result = CifFile.convert_type(namedef)(self.testblock['_list1.value'])
        self.failUnless(result ==  [1.2, 2.3, 4.5])

    def testMatrixConversion(self):
        namedef = CifFile.CifBlock()
        namedef['_type.container'] = 'Matrix'
        namedef['_type.contents'] = 'Integer'
        result = CifFile.convert_type(namedef)(self.testblock['_matrix.value'])
        self.failUnless(result[1][2] == 6)

    def testValuesReturned(self):
        """Test that values are returned transparently converted when a dictionary is supplied"""
        pass
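
The conversion machinery targeted above can be sketched in isolation (a
minimal sketch; `testblock` is assumed to have been read from the
'ddlm_testdata' file written in setUp):

namedef = CifFile.CifBlock()
namedef['_type.container'] = 'List'
namedef['_type.contents'] = 'Real'
converter = CifFile.convert_type(namedef)    # returns a conversion function
print(converter(testblock['_list1.value']))  # -> [1.2, 2.3, 4.5]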

class DicMergeTestCase

class DicMergeTestCase(unittest.TestCase):
    def setUp(self):
        self.offdic = CifFile.CifFile("dictionaries/dict_official",standard=None)
        self.adic = CifFile.CifFile("dictionaries/dict_A",standard=None)
        self.bdic = CifFile.CifFile("dictionaries/dict_B",standard=None)
        self.cdic = CifFile.CifFile("dictionaries/dict_C",standard=None)
        self.cvdica = CifFile.CifFile("dictionaries/cvdica.dic",standard=None)
        self.cvdicb = CifFile.CifFile("dictionaries/cvdicb.dic",standard=None)
        self.cvdicc = CifFile.CifFile("dictionaries/cvdicc.dic",standard=None)
        self.cvdicd = CifFile.CifFile("dictionaries/cvdicd.dic",standard=None)
        self.testcif = CifFile.CifFile("dictionaries/merge_test.cif",standard=None)
       
    def testAStrict(self):
        self.assertRaises(StarFile.StarError,CifFile.merge_dic,[self.offdic,self.adic],mergemode="strict")
        
    def testAOverlay(self):
        newdic = CifFile.merge_dic([self.offdic,self.adic],mergemode='overlay')
        # print newdic.__str__()
        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
                                  datasource="dictionaries/merge_test.cif",
                                  dic=newdic)
        
#    def testAReverseO(self):
#        # the reverse should be OK!
#        newdic = CifFile.merge_dic([self.adic,self.offdic],mergemode='overlay')
#        jj = CifFile.ValidCifFile(datasource="dictionaries/merge_test.cif",
#                                 dic = newdic)

#    def testCOverlay(self):
#        self.offdic = CifFile.merge_dic([self.offdic,self.cdic],mergemode='replace') 
#        print "New dic..."
#        print self.offdic.__str__()
#        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
#                          datasource="dictionaries/merge_test.cif",
#                          dic = self.offdic)

    # now for the final example in "maintenance.html"
    def testCVOverlay(self):
        jj = open("merge_debug","w")
        newdic = CifFile.merge_dic([self.cvdica,self.cvdicb,self.cvdicc,self.cvdicd],mergemode='overlay')
        jj.write(newdic.__str__())

#    def testKeyOverlay(self):
#        """Test that duplicate key values are not overlayed in loops"""
#        ff = CifFile.CifFile("dictionaries/merge_test_2.cif")["block_a"]
#        gg = CifFile.CifFile("dictionaries/merge_test_2.cif")["block_b"]
#        ff.merge(gg,mode="overlay",rel_keys = ["_loop_key"])
#        target_loop = ff.GetLoop("_loop_key")
#        print ff.__str__()

    def tearDown(self):
        pass

class DictTestCase

class DictTestCase(unittest.TestCase):
    def setUp(self):
        self.ddldic = CifFile.CifDic("tests/ddl.dic",grammar='DDLm',scoping='dictionary',do_minimum=True)  #small DDLm dictionary

    def tearDown(self):
        pass

    def testnum_and_esd(self):
        """Test conversion of numbers with esds"""
        testnums = ["5.65","-76.24(3)","8(2)","6.24(3)e3","55.2(2)d4"]
        res = list(map(CifFile.get_number_with_esd,testnums))
        print(repr(res))
        self.failUnless(res[0]==(5.65,None))
        self.failUnless(res[1]==(-76.24,0.03))
        self.failUnless(res[2]==(8,2))
        self.failUnless(res[3]==(6240,30))
        self.failUnless(res[4]==(552000,2000))
         
    def testdot(self):
        """Make sure a single dot is skipped"""
        res1,res2 = CifFile.get_number_with_esd(".")
        self.failUnless(res1==None)

    def testCategoryRename(self):
        """Test that renaming a category works correctly"""
        self.ddldic.change_category_name('Description','Opisanie')
        self.failUnless('opisanie' in self.ddldic)
        self.failUnless(self.ddldic['opisanie']['_name.object_id']=='Opisanie')
        self.failUnless('opisanie.text' in self.ddldic)
        self.failUnless(self.ddldic['opisanie.text']['_name.category_id']=='Opisanie')
        self.failUnless(self.ddldic['opisanie.text']['_definition.id']=='_Opisanie.text')
        self.failUnless('description_example' in self.ddldic)

    def testChangeItemCategory(self):
        """Test that changing an item's category works"""
        self.ddldic.change_category('_description.common','type')
        self.failUnless('_type.common' in self.ddldic)
        self.failUnless('_description.common' not in self.ddldic)
        self.failUnless(self.ddldic['_type.common']['_name.category_id'].lower()=='type')
        self.failUnless(self.ddldic.get_parent('_type.common')=='type')

    def testChangeCategoryCategory(self):
        """Test that changing a category's category works"""
        self.ddldic.change_category('description_example','attributes')
        self.failUnless(self.ddldic['description_example']['_name.category_id'].lower()=='attributes')
        self.failUnless(self.ddldic.get_parent('description_example')=='attributes')

    def testChangeName(self):
        """Test that changing the object_id works"""
        self.ddldic.change_name('_description.common','uncommon')
        self.failUnless('_description.uncommon' in self.ddldic)
        self.failUnless('_description.common' not in self.ddldic)
        self.failUnless(self.ddldic['_description.uncommon']['_name.object_id']=='uncommon')
        self.failUnless(self.ddldic['_description.uncommon']['_definition.id']=='_description.uncommon')

    def testNewCategory(self):
        """Test that we can add a new category"""
        self.ddldic.add_category('brand-new')
        self.failUnless('brand-new' in self.ddldic)
        self.failUnless(self.ddldic['brand-new']['_name.object_id']=='brand-new')
        self.failUnless(self.ddldic.get_parent('brand-new').lower()=='attributes')
        self.failUnless(self.ddldic['brand-new']['_name.category_id'].lower()=='attributes')

    def testNewDefinition(self):
        """Test that we can add a new definition"""
        self.ddldic.add_definition('_junkety._junkjunk_','description')
        self.failUnless('_description.junkjunk' in self.ddldic)
        self.failUnless(self.ddldic['_description.junkjunk']['_name.category_id'].lower()=='description')
        self.failUnless(self.ddldic['_description.junkjunk']['_name.object_id']=='junkjunk')
        self.failUnless(self.ddldic['_description.junkjunk']['_definition.id']=='_description.junkjunk')

    def testDeleteDefinition(self):
        """Test that we can delete a definition"""
        self.ddldic.remove_definition('_alias.deprecation_date')
        self.failUnless('_alias.deprecation_date' not in self.ddldic)

    def testDeleteCategory(self):
        """test that we can delete whole categories"""
        self.ddldic.remove_definition('description')
        self.failUnless('description' not in self.ddldic)
        self.failUnless('description_example' not in self.ddldic)
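
A compact sketch of the esd handling exercised by testnum_and_esd above:
get_number_with_esd returns a (value, esd) tuple, with esd None when no
uncertainty is given.

value, esd = CifFile.get_number_with_esd("-76.24(3)")
print(value, esd)   # -> -76.24 0.03
value, esd = CifFile.get_number_with_esd("6.24(3)e3")
print(value, esd)   # -> 6240 30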

class FakeDicTestCase

class FakeDicTestCase(unittest.TestCase):
# we test stuff that hasn't been used in official dictionaries to date.
    def setUp(self):
        self.testcif = CifFile.CifFile("dictionaries/novel_test.cif")

    def testTypeConstruct(self):
        self.assertRaises(CifFile.ValidCifError,CifFile.ValidCifFile,
                           diclist=["dictionaries/novel.dic"],datasource=self.testcif)

class FileWriteTestCase

class FileWriteTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again. Non alphabetic ordering to
          check order preservation and mixed case."""
       # fill up the block with stuff
       items = (('_item_1','Some data'),
             ('_item_3','34.2332'),
             ('_item_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'),
             ('_item_2','Some_underline_data'),
             ('_item_empty',''),
             ('_item_quote',"'ABC"),
             ('_item_apost','"def'),
             ('_item_sws'," \n "),
             (('_item_5','_item_7','_item_6'),
             ([1,2,3,4],
              ['a','b','c','d'],
              [5,6,7,8])),
             (('_string_1','_string_2'),
              ([';this string begins with a semicolon',
               'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough',
               ';just_any_old_semicolon-starting-string'],
               ['a string with a final quote"',
               'a string with a " and a safe\';',
               'a string with a final \''])))
       # save block items as well
       s_items = (('_sitem_1','Some save data'),
             ('_sitem_2','Some_underline_data'),
             ('_sitem_3','34.2332'),
             ('_sitem_4','Some very long data which we hope will overflow the single line and force printing of another line aaaaa bbbbbb cccccc dddddddd eeeeeeeee fffffffff hhhhhhhhh iiiiiiii jjjjjj'),
             (('_sitem_5','_sitem_6','_sitem_7'),
             ([1,2,3,4],
              [5,6,7,8],
              ['a','b','c','d'])),
             (('_string_1','_string_2'),
              ([';this string begins with a semicolon',
               'this string is way way too long and should overflow onto the next line eventually if I keep typing for long enough',
               ';just_any_old_semicolon-starting-string'],
               ['a string with a final quote"',
               'a string with a " and a safe\';',
               'a string with a final \''])))
       self.cf = CifFile.CifBlock(items)
       cif = CifFile.CifFile(scoping='dictionary')
       cif['Testblock'] = self.cf
       # Add some comments
       self.cf.AddComment('_item_empty',"Test of an empty string")
       self.cf.AddComment('_item_apost',"Test of a trailing apostrophe")
       self.save_block = CifFile.CifBlock(s_items)
       cif.NewBlock("test_Save_frame",self.save_block,parent='testblock')
       self.cfs = cif["test_save_frame"]
       outfile = open('test.cif','w')
       outfile.write(str(cif))
       outfile.close()
       self.ef = CifFile.CifFile('test.cif',scoping='dictionary')
       self.df = self.ef['testblock']
       self.dfs = self.ef["test_save_frame"]
       flfile = CifFile.ReadCif('test.cif',scantype="flex",scoping='dictionary')
       # test passing a stream directly
       tstream = open('test.cif')
       CifFile.CifFile(tstream,scantype="flex")
       self.flf = flfile['testblock']
       self.flfs = flfile["Test_save_frame"]

   def tearDown(self):
       import os
       #os.remove('test.cif')
       del self.dfs
       del self.df
       del self.cf
       del self.ef
       del self.flf
       del self.flfs

   def testStringInOut(self):
       """Test writing short strings in and out"""
       self.failUnless(self.cf['_item_1']==self.df['_item_1'])
       self.failUnless(self.cf['_item_2']==self.df['_item_2'])
       self.failUnless(self.cfs['_sitem_1']==self.dfs['_sitem_1'])
       self.failUnless(self.cfs['_sitem_2']==self.dfs['_sitem_2'])
       self.failUnless(self.cfs['_sitem_1']==self.flfs['_sitem_1'])
       self.failUnless(self.cfs['_sitem_2']==self.flfs['_sitem_2'])

   def testApostropheInOut(self):
       """Test correct behaviour for values starting with apostrophes
       or quotation marks"""
       self.failUnless(self.cf['_item_quote']==self.df['_item_quote'])
       self.failUnless(self.cf['_item_apost']==self.df['_item_apost'])
       self.failUnless(self.cf['_item_quote']==self.flf['_item_quote'])
       self.failUnless(self.cf['_item_apost']==self.flf['_item_apost'])
       
   def testNumberInOut(self):
       """Test writing number in and out"""
       self.failUnless(self.cf['_item_3']==(self.df['_item_3']))
       self.failUnless(self.cfs['_sitem_3']==(self.dfs['_sitem_3']))
       self.failUnless(self.cf['_item_3']==(self.flf['_item_3']))
       self.failUnless(self.cfs['_sitem_3']==(self.flfs['_sitem_3']))

   def testLongStringInOut(self):
       """Test writing long string in and out
          Note that whitespace may vary due to carriage returns,
          so we remove all returns before comparing"""
       import re
       compstring = re.sub('\n','',self.df['_item_4'])
       self.failUnless(compstring == self.cf['_item_4'])
       compstring = re.sub('\n','',self.dfs['_sitem_4'])
       self.failUnless(compstring == self.cfs['_sitem_4'])
       compstring = re.sub('\n','',self.flf['_item_4'])
       self.failUnless(compstring == self.cf['_item_4'])
       compstring = re.sub('\n','',self.flfs['_sitem_4'])
       self.failUnless(compstring == self.cfs['_sitem_4'])

   def testEmptyStringInOut(self):
       """An empty string is in fact kosher""" 
       self.failUnless(self.cf['_item_empty']=='')
       self.failUnless(self.flf['_item_empty']=='')

   def testSemiWhiteSpace(self):
       """Test that white space in a semicolon string is preserved"""
       self.failUnless(self.cf['_item_sws']==self.df['_item_sws'])
       self.failUnless(self.cf['_item_sws']==self.flf['_item_sws'])

   def testLoopDataInOut(self):
       """Test writing in and out loop data"""
       olditems = self.cf.GetLoop('_item_5')
       for key,value in olditems.items():
           self.failUnless(tuple(map(str,value))==tuple(self.df[key]))
           self.failUnless(tuple(map(str,value))==tuple(self.flf[key]))
       # save frame test
       olditems = self.cfs.GetLoop('_sitem_5').items()
       for key,value in olditems:
           self.failUnless(tuple(map(str,value))==tuple(self.dfs[key]))
           self.failUnless(tuple(map(str,value))==tuple(self.flfs[key]))

   def testLoopStringInOut(self):
       """Test writing in and out string loop data"""
       olditems = self.cf.GetLoop('_string_1')
       newitems = self.df.GetLoop('_string_1')
       flexnewitems = self.flf.GetLoop('_string_1')
       for key,value in olditems.items():
           compstringa = [re.sub('\n','',a) for a in value]
           compstringb = [re.sub('\n','',a) for a in self.df[key]]
           compstringc = [re.sub('\n','',a) for a in self.flf[key]]
           self.failUnless(compstringa==compstringb and compstringa==compstringc)

   def testGetLoopData(self):
       """Test the get method for looped data"""
       newvals = self.df.get('_string_1')
       self.failUnless(len(newvals)==3)

   def testCopySaveFrame(self):
       """Early implementations didn't copy the save frame properly"""
       jj = CifFile.CifFile(self.ef,scoping='dictionary')  #this will trigger a copy
       self.failUnless(len(jj["test_save_frame"])>0)

   def testFirstBlock(self):
       """Test that first_block returns a block"""
       self.ef.scoping = 'instance'  #otherwise all blocks are available
       jj = self.ef.first_block()
       self.failUnless(jj==self.df)

   def testDupName(self):
       """Test that duplicate blocknames are allowed in non-standard mode"""
       outstr = """data_block1 _data_1 b save_ab1 _data_2 c
                  save_
                  save_ab1 _data_3 d save_"""
       b = open("test2.cif","w")
       b.write(outstr)
       b.close()
       testin = CifFile.CifFile("test2.cif",standard=None)
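
The write/read cycle at the heart of this test case reduces to a few
lines (a minimal sketch; 'roundtrip.cif' is an illustrative filename):

cf = CifFile.CifBlock()
cf['_item_1'] = 'Some data'
cif = CifFile.CifFile()
cif['Testblock'] = cf
open('roundtrip.cif','w').write(str(cif))
readback = CifFile.CifFile('roundtrip.cif')
assert readback['testblock']['_item_1'] == 'Some data'  # block names match case-insensitively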

class GrammarTestCase

class GrammarTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a file, then read it in again."""
       teststr1_0 = """
       #A test CIF file, grammar version 1.0 conformant
       data_Test
         _item_1 'A simple item'
         _item_2 '(Bracket always ok in quotes)'
         _item_3 (can_have_bracket_here_if_1.0)
       """
       f = open("test_1.0","w")
       f.write(teststr1_0)
       f.close()

   def tearDown(self):
       pass

   def testold(self):
       """Read in 1.0 conformant file; should not fail"""
       f = CifFile.ReadCif("test_1.0",grammar="1.0")
       print(f["test"]["_item_3"])
      
   def testNew(self):
       """Read in a 1.0 conformant file with 1.1 grammar; should fail"""
       self.assertRaises(StarFile.StarError,CifFile.ReadCif,"test_1.0",grammar="1.1")

   def testObject(self):
       """Test use of grammar keyword when initialising object"""
       f = CifFile.CifFile("test_1.0",grammar="1.0")

class ParentChildTestCase

class ParentChildTestCase(unittest.TestCase):
   def setUp(self):
       """Write out a multi-save-frame file, read in again"""
       outstring = """
data_Toplevel
 _item_1         a
 save_1
   _s1_item1     b
   save_12
   _s12_item1    c
   save_
   save_13
   _s13_item1    d
   save_
 save_
 _item_2         e
 save_2
   _s2_item1     f
   save_21
   _s21_item1    g
     save_211
     _s211_item1 h
     save_
     save_212
     _s212_item1 i
     save_
    save_
   save_22
    _s22_item1   j
   save_
 save_
 save_toplevel
   _item_1       k
 save_
"""
       f = open('save_test.cif','w')
       f.write(outstring)
       f.close()
       self.testcif = CifFile.CifFile('save_test.cif',scoping='dictionary')

   def testGoodRead(self):
       """Check that there is a top level block"""
       self.failUnless('toplevel+' in [a[0] for a in self.testcif.child_table.items() if a[1].parent is None])
       self.failUnless(self.testcif.child_table['toplevel'].parent == 'toplevel+')

   def testGetParent(self):
       """Check that parent is correctly identified"""
       self.failUnless(self.testcif.get_parent('212')=='21')
       self.failUnless(self.testcif.get_parent('12')=='1')

   def testGetChildren(self):
       """Test that our child blocks are constructed correctly"""
       p = self.testcif.get_children('1')
        self.failUnless('13' in p)
        self.failUnless('1' not in p)
        self.failUnless(p.get_parent('13') is None)
       self.failUnless(p['12']['_s12_item1']=='c')

   def testGetChildrenwithParent(self):
       """Test that the parent is included if necessary"""
       p = self.testcif.get_children('1',include_parent=True)
        self.failUnless('1' in p)
        self.failUnless(p.get_parent('13')=='1')
  
   def testSetParent(self):
       """Test that the parent is correctly set"""
       self.testcif.set_parent('1','211')
       q = self.testcif.get_children('1')
       self.failUnless('211' in q.keys())

   def testChangeParent(self):
       """Test that a duplicated save frame is OK if the duplicate name is a data block"""
       self.failUnless('toplevel+' in self.testcif.keys())
       self.failUnless(self.testcif.get_parent('1')=='toplevel+')

   def testRename1(self):
       """Test that re-identifying a datablock works"""
       self.testcif._rekey('2','timey-wimey')
       self.failUnless(self.testcif.get_parent('21')=='timey-wimey')
        self.failUnless('timey-wimey' in self.testcif)
        self.failUnless(self.testcif['timey-wimey']['_s2_item1']=='f')
        print(str(self.testcif))
 
   def testRename2(self):
       """Test that renamng a block works"""
       self.testcif.rename('2','Timey-wimey')
       self.failUnless(self.testcif.has_key('timey-wimey'))
       self.failUnless(self.testcif.child_table['timey-wimey'].block_id=='Timey-wimey')
   
   def testUnlock(self):
       """Test that unlocking will change overwrite flag"""
       self.testcif['2'].overwrite = False
       self.testcif.unlock()
       self.failUnless(self.testcif['2'].overwrite is True)
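
The navigation tested here follows one pattern: with scoping='dictionary',
save frames become child blocks addressed by name (a sketch using the
'save_test.cif' written in setUp above):

cif = CifFile.CifFile('save_test.cif', scoping='dictionary')
print(cif.get_parent('212'))          # -> '21'
children = cif.get_children('1')      # blocks nested inside save_1
print(children['12']['_s12_item1'])   # -> 'c'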

class TemplateTestCase

class TemplateTestCase(unittest.TestCase):
   def setUp(self):
       """Create a template"""
       self.template = """#
# Template
#
data_TEST_DIC
 
    _dictionary.title            DDL_DIC
    _definition.update           2011-07-27
    _description.text
;
     This dictionary specifies through its layout how we desire to
     format datanames.  It is not a valid dictionary, but it must 
     be a valid CIF file.
;

    _name.object_id              ALIAS
    _category.key_id           '_alias.definition_id'
    _category.key_list        ['_alias.definition_id']
    _type.purpose                Key     
    _type.dimension              [*]
    _import.get    [{"file":'templ_enum.cif',"save":'units_code'}]
     loop_
    _enumeration_set.state
    _enumeration_set.detail
          Dictionary        "applies to all defined items in the dictionary"
          Category          "applies to all defined items in the category"
          Item              "applies to a single item definition"
    _enumeration.default        Item   
"""  

   def testTemplateInput(self):
       """Test that an output template is successfully input"""
       p = CifFile.CifBlock()
       p.process_template(self.template)
       self.failUnless(p.form_hints[0]['dataname']=='_dictionary.title')
       self.failUnless(p.form_hints[4]['column']==31)
       self.failUnless(p.form_hints[2]['delimiter']==';')
       self.failUnless(p.form_hints[9]['column']==10)
       self.failUnless(p.form_hints[10]['delimiter']=='"')
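
In short, process_template reads a specimen CIF and records, per
dataname, the layout hints (column, delimiter) to apply on output (a
sketch using the template string built in setUp above):

p = CifFile.CifBlock()
p.process_template(template)          # `template` is the CIF-format string above
print(p.form_hints[0]['dataname'])    # -> '_dictionary.title'
print(p.form_hints[2]['delimiter'])   # -> ';'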

pycifrw-4.4/docs/CifFile/TypeContentsParser.m.html: CifFile.TypeContentsParser API documentation

CifFile.TypeContentsParser module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value


# Begin -- grammar generated by Yapps
import sys, re
from . import yapps3_compiled_rt as yappsrt

class TypeParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('([ \t\n\r])', '([ \t\n\r])'),
         ('container', '[A-Za-z]+\\('),
         ('identifier', '[A-Za-z]+'),
         ('c_c_b', '\\)'),
         ('o_c_b', '\\('),
         ('comma', '\\,'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r])'],*args,**kwargs)

class TypeParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
        base_element = self.base_element(_context)
        p = [base_element]
        while self._peek('END', 'comma') == 'comma':
            comma = self._scan('comma')
            base_element = self.base_element(_context)
            p.append(base_element)
        if self._peek() not in ['END', 'comma']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'END']))
        END = self._scan('END')
        if len(p)==1: p = p[0]
        return p

    def base_element(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'base_element', [])
        _token = self._peek('container', 'identifier')
        if _token == 'container':
            container = self._scan('container')
            element_list = self.element_list(_context)
            c_c_b = self._scan('c_c_b')
            return element_list
        else: # == 'identifier'
            identifier = self._scan('identifier')
        return identifier

    def element_list(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'element_list', [])
        base_element = self.base_element(_context)
        p = [base_element]
        while self._peek('comma', 'c_c_b') == 'comma':
            comma = self._scan('comma')
            base_element = self.base_element(_context)
            p.append(base_element)
        if self._peek() not in ['comma', 'c_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'c_c_b']))
        return p


def parse(rule, text):
    P = TypeParser(TypeParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps
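
A minimal usage sketch (mirroring DDLmValueTestCase elsewhere in these
docs): feed a DDLm _type.contents expression through the scanner and
parser to obtain the elementary types as nested lists.

p = TypeParser(TypeParserScanner('List(Real,List(Integer,Real),Real)'))
print(p.input())   # -> ['Real', ['Integer', 'Real'], 'Real']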

Classes

class TypeParser

class TypeParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [])
        base_element = self.base_element(_context)
        p = [base_element]
        while self._peek('END', 'comma') == 'comma':
            comma = self._scan('comma')
            base_element = self.base_element(_context)
            p.append(base_element)
        if self._peek() not in ['END', 'comma']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'END']))
        END = self._scan('END')
        if len(p)==1: p = p[0]
        return p

    def base_element(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'base_element', [])
        _token = self._peek('container', 'identifier')
        if _token == 'container':
            container = self._scan('container')
            element_list = self.element_list(_context)
            c_c_b = self._scan('c_c_b')
            return element_list
        else: # == 'identifier'
            identifier = self._scan('identifier')
        return identifier

    def element_list(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'element_list', [])
        base_element = self.base_element(_context)
        p = [base_element]
        while self._peek('comma', 'c_c_b') == 'comma':
            comma = self._scan('comma')
            base_element = self.base_element(_context)
            p.append(base_element)
        if self._peek() not in ['comma', 'c_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'c_c_b']))
        return p

Class variables

var Context

class TypeParserScanner

class TypeParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('([ \t\n\r])', '([ \t\n\r])'),
         ('container', '[A-Za-z]+\\('),
         ('identifier', '[A-Za-z]+'),
         ('c_c_b', '\\)'),
         ('o_c_b', '\\('),
         ('comma', '\\,'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r])'],*args,**kwargs)

pycifrw-4.4/docs/CifFile/YappsStarParser_1_0.m.html: CifFile.YappsStarParser_1_0 API documentation

CifFile.YappsStarParser_1_0 module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
     from .StarFile import remove_line_folding, remove_line_prefix
     # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          # apply protocols
          nv = remove_line_prefix(nv)
          nv = remove_line_folding(nv)
          return nv
     else:
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value
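# Illustrative behaviour (assuming no line-folding/prefix protocols in
# the value): stripextras("\n;hello\n;") -> "hello"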

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value

# helper function to populate a StarBlock given a list of names
# and values.
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd
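# e.g. make_empty(1) == [], make_empty(3) == [[[]]]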

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names.

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = [a.lower() for a in old_dict.keys()]
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    it = iter(iterable)
    while 1:
        try:
            yield next(it), next(it)
        except StopIteration:   # PEP 479: don't let StopIteration leak from a generator
            return
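# Illustrative use: dict(pairwise(['a',1,'b',2])) == {'a': 1, 'b': 2}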


# Begin -- grammar generated by Yapps
import sys, re
from . import yapps3_compiled_rt as yappsrt

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared;allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname]
        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisbc[heading], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                thisbc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop=[]
        while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name':
            data_name = self._scan('data_name')
            toploop.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                savebc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)


def parse(rule, text):
    P = StarParser(StarParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps

Classes

class StarParser

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared;allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname]
        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisbc[heading], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                thisbc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop=[]
        while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name':
            data_name = self._scan('data_name')
            toploop.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                savebc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)

Class variables

var Context

class StarParserScanner

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)

pycifrw-4.4/docs/CifFile/YappsStarParser_1_1.m.html: CifFile.YappsStarParser_1_1 API documentation

CifFile.YappsStarParser_1_1 module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
     from .StarFile import remove_line_folding, remove_line_prefix
     # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          # apply protocols
          nv = remove_line_prefix(nv)
          nv = remove_line_folding(nv)
          return nv
     else:
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value

# helper function to populate a StarBlock given a list of names
# and values.
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names.

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = [a.lower() for a in old_dict.keys()]
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    it = iter(iterable)
    while 1:
        try:
            yield next(it), next(it)
        except StopIteration:   # PEP 479: don't let StopIteration leak from a generator
            return


# Begin -- grammar generated by Yapps
import sys, re
from . import yapps3_compiled_rt as yappsrt

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\(\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared;allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname]
        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisbc[heading], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                thisbc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'start_sc_line')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = stripstring(data_value_1)
        else: # == 'start_sc_line'
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        return monitor('data_value',thisval)

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        toploop=[]
        while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name':
            data_name = self._scan('data_name')
            toploop.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line']))
        return toploop

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                savebc.merge_fast(save_frame,parent=act_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)


def parse(rule, text):
    P = StarParser(StarParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)
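
# Usage sketch (hedged -- ReadCif and the file name are illustrative, not part
# of this module): the 'input' rule above expects a prepared StarFile object,
# so these generated parsers are normally driven by PyCIFRW's high-level
# readers rather than by calling parse() directly, e.g.
#
#   from CifFile import ReadCif
#   cf = ReadCif('mydata.cif')       # parses via a generated module like this
#   print(list(cf.keys()))           # data block names found in the file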

# End -- grammar generated by Yapps

pycifrw-4.4/docs/CifFile/YappsStarParser_2_0.m.html

CifFile.YappsStarParser_2_0 module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print('At %s: %s' % (location,repr(value)))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
     from .StarFile import remove_line_folding, remove_line_prefix
     # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          # apply protocols
          nv = remove_line_prefix(nv)
          nv = remove_line_folding(nv)
          return nv
     else:
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value
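
# e.g. stripextras('\n;text\n;') == 'text': the semicolon delimiters and the
# trailing terminator are removed, then any line-folding / line-prefix
# protocols are undone; non-semicolon values fall through to stripstring().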

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value
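
# e.g. stripstring("'abc'") == 'abc'; values without matching outer quotes
# are returned unchanged.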

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value
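
# e.g. striptriple("'''multi\nline'''") == 'multi\nline' (CIF2-style
# triple-quoted values).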

# helper function to populate a StarBlock given a list of names
# and values .
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names.

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = [a.lower() for a in old_dict.keys()]
    if new_dict != {}:    # otherwise we have a new loop
        #print('Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys())))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    it = iter(iterable)
    while True:
        try:
            yield next(it), next(it)
        except StopIteration:
            return    # PEP 479: a leaked StopIteration would become RuntimeError
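
# e.g. list(pairwise(['a', '1', 'b', '2'])) == [('a', '1'), ('b', '2')];
# curly_bracket_expr below feeds its flat key/value list through this to
# build a StarDict.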


# Begin -- grammar generated by Yapps
import sys, re
from . import yapps3_compiled_rt as yappsrt

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('":"', ':'),
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', u'(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('data_heading', u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('c_c_b', '\\}'),
         ('o_c_b', '\\{'),
         ('c_s_b', '\\]'),
         ('o_s_b', '\\['),
         ('dat_val_internal_sq', '\\[([^\\s\\[\\]]*)\\]'),
         ('triple_quote_data_value', '(?s)\'\'\'.*?\'\'\'|""".*?"""'),
         ('single_quote_data_value', '\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\{\\}\\[\\]][^\\s\\{\\}\\[\\]]*)'),
         ('END', '$'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)
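
# Note on the scanner above: triple_quote_data_value is compiled with (?s)
# so '.' also matches newlines, allowing '''...''' and """...""" values to
# span lines; the bracket tokens (o_s_b/c_s_b, o_c_b/c_c_b) introduce the
# CIF2 list and table syntax absent from the 1.x grammar.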

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared; allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading]
        while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(stored_block, _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                thisbc.merge_fast(save_frame,parent=stored_block)
        if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = data_value_1
        elif _token not in ['start_sc_line', 'o_s_b', 'o_c_b']:
            delimited_data_value = self.delimited_data_value(_context)
            thisval = delimited_data_value
        elif _token == 'start_sc_line':
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        else: # in ['o_s_b', 'o_c_b']
            bracket_expression = self.bracket_expression(_context)
            thisval = bracket_expression
        return monitor('data_value',thisval)

    def delimited_data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'delimited_data_value', [])
        _token = self._peek('triple_quote_data_value', 'single_quote_data_value')
        if _token == 'triple_quote_data_value':
            triple_quote_data_value = self._scan('triple_quote_data_value')
            thisval = striptriple(triple_quote_data_value)
        else: # == 'single_quote_data_value'
            single_quote_data_value = self._scan('single_quote_data_value')
            thisval = stripstring(single_quote_data_value)
        return thisval

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def bracket_expression(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', [])
        _token = self._peek('o_s_b', 'o_c_b')
        if _token == 'o_s_b':
            square_bracket_expr = self.square_bracket_expr(_context)
            return square_bracket_expr
        else: # == 'o_c_b'
            curly_bracket_expr = self.curly_bracket_expr(_context)
            return curly_bracket_expr

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        loop_seq=[]
        while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name':
            data_name = self._scan('data_name')
            loop_seq.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        return loop_seq

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname]
        while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            dataseq = self.dataseq(savebc[savehead], _context)
        if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)

    def square_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', [])
        o_s_b = self._scan('o_s_b')
        this_list = []
        while self._peek('c_s_b', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') != 'c_s_b':
            data_value = self.data_value(_context)
            this_list.append(data_value)
            while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'c_s_b', 'o_s_b', 'o_c_b') != 'c_s_b':
                data_value = self.data_value(_context)
                this_list.append(data_value)
            if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'c_s_b', 'o_s_b', 'o_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'c_s_b']))
        if self._peek() not in ['c_s_b', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        c_s_b = self._scan('c_s_b')
        return StarList(this_list)
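
    # e.g. the CIF2 value [1 2 [3 4]] parses (recursively) to
    # StarList(['1', '2', StarList(['3', '4'])]); values are kept as strings
    # at this stage -- any numeric conversion happens later.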

    def curly_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', [])
        o_c_b = self._scan('o_c_b')
        table_as_list = []
        while self._peek('c_c_b', 'triple_quote_data_value', 'single_quote_data_value') != 'c_c_b':
            delimited_data_value = self.delimited_data_value(_context)
            table_as_list = [delimited_data_value]
            self._scan('":"')
            data_value = self.data_value(_context)
            table_as_list.append(data_value)
            while self._peek('triple_quote_data_value', 'single_quote_data_value', 'c_c_b') != 'c_c_b':
                delimited_data_value = self.delimited_data_value(_context)
                table_as_list.append(delimited_data_value)
                self._scan('":"')
                data_value = self.data_value(_context)
                table_as_list.append(data_value)
            if self._peek() not in ['triple_quote_data_value', 'single_quote_data_value', 'c_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b']))
        if self._peek() not in ['c_c_b', 'triple_quote_data_value', 'single_quote_data_value']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b']))
        c_c_b = self._scan('c_c_b')
        return StarDict(pairwise(table_as_list))
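
    # e.g. {"given":"John" "family":"Doe"} parses to a StarDict via
    # pairwise(); table keys must be delimited strings and entries are
    # whitespace-separated (this grammar has no comma token, unlike the
    # DDLm variant below).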


def parse(rule, text):
    P = StarParser(StarParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps

pycifrw-4.4/docs/CifFile/YappsStarParser_DDLm.m.html

CifFile.YappsStarParser_DDLm module

from StarFile import *
from types import *
import copy
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print('At %s: %s' % (location,repr(value)))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
    # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          return nv 
     else: 
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value
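
# Illustrative example (editor's addition): a semicolon-delimited field
# loses its delimiters and the final line terminator:
#
#   >>> stripextras('\n;hello\nworld\n;')
#   'hello\nworld'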

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value
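
# Illustrative examples (editor's addition): each stripper removes only
# delimiters that actually match:
#
#   >>> stripstring("'quoted'")
#   'quoted'
#   >>> striptriple('"""multi line"""')
#   'multi line'
#   >>> stripstring('unquoted')
#   'unquoted'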

# helper function to populate a StarBlock given a list of names
# and values.
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print 'Making loop with %s' % `itemlists`
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # print 'Makeloop constructed %s' % `loopstructure`
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % `loop_seq`
        print >>sys.stderr, error_string
        raise ValueError, error_string
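
# Editor's sketch of the column slicing above (datanames invented for
# illustration): for
#   loopdata = (['_atom.id', '_atom.x'], ['C1', '0.5', 'C2', '0.7'])
# the stride of len(loop_seq) == 2 recovers one column per dataname:
#   itemlists[0::2] == ['C1', 'C2']     # values for _atom.id
#   itemlists[1::2] == ['0.5', '0.7']   # values for _atom.x
# i.e. the flat value list is read back row by row.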

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd
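
# Illustrative example (editor's addition):
#   >>> make_empty(1)
#   []
#   >>> make_empty(3)
#   [[[]]]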

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names. 

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all 
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = map(lambda a:a.lower(),old_dict.keys())
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (`old_keys`,`new_dict.keys()`)
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError, "Duplicate dataname or blockname %s in input file" % new_key
            old_dict[new_key] = new_dict[new_key]
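
# Illustrative example (editor's addition): the collision test is
# case-insensitive, so merging {'_Cell.length_a': '5.4'} into a dictionary
# that already holds '_cell.length_a' raises CifError, while genuinely new
# names are simply copied across.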
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name) 
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    itnext = iter(iterable).next
    while 1:
        yield itnext(), itnext()
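
# Illustrative example (editor's addition; Python 2 semantics -- the
# StopIteration from itnext() silently ends the generator here):
#   >>> list(pairwise(['a', 1, 'b', 2]))
#   [('a', 1), ('b', 2)]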


# Begin -- grammar generated by Yapps
import sys, re
import yapps3_compiled_rt as yappsrt

class StarParserScanner(yappsrt.Scanner):
    patterns = [
        ('":"', re.compile(':')),
        ('","', re.compile(',')),
        ('([ \t\n\r](?!;))|[ \t]', re.compile('([ \t\n\r](?!;))|[ \t]')),
        ('(#.*[\n\r](?!;))|(#.*)', re.compile('(#.*[\n\r](?!;))|(#.*)')),
        ('LBLOCK', re.compile('(L|l)(O|o)(O|o)(P|p)_')),
        ('GLOBAL', re.compile('(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_')),
        ('STOP', re.compile('(S|s)(T|t)(O|o)(P|p)_')),
        ('save_heading', re.compile('(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+')),
        ('save_end', re.compile('(S|s)(A|a)(V|v)(E|e)_')),
        ('data_name', re.compile(u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0010fffd_-]+')),
        ('data_heading', re.compile(u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0010fffd-]+')),
        ('start_sc_line', re.compile('(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+')),
        ('sc_line_of_text', re.compile('[^;\r\n]([^\r\n])*(\r\n|\r|\n)+')),
        ('end_sc_line', re.compile(';')),
        ('c_c_b', re.compile('\\}')),
        ('o_c_b', re.compile('\\{')),
        ('c_s_b', re.compile('\\]')),
        ('o_s_b', re.compile('\\[')),
        ('dat_val_internal_sq', re.compile('\\[([^\\s\\[\\]]*)\\]')),
        ('triple_quote_data_value', re.compile('(?s)\'\'\'.*?\'\'\'|""".*?"""')),
        ('single_quote_data_value', re.compile('\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+')),
        ('data_value_1', re.compile('((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\',_\\{\\}\\[\\]][^\\s,\\{\\}\\[\\]]*)')),
        ('END', re.compile('$')),
    ]
    def __init__(self, str):
        yappsrt.Scanner.__init__(self,None,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],str)
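
# Editor's note (illustrative, not part of the generated source): keyword
# tokens are spelled out letter by letter to get case insensitivity
# without re.I, so all casings of a block header match 'data_heading':
#
#   >>> pat = StarParserScanner.patterns[10][1]   # ('data_heading', ...)
#   >>> [bool(pat.match(s)) for s in ('data_x', 'DATA_x', 'DaTa_x')]
#   [True, True, True]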

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared; allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);thisbc.NewBlock(heading,StarBlock(overwrite=False))
        while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(thisbc[heading], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                thisbc.merge_fast(save_frame,parent=heading)
        if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']))
        thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = data_value_1
        elif _token not in ['start_sc_line', 'o_s_b', 'o_c_b']:
            delimited_data_value = self.delimited_data_value(_context)
            thisval = stripstring(delimited_data_value)
        elif _token == 'start_sc_line':
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        else: # in ['o_s_b', 'o_c_b']
            bracket_expression = self.bracket_expression(_context)
            thisval = bracket_expression
        return monitor('data_value',thisval)

    def delimited_data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'delimited_data_value', [])
        _token = self._peek('triple_quote_data_value', 'single_quote_data_value')
        if _token == 'triple_quote_data_value':
            triple_quote_data_value = self._scan('triple_quote_data_value')
            thisval = striptriple(triple_quote_data_value)
        else: # == 'single_quote_data_value'
            single_quote_data_value = self._scan('single_quote_data_value')
            thisval = stripstring(single_quote_data_value)
        return thisval

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = start_sc_line
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines = lines+sc_line_of_text
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        return monitor('sc_line_of_text',lines+end_sc_line)

    def bracket_expression(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', [])
        _token = self._peek('o_s_b', 'o_c_b')
        if _token == 'o_s_b':
            square_bracket_expr = self.square_bracket_expr(_context)
            return square_bracket_expr
        else: # == 'o_c_b'
            curly_bracket_expr = self.curly_bracket_expr(_context)
            return curly_bracket_expr

    def square_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', [])
        o_s_b = self._scan('o_s_b')
        this_list = []
        while self._peek('c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') not in ['c_s_b', '","']:
            data_value = self.data_value(_context)
            this_list.append(data_value)
            while self._peek('","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == '","':
                self._scan('","')
                data_value = self.data_value(_context)
                this_list.append(data_value)
            if self._peek() not in ['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
                raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        if self._peek() not in ['c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', '","', 'o_s_b', 'o_c_b']))
        c_s_b = self._scan('c_s_b')
        return StarList(this_list)
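
    # Illustrative example (editor's addition): the nested loops above
    # accept both comma- and whitespace-separated items, so '[1,2]' and
    # '[1 2]' each parse to StarList(['1', '2']); values are still plain
    # strings at this stage.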

    def curly_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', [])
        o_c_b = self._scan('o_c_b')
        delimited_data_value = self.delimited_data_value(_context)
        table_as_list = [delimited_data_value]
        self._scan('":"')
        data_value = self.data_value(_context)
        table_as_list.append(data_value)
        while self._peek('c_c_b', '","') == '","':
            self._scan('","')
            delimited_data_value = self.delimited_data_value(_context)
            table_as_list.append(delimited_data_value)
            self._scan('":"')
            data_value = self.data_value(_context)
            table_as_list.append(data_value)
        if self._peek() not in ['c_c_b', '","']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'c_c_b']))
        c_c_b = self._scan('c_c_b')
        return StarDict(pairwise(table_as_list))

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        loop_seq=[]
        while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name':
            data_name = self._scan('data_name')
            loop_seq.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        return loop_seq

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();savebc.NewBlock(savehead,StarBlock(overwrite=False))
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(_context)
                savebc.merge_fast(save_frame,parent=savehead)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.SyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)


def parse(rule, text):
    P = StarParser(StarParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps

Module variables

var StringTypes

Classes

class StarParser


Class variables

var Context

class StarParserScanner


pycifrw-4.4/docs/CifFile/YappsStarParser_STAR2.m.html: CifFile.YappsStarParser_STAR2 API documentation

CifFile.YappsStarParser_STAR2 module

# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
     from .StarFile import remove_line_folding, remove_line_prefix
     # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          # apply protocols
          nv = remove_line_prefix(nv)
          nv = remove_line_folding(nv)
          return nv
     else:
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value
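
# Editor's note (assuming the standard CIF line-folding protocol inside
# remove_line_folding): a text field whose first line is a lone backslash
# has each backslash-newline pair rejoined, so
#   ;\
#   a very long li\
#   ne
#   ;
# comes back as 'a very long line'.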

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value

# helper function to populate a StarBlock given a list of names
# and values.
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names.

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = map(lambda a:a.lower(),old_dict.keys())
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    it = iter(iterable)
    while 1:
        yield next(it), next(it)
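
# Editor's note: under PEP 479 (Python 3.7+) the StopIteration raised by
# next(it) inside 'while 1' surfaces as RuntimeError instead of quietly
# ending the generator.  A sketch of an equivalent form that terminates
# cleanly on both Python 2 and 3:
#
#     def pairwise(iterable):
#         it = iter(iterable)
#         return zip(it, it)
#
#   >>> list(pairwise(['a', 1, 'b', 2]))
#   [('a', 1), ('b', 2)]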


# Begin -- grammar generated by Yapps
import sys, re
from . import yapps3_compiled_rt as yappsrt

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('":"', ':'),
         ('","', ','),
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', u'(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('data_heading', u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('c_c_b', '\\}'),
         ('o_c_b', '\\{'),
         ('c_s_b', '\\]'),
         ('o_s_b', '\\['),
         ('dat_val_internal_sq', '\\[([^\\s\\[\\]]*)\\]'),
         ('triple_quote_data_value', '(?s)\'\'\'.*?\'\'\'|""".*?"""'),
         ('single_quote_data_value', '\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+'),
         ('END', '$'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\',_\\{\\}\\[\\]][^\\s,\\{\\}\\[\\]]*)'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)

class StarParser(yappsrt.Parser):
    Context = yappsrt.Context
    def input(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared])
        _token = self._peek('END', 'data_heading')
        if _token == 'data_heading':
            dblock = self.dblock(prepared, _context)
            allblocks = prepared; allblocks.merge_fast(dblock)
            while self._peek('END', 'data_heading') == 'data_heading':
                dblock = self.dblock(prepared, _context)
                allblocks.merge_fast(dblock)
            if self._peek() not in ['END', 'data_heading']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading']))
            END = self._scan('END')
        else: # == 'END'
            END = self._scan('END')
            allblocks = prepared
        return allblocks

    def dblock(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared])
        data_heading = self._scan('data_heading')
        heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading]
        while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(stored_block, _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                thisbc.merge_fast(save_frame,parent=stored_block)
        if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc))

    def dataseq(self, starblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock])
        data = self.data(starblock, _context)
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['LBLOCK', 'data_name']:
            data = self.data(starblock, _context)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))

    def data(self, currentblock, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock])
        _token = self._peek('LBLOCK', 'data_name')
        if _token == 'LBLOCK':
            top_loop = self.top_loop(_context)
            makeloop(currentblock,top_loop)
        else: # == 'data_name'
            datakvpair = self.datakvpair(_context)
            currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False)

    def datakvpair(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', [])
        data_name = self._scan('data_name')
        data_value = self.data_value(_context)
        return [data_name,data_value]

    def data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'data_value', [])
        _token = self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b')
        if _token == 'data_value_1':
            data_value_1 = self._scan('data_value_1')
            thisval = data_value_1
        elif _token not in ['start_sc_line', 'o_s_b', 'o_c_b']:
            delimited_data_value = self.delimited_data_value(_context)
            thisval = delimited_data_value
        elif _token == 'start_sc_line':
            sc_lines_of_text = self.sc_lines_of_text(_context)
            thisval = stripextras(sc_lines_of_text)
        else: # in ['o_s_b', 'o_c_b']
            bracket_expression = self.bracket_expression(_context)
            thisval = bracket_expression
        return monitor('data_value',thisval)

    def delimited_data_value(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'delimited_data_value', [])
        _token = self._peek('triple_quote_data_value', 'single_quote_data_value')
        if _token == 'triple_quote_data_value':
            triple_quote_data_value = self._scan('triple_quote_data_value')
            thisval = striptriple(triple_quote_data_value)
        else: # == 'single_quote_data_value'
            single_quote_data_value = self._scan('single_quote_data_value')
            thisval = stripstring(single_quote_data_value)
        return thisval

    def sc_lines_of_text(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', [])
        start_sc_line = self._scan('start_sc_line')
        lines = StringIO();lines.write(start_sc_line)
        while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text':
            sc_line_of_text = self._scan('sc_line_of_text')
            lines.write(sc_line_of_text)
        if self._peek() not in ['end_sc_line', 'sc_line_of_text']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line']))
        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def bracket_expression(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', [])
        _token = self._peek('o_s_b', 'o_c_b')
        if _token == 'o_s_b':
            square_bracket_expr = self.square_bracket_expr(_context)
            return square_bracket_expr
        else: # == 'o_c_b'
            curly_bracket_expr = self.curly_bracket_expr(_context)
            return curly_bracket_expr

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        loop_seq=[]
        while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name':
            data_name = self._scan('data_name')
            loop_seq.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        return loop_seq

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                savebc.merge_fast(save_frame,parent=stored_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)


    def square_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', [])
        o_s_b = self._scan('o_s_b')
        this_list = []
        while self._peek('c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') not in ['c_s_b', '","']:
            data_value = self.data_value(_context)
            this_list.append(data_value)
            while self._peek('","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == '","':
                self._scan('","')
                data_value = self.data_value(_context)
                this_list.append(data_value)
            if self._peek() not in ['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        if self._peek() not in ['c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', '","', 'o_s_b', 'o_c_b']))
        c_s_b = self._scan('c_s_b')
        return StarList(this_list)

    def curly_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', [])
        o_c_b = self._scan('o_c_b')
        table_as_list = []
        while self._peek('c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","') in ['triple_quote_data_value', 'single_quote_data_value']:
            delimited_data_value = self.delimited_data_value(_context)
            table_as_list = [delimited_data_value]
            self._scan('":"')
            data_value = self.data_value(_context)
            table_as_list.append(data_value)
            while self._peek('","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b') == '","':
                self._scan('","')
                delimited_data_value = self.delimited_data_value(_context)
                table_as_list.append(delimited_data_value)
                self._scan('":"')
                data_value = self.data_value(_context)
                table_as_list.append(data_value)
            if self._peek() not in ['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b']))
        if self._peek() not in ['c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b', '","']))
        c_c_b = self._scan('c_c_b')
        return StarDict(pairwise(table_as_list))
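
    # Illustrative example (editor's addition): a table value such as
    # {'cat':'atom','key':'id'} is gathered into the flat list
    # ['cat', 'atom', 'key', 'id'] and then paired into
    # StarDict([('cat', 'atom'), ('key', 'id')]).  Keys must be delimited
    # strings; values may be any data value.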


def parse(rule, text):
    P = StarParser(StarParserScanner(text))
    return yappsrt.wrap_error_reporter(P, rule)

# End -- grammar generated by Yapps

Classes

class StarParser

        end_sc_line = self._scan('end_sc_line')
        lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())

    def bracket_expression(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', [])
        _token = self._peek('o_s_b', 'o_c_b')
        if _token == 'o_s_b':
            square_bracket_expr = self.square_bracket_expr(_context)
            return square_bracket_expr
        else: # == 'o_c_b'
            curly_bracket_expr = self.curly_bracket_expr(_context)
            return curly_bracket_expr

    def top_loop(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', [])
        LBLOCK = self._scan('LBLOCK')
        loopfield = self.loopfield(_context)
        loopvalues = self.loopvalues(_context)
        return loopfield,loopvalues

    def loopfield(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', [])
        loop_seq=[]
        while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name':
            data_name = self._scan('data_name')
            loop_seq.append(data_name)
        if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        return loop_seq

    def loopvalues(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', [])
        data_value = self.data_value(_context)
        dataloop=[data_value]
        while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            data_value = self.data_value(_context)
            dataloop.append(monitor('loopval',data_value))
        if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']))
        return dataloop

    def save_frame(self, prepared, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared])
        save_heading = self._scan('save_heading')
        savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname]
        while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']:
            _token = self._peek('save_heading', 'LBLOCK', 'data_name')
            if _token != 'save_heading':
                dataseq = self.dataseq(savebc[savehead], _context)
            else: # == 'save_heading'
                save_frame = self.save_frame(prepared, _context)
                savebc.merge_fast(save_frame,parent=stored_block)
        if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']))
        save_end = self._scan('save_end')
        return monitor('save_frame',savebc)

    def square_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', [])
        o_s_b = self._scan('o_s_b')
        this_list = []
        while self._peek('c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') not in ['c_s_b', '","']:
            data_value = self.data_value(_context)
            this_list.append(data_value)
            while self._peek('","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == '","':
                self._scan('","')
                data_value = self.data_value(_context)
                this_list.append(data_value)
            if self._peek() not in ['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']))
        if self._peek() not in ['c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', '","', 'o_s_b', 'o_c_b']))
        c_s_b = self._scan('c_s_b')
        return StarList(this_list)

    def curly_bracket_expr(self, _parent=None):
        _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', [])
        o_c_b = self._scan('o_c_b')
        table_as_list = []
        while self._peek('c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","') in ['triple_quote_data_value', 'single_quote_data_value']:
            delimited_data_value = self.delimited_data_value(_context)
            table_as_list = [delimited_data_value]
            self._scan('":"')
            data_value = self.data_value(_context)
            table_as_list.append(data_value)
            while self._peek('","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b') == '","':
                self._scan('","')
                delimited_data_value = self.delimited_data_value(_context)
                table_as_list.append(delimited_data_value)
                self._scan('":"')
                data_value = self.data_value(_context)
                table_as_list.append(data_value)
            if self._peek() not in ['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b']:
                raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b']))
        if self._peek() not in ['c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","']:
            raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b', '","']))
        c_c_b = self._scan('c_c_b')
        return StarDict(pairwise(table_as_list))

Ancestors (in MRO)

Class variables

var Context

class StarParserScanner

class StarParserScanner(yappsrt.Scanner):
    def __init__(self, *args,**kwargs):
        patterns = [
         ('":"', ':'),
         ('","', ','),
         ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'),
         ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'),
         ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'),
         ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'),
         ('STOP', '(S|s)(T|t)(O|o)(P|p)_'),
         ('save_heading', u'(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('save_end', '(S|s)(A|a)(V|v)(E|e)_'),
         ('data_name', u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('data_heading', u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'),
         ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'),
         ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'),
         ('end_sc_line', ';'),
         ('c_c_b', '\\}'),
         ('o_c_b', '\\{'),
         ('c_s_b', '\\]'),
         ('o_s_b', '\\['),
         ('dat_val_internal_sq', '\\[([^\\s\\[\\]]*)\\]'),
         ('triple_quote_data_value', '(?s)\'\'\'.*?\'\'\'|""".*?"""'),
         ('single_quote_data_value', '\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+'),
         ('END', '$'),
         ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\',_\\{\\}\\[\\]][^\\s,\\{\\}\\[\\]]*)'),
        ]
        yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs)

Ancestors (in MRO)

pycifrw-4.4/docs/CifFile/index.html000066400000000000000000000646131345362224200173070ustar00rootroot00000000000000 CifFile API documentation Top

CifFile module

from __future__ import absolute_import
print("Name is " + repr(__name__))
from .StarFile import StarError,ReadStar,StarList,apply_line_folding,apply_line_prefix
from .CifFile_module import CifDic,CifError, CifBlock,ReadCif,ValidCifFile,ValidCifError,Validate,CifFile
from .CifFile_module import get_number_with_esd,convert_type,validate_report
from .StarFile import remove_line_prefix,remove_line_folding
from .StarFile import check_stringiness

Sub-modules

CifFile.StarScan

A tokeniser for Star files

pycifrw-4.4/docs/dict_use.html000066400000000000000000000205111345362224200164630ustar00rootroot00000000000000

Using CIF dictionaries with PyCIFRW

Introduction

CIF dictionaries describe the meaning of the datanames found in CIF data files in a machine-readable format - the CIF format. Each block in a CIF dictionary defines a single dataname by assigning values to a limited set of attributes. This set of attributes used by a dictionary is called its 'Dictionary Definition Language' or DDL. Three languages have been used in IUCr-supported CIF dictionaries: DDL1 (the original language), DDL2 (heavily developed by the macromolecular community), and DDLm (a new standard that aims to unite the best of DDL1 and DDL2). DDL2 and DDLm both allow algorithms to be defined for datanames. These algorithms describe how to derive values for datanames from other quantities in the data file.

Knowing which dictionary a given datafile was written against thus allows us to do two things: to validate that datanames and values match the constraints imposed by the definitions; and, in the case of DDL2 and DDLm, to calculate values which might then be used for checking or simply to fill in missing information.

Dictionaries

DDL dictionaries can be read into CifFile objects just like CIF data files. For this purpose, CifFile objects automatically support save frames (used in DDL2 and DDLm dictionaries), which are accessed just like CifBlocks using their save frame name. By default save frames are not listed as keys in CifFiles as they do not form part of the CIF standard.

The more powerful CifDic object creates a unified interface to DDL1, DDL2 and DDLm dictionaries. A CifDic is initialised with a single file name or CifFile object, and will accept the grammar keyword:

    cd = CifFile.CifDic("cif_core.dic",grammar='1.1')

Definitions are accessed using the usual notation, e.g. cd['_atom_site_aniso_label']. Return values are always CifBlock objects. Additionally, the CifDic object contains a number of instance variables derived from dictionary global data:

dicname
The dictionary name + version as given in the dictionary
diclang
'DDL1','DDL2', or 'DDLm'
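
Continuing the example above, these attributes can be inspected directly (the strings returned depend on the dictionary that was loaded; the values shown here are illustrative only):

    >>> cd.dicname      # name + version as given in the dictionary
    'cif_core.dic 2.4'
    >>> cd.diclang
    'DDL1'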

CifDic objects provide a large number of validation functions, each of which returns a Python dictionary containing at least the key result. result takes the values True, False or None depending on the success, failure or non-applicability of each test. In case of failure, additional keys are returned depending on the nature of the error.

Validation with PyCIFRW

A top level function is provided for convenient validation of CIF files:

    CifFile.Validate("mycif.cif",dic = "cif_core.dic")

This returns a tuple (valid_result, no_matches). valid_result and no_matches are Python dictionaries indexed by block name. For valid_result, the value for each block is itself a dictionary indexed by item_name. The value attached to each item name is a list of (check_function, check_result) tuples, with check_result a small dictionary containing at least the key result. All tests which passed or were not applicable are removed from this dictionary, so result is always False. Additional keys contain auxiliary information depending on the test. Each of the items in no_matches is a simple list of item names which were not found in the dictionary.

If a simple validation report is required, the function validate_report can be called on the output of the above function, printing a simple ASCII report. This function can be studied as an example of how to process the structure returned by the validate function.
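
For example, a minimal sketch combining the two functions described above:

    import CifFile
    results = CifFile.Validate("mycif.cif", dic="cif_core.dic")
    CifFile.validate_report(results)    # prints a simple ASCII report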

A somewhat nicer interface to validation is provided in the ValidationResult class (thanks to Boris Dusek), which is initialised with the return value from validate:

    val_report = ValidationResult(validate("mycif.cif",dic="cif_core.dic"))

This class provides the report method, producing a human-readable report, as well as boolean methods which return whether or not the block is valid or if items appear in the block that are not present in the dictionary - is_valid and has_no_match_items respectively.
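
A sketch of typical usage, extending the example above (whether the boolean methods take a block name is an assumption here; 'my_block' is a hypothetical block in mycif.cif):

    print(val_report.report())                        # human-readable report
    print(val_report.is_valid('my_block'))            # True if the block is valid
    print(val_report.has_no_match_items('my_block'))  # True if the block has items absent from the dictionary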

Limitations on validation

  1. (DDL2 only) When validating data dictionaries themselves, no checks are made on group and subgroup consistency (e.g. that a specified subgroup is actually defined).

  2. (DDL1 only) Some _type_construct attributes in the DDL1 spec file are not machine-readable, so values cannot be checked for consistency

  3. DDLm validation methods are still in development so are not comprehensive.

pycifrw-4.4/docs/drel_use.html000066400000000000000000000146351345362224200165000ustar00rootroot00000000000000

Using dREL in PyCIFRW

Introduction

dREL is a language for manipulating tabular data described in 1. A CIF dictionary (DDL2/DDLm) definition can contain a piece of dREL code describing how the defined data item can be calculated from other dataitem values. PyCIFRW includes a dREL parser that executes these calculations to provide missing values.

Using dREL methods

Once a dictionary containing methods is assigned to a CifBlock using CifBlock.assign_dictionary, any attempts to retrieve a missing dataname will trigger execution of the dREL method for that dataname, if available. Note that the value returned will not necessarily be a string type.

>>> from CifFile import CifDic, CifFile
>>> p = CifDic('pycifrw/drel/testing/cif_core.dic',grammar='STAR2')
>>> r = CifFile('pycifrw/drel/testing/data/nick.cif',grammar='STAR2')
>>> r = r['saly2']         # choose our testing block
>>> r['_cell.volume']      #should be in the file already
u'635.3'                   # Note this is a string value
>>> del r['_cell.volume']  # remove from the CifBlock
>>> r['_cell.volume']      # check that it is gone
KeyError: 'No such item: _cell.volume'
>>> r.assign_dictionary(p) # Use this dictionary for calculations
>>> r['_cell.volume']      # Is it there now?
635.2977003095574          # Note this is a floating-point number

PyCIFRW extensions

PyCIFRW will understand multiple methods for a single definition, and run through each until a successful evaluation occurs. If recursion is detected (defined as a second request to evaluate a dataname in a single evaluation sequence) the current definition is abandoned and the next one tried.

Limitations on dREL methods

dREL is a rich specification and, while the PyCIFRW implementation is relatively comprehensive, a number of functionalities are not yet implemented, and testing is not complete.

  1. No standard uncertainty (su) propagation. The CIF standards provide for su, which would allow propagation. To be implemented.

  2. Values that have been calculated are stored as their native type (not necessarily strings). These values may be formatted unusually (e.g. too many decimal places) when output to a file. A method will be introduced in later versions to properly format native values.

  3. The Python numpy package is required for dREL-based calculations.

  4. Multi-line strings in dREL are not correctly converted to Python.

pycifrw-4.4/docs/overview.html000066400000000000000000000647431345362224200165510ustar00rootroot00000000000000

Programming with PyCIFRW

PyCIFRW provides facilities for reading, manipulating and writing CIF and STAR files. In addition, CIF files and dictionaries may be validated against DDL1/2/m dictionaries.

Installing and Initialising PyCIFRW

(Note: these instructions refer to version 4.0 and higher. For older versions, see the documentation provided with those versions).

As of version 4.0, it is sufficient to install the PyCIFRW “wheel” using pip, for example:

pip install --use-wheel PyCifRW-4.2-cp27-none-linux_i686.whl

or using the platform independent source package found on PyPI:

pip install pycifrw

If you want to include PyCIFRW with your package, you can install the PyCIFRW wheel into your development environment and then bundle the contents of the CifFile directory found in the Python local libraries directory (usually site-packages).

If PyCIFRW has installed properly, the following command should complete without any errors:

  import CifFile

Working with CIF files

Reading CIF files

CIF files are represented in PyCIFRW as CifFile objects. These objects behave identically to Python dictionaries, with some additional methods. CifFile objects can be created by calling the ReadCif function on a filename or URL:

      from CifFile import ReadCif
      cf = ReadCif("mycif.cif")    
      df = ReadCif("ftp://ftp.iucr.org/pub/cifdics/cifdic.register")

Errors are raised if CIF syntax/grammar violations are encountered in the input file or line length limits are exceeded.

A compiled extension (StarScan.so) is available in binary distributions which increases parsing speed by a factor of three or more. To use this facility, include the keyword argument scantype='flex' in ReadCif commands:

      cf = ReadCif("mycif.cif",scantype="flex")

Binary distributions are generally only provided for the 'manylinux' target, but may also be generated from the source distribution for any platform if the appropriate compilers are available on that platform.

Alternatively, you may initialise a CifFile object with the URI:

      cf = CifFile("mycif.cif",scantype="flex")

If your CIF file contains characters that are not encoded in UTF8 or ASCII, you may pass the 'permissive' option to ReadCif, which will try other encodings (currently only latin1). Use of this option is not encouraged.
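
For example, a minimal sketch ('legacy.cif' stands for a hypothetical latin1-encoded file):

      cf = ReadCif("legacy.cif", permissive=True)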

Grammar options

There are three variations in CIF file syntax. An early, little-used version of the standard allowed non-quoted data strings to begin with square bracket characters ('['). This was disallowed in version 1.1 in order to reserve such usage for later developments. The recently introduced CIF2 standard adds list and table datastructures to CIF1. Detection of the appropriate CIF grammar is automatic, but potentially time-consuming for multiple files, so specification of the particular version to use is possible with the grammar keyword:

     cf = ReadCif('oldcif.cif',grammar='1.0') #oldest CIF syntax      
     cf = ReadCif('normcif.cif',grammar='1.1') #widespread
     cf = ReadCif('future.cif',grammar='2.0') #latest standard
     cf = ReadCif('unknown.cif',grammar='auto') #try 2.0->1.1->1.0

Reading of STAR2 files is also possible by setting grammar='STAR2'. Currently, the default is set to 'auto'.

Creating a new CifFile

A new CifFile object is usually created empty:

        from CifFile import CifFile
        cf = CifFile()

You will need to create at least one CifBlock object to hold your data. The CifBlock is then added to the CifFile using the usual Python dictionary notation. The dictionary 'key' becomes the blockname used for output.

        from CifFile import CifBlock
        myblock = CifBlock()        
        cf['a_block'] = myblock

A CifBlock object may be initialised with another CifBlock, in which case a copy operation is performed.

Note that most operations on data provided by PyCIFRW involve CifBlock objects.

Manipulating values in a CIF file

Accessing data

The simplest form of access is using standard Python square bracket notation. Data blocks and data names within each data block are referenced identically to normal Python dictionaries:

      my_data = cf['a_data_block']['_a_data_name']

All values read in are stored as strings 1, with CIF syntactical elements stripped, that is, no enclosing quotation marks or semicolons are included in the values. The value associated with a CifFile dictionary key is always a CifBlock object. All standard Python dictionary methods (e.g. get, update, items(), keys()) are available for both CifFile and CifBlock objects. Note also the convenience method first_block(), which will return the first datablock stored; this is not necessarily the first datablock in the physical file:

    my_data = cf.first_block()

If a data name occurs in a loop, a list of values is returned for the value of that dataname - the next section describes ways to access looped data.

Tabular (“looped”) data

For the purpose of the examples, we use the following example CIF file:

data_testblock
loop_
  _item_5   
  _item_7   
  _item_6    
  1  a  5    
  2  b  6    
  3  c  7    
  4  d  8 

Any table can be interacted with in a column-based or a row-based way. A PyCIFRW CifBlock object provides column-based access using normal square bracket syntax as described above: for example cf['testblock']['_item_6'] will return ['5','6','7','8'].

Table row access

The CifLoopBlock object represents a loop structure in the CIF file and facilitates row-based access. A CifLoopBlock object can be obtained by calling the CifBlock method GetLoop(dataname). Column-based access remains available for this object (e.g. keys() returns a list of datanames in the loop and square bracket notation returns a list of column values for that column).

A particular row can be selected using the CifLoopBlock GetKeyedPacket method:

    >>> lb = cf['testblock'].GetLoop('_item_6')
    >>> myrow = lb.GetKeyedPacket('_item_7','c') 
    >>> myrow._item_5
    '3'

In this example, the single packet with a value of 'c' for _item_7 is returned, and packet values can then be accessed using the dataname as an attribute of the packet. Note that a KeyError is raised if more than one packet matches, or no packets match, and that the packet returned is a copy of the data read in from the file, and therefore can be changed without affecting the CifBlock object.

You may also access the nth row of this CifLoopBlock object using square bracket notation 2, and values can be obtained from these packets as attributes.

    >>> lb = cb.GetLoop("_item_5")
    >>> lb[0]
    ['1', 'a', '5']
    >>> lb[0]._item_7
    'a'

An alternative way of accessing loop data uses Python iterators, allowing the following syntax:

    >>> for a in lb: print `a["_item_7"]` 
    'a' 'b' 'c' 'd' 

Note that in both the above examples the row packet is a copy of the looped data, and therefore changes to it will not silently alter the contents of the original CifFile object, unlike the lists returned when column-based access is used.

Changing or adding data values

If many operations are going to be performed on a single data block, it is convenient to assign that block to a new variable:

    cb = cf['my_block']

A new data name and value may be added, or the value of an existing name changed, by straight assignment:

    cb['_new_data_name'] = 4.5
    cb['_old_data_name'] = 'cucumber'

Old values are overwritten silently. Note that values may be strings or numbers.

Creating loops

To create a loop, simply set the column values to same-length lists, and then call the CifBlock method CreateLoop with a list of the looped datanames as a single argument. This method will raise an error if the datanames have different length columns assigned to them. For example, the following commands create the example loop above:

    cb['_item_5']  = [1,2,3,4]
    cb['_item_7']  = ['a','b','c','d']
    cb['_item_6']  = [5,6,7,8]
    cb.CreateLoop(['_item_5','_item_7','_item_6'])

Another method, AddToLoop(dataname,newdata), adds columns in newdata to the pre-existing loop containing dataname, silently overwriting duplicate data. newdata should be a Python dictionary of dataname - datavalue pairs.
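
For example, the following sketch adds a fourth column to the loop created above (the dataname _item_8 is invented for illustration):

    cb.AddToLoop('_item_5', {'_item_8': ['p','q','r','s']})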

Note that lists (and other listlike objects except packets) returned by PyCIFRW actually point to the list currently inside the CifBlock object, and therefore any modification to them will modify the stored list. While this is often the desired behaviour, if you intend to manipulate such a list in other parts of your program while preserving the original CIF information, you should first copy the list to avoid destroying the loop structure:

    mysym = cb['_symmetry_ops'][:]
    mysym.append('x-1/2,y+1/2,z')

Changing item order

Item (and block) order has no semantic significance in CIF files. However, the readability of CIF files in simple text editors leads to a desire to organise the output order for human readers. The ChangeItemOrder method allows the order in which data items appear in the printed file to be changed:

    mycif['testblock'].ChangeItemOrder('_item_5',0)

will move _item_5 to the beginning of the datablock. When changing the order inside a loop block, the loop block's method must be called i.e.:

aloop = mycif['testblock'].GetLoop('_loop_item_1')
aloop.ChangeItemOrder('_loop_item_1',4)

Note also that the position of a loop within the file can be changed in this way as well, by passing the 'block number' object as the first argument. Each loop is assigned a simple integer number, which can be found by calling FindLoop with the name of a column in that loop:

loop_id = mycif['testblock'].FindLoop('_item_6')
mycif['testblock'].ChangeItemOrder(loop_id,0)

will move the loop block to the beginning of the printed datablock.

Adding and removing table rows

While it is most efficient to add columns to the CifBlock and then bind them together once into a loop, it is possible to add a new row into an existing loop using the AddPacket(packet) method of CifLoopBlock objects:

    aloop = mycif['testblock'].GetLoop('_item_7')
    template = aloop.GetKeyedPacket('_item_7','d')
    template._item_5 = '5'
    template._item_7 = 'e'
    template._item_6 = '9'
    aloop.AddPacket(template)

Note we use an existing packet as a template in this example. If you wish to create a packet from scratch, you should instantiate a StarPacket:

    from CifFile import StarFile   #installed with PyCIFRW
    newpack = StarFile.StarPacket()
    newpack._item_5 = '5'  
    ...
    aloop.AddPacket(newpack)

Note that an error will be raised when calling AddPacket if the packet attributes do not exactly match the item names in the loop.

A packet may be removed using the RemoveKeyedPacket method, which chooses the packet to be removed based on the value of the given dataname:

    aloop.RemoveKeyedPacket('_item_7','a')

Writing CIF Files

The CifFile method WriteOut returns a string which may be passed to an open file descriptor:

    outfile = open("mycif.cif")
    outfile.write(cf.WriteOut())

Or the built-in Python str() function can be used:

outfile.write(str(cf))

WriteOut takes an optional keyword argument, comment, which should be a string containing a comment which will be placed at the top of the output file. This comment string must already contain # characters at the beginning of lines:

    outfile.write(cf.WriteOut("#This is a test file"))

Two additional keyword arguments control line length in the output file: wraplength and maxoutlength. Lines in the output file are guaranteed to be shorter than maxoutlength characters, and PyCIFRW will additionally insert a line break if putting two data values or a dataname/datavalue pair together on the same line would exceed wraplength. In other words, unless data values are longer than maxoutlength characters, no line breaks will be inserted into those datavalues in the output file. By default, wraplength = 80 and maxoutlength = 2048. Note that the CIF line folding protocol is used, which makes wrapping of long datavalues reversible.

These values may be set on a per block basis by calling the SetOutputLength method of the block.
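
A sketch, assuming SetOutputLength accepts the same two keyword arguments:

    cf['a_block'].SetOutputLength(wraplength=100, maxoutlength=2048)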

The order of output of items within a CifFile or CifBlock is specified using the ChangeItemOrder method (see above). The default order is the order that items were first added in to the CifFile/CifBlock. Note that this order is not guaranteed to be the order in which they appear in the input file.

Templating system

If you want precise control of the layout of your CIF file, you can pass a template file to the CifBlock.process_template method. A 'template' is a CIF file containing a single block, where the datanames are laid out in the way that the user desires. The layout elements that are picked up from this template are:

  1. order (overrides current order of CifBlock)
  2. column position of datavalues (only the first row of a loop block is inspected)
  3. delimiters
  4. If a semicolon-delimited string outside a loop contains 3 or more spaces in a row at the beginning of a line, that datavalue will be wrapped and indented by the same amount on output

Constraints on the template:

  1. There should only ever be one dataname on each line
  2. loop_ and datablock tokens should appear as the only non-blank characters on their lines
  3. Comments are flagged by a '#' as the first character in the line
  4. Blank lines are acceptable (and ignored)
  5. The dummy datavalues should use only alphanumeric characters
  6. Semicolon-delimited strings are not allowed in loops

After calling process_template with the template file as the argument, subsequent calls to WriteOut will respect the template information, and revert to default behaviour for any datanames that were not found in the template. Templating is most useful when formatting CIF dictionaries which are read heavily by human readers, and have many (thousands!) of datablocks, each containing the same limited number of datanames.
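
For example, a minimal sketch (assuming a file name is accepted; layout_template.cif is a hypothetical template prepared as described above):

    cb.process_template("layout_template.cif")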

Output format

CIF files are output by default in CIF2 grammar, but with the CIF2-only triple quotes avoided unless explicitly requested through a template. Therefore, as long as CIF2-only datastructures (lists and tables) are absent, the output CIF files will conform to 1.0,1.1 and 2.0 grammar. The grammar of the output files can be changed by calling CifFile.set_grammar with the choices being 1.0,1.1,2.0 or STAR2.
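
For example (a sketch, assuming the choice is passed as a string):

    cf.set_grammar('2.0')    # subsequent output will use CIF2 syntax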

Deprecated classes

The ValidCifFile class is deprecated and will be removed in a future version.

Example programs

A program which uses PyCIFRW for validation, validate_cif.py, is included in the distribution in the Programs subdirectory. It will validate a CIF file (including dictionaries) against one or more dictionaries which may be specified by name and version or as a filename on the local disk. If name and version are specified, the IUCr canonical registry or a local registry is used to find the dictionary and download it if necessary.

Usage

python validate_cif.py [options] ciffile

Options

--version show version number and exit
-h,--help print short help message
-d dirname directory to find/store dictionary files
-f dictname filename of locally-stored dictionary
-u version dictionary version to resolve using registry
-n name dictionary name to resolve using registry
-s store downloaded dictionary locally (default True)
-c fetch and use canonical registry from IUCr
-r registry location of registry as filename or URL
-t The file to be checked is itself a DDL2 dictionary

Further information

The source files are in a literate programming format (noweb) with file extension .nw. HTML documentation generated from these files and containing both code and copious comments is included in the downloaded package. Details of interpretation of the current standards as relates to validation can be found in these files.


  1. This deviates from the current CIF standard, which mandates interpreting unquoted strings as numbers where possible and in the absence of dictionary definitions to the contrary (International Tables, Vol. G., p24).

  2. Warning: row and column order in a CIF loop is arbitrary; while PyCIFRW currently maintains the row order seen in the input file, there is nothing in the CIF standards which mandates this behaviour, and later implementations may change this behaviour

pycifrw-4.4/setup.cfg000066400000000000000000000000461345362224200146700ustar00rootroot00000000000000[egg_info]
tag_build = 
tag_date = 0
pycifrw-4.4/setup.py000066400000000000000000000030271345362224200145630ustar00rootroot00000000000000# Setup file for creation of the PyCIFRW
# distribution
from __future__ import print_function
from setuptools import setup, Extension, find_packages

#### Do the setup
c_scanner = Extension("CifFile.StarScan",
                      sources = ["src/lib/lex.yy.c","src/lib/py_star_scan.c"])

setup(name="PyCifRW",
      version = "4.4",
      description = "CIF/STAR file support for Python",
      author = "James Hester",
      author_email = "jamesrhester@gmail.com",
      license = 'Python 2.0',
      url="https://bitbucket.org/jamesrhester/pycifrw/overview",
      classifiers = [
          'Development Status :: 5 - Production/Stable',
          'Environment :: Console',
          'Intended Audience :: Developers',
          'Intended Audience :: Science/Research',
          'License :: OSI Approved :: Python Software Foundation License',
          'Operating System :: OS Independent',
          'Programming Language :: Python :: 3',
          'Programming Language :: Python :: 2',
          'Topic :: Scientific/Engineering :: Bio-Informatics',
          'Topic :: Software Development :: Libraries :: Python Modules'
      ],
      py_modules = ['CifFile.CifFile_module','CifFile.yapps3_compiled_rt',
                    'CifFile.YappsStarParser_1_1','CifFile.YappsStarParser_1_0',
                    'CifFile.YappsStarParser_STAR2','CifFile.YappsStarParser_2_0',
                    'CifFile.StarFile','CifFile.TypeContentsParser'],
      ext_modules = [c_scanner],
      packages = ['CifFile', 'CifFile.drel'],
      test_suite = 'TestPyCifRW',
      package_dir = {'CifFile':'src'}
      )
pycifrw-4.4/src/000077500000000000000000000000001345362224200136365ustar00rootroot00000000000000pycifrw-4.4/src/CifFile.html000066400000000000000000007670711345362224200160470ustar00rootroot00000000000000 CifFile_module.nw
<Copyright statement>= (U->)
__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""

Introduction

This file implements a general CIF reading/writing utility. The basic objects (CifFile/CifBlock) read and write syntactically correct CIF 1.1 files including save frames. Objects for validating CIFs are built on these basic objects: A CifDic object is derived from a CifFile created from a DDL1/2 dictionary; and the ValidCifFile/ValidCifBlock objects allow creation/checking of CIF files against a list of CIF dictionaries.

The CifFile class is initialised with either no arguments (a new CIF file) or with the name of an already existing CIF file. Data items are accessed/changed/added using the Python mapping type, i.e. to get dataitem you would type value = cf[blockname][dataitem].

Note also that a CifFile object can be accessed as a mapping type, ie using square brackets. Most mapping operations have been implemented (see below).

We build upon the objects defined in the StarFile class, by imposing a few extra restrictions where necessary.

<*>=
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

# Python 2,3 compatibility
try:
    from urllib import urlopen         # for arbitrary opening
    from urlparse import urlparse, urljoin
except:
    from urllib.request import urlopen
    from urllib.parse import urlparse, urljoin

# The unicode type does not exist in Python3 as the str type
# encompasses unicode.  PyCIFRW tests for 'unicode' would fail
# Suggestions for a better approach welcome.

if isinstance(u"abc",str):   #Python3
    unicode = str

<Copyright statement>

import re,sys
from . import StarFile
from .StarFile import StarList  #put in global scope for exec statement
try:
    import numpy                   #put in global scope for exec statement
    from .drel import drel_runtime  #put in global scope for exec statement
except ImportError:
    pass                       #will fail when using dictionaries for calcs
from copy import copy          #must be in global scope for exec statement

<Decorators>
<CifBlock class>
<CifFile class>
<Define an error class>
<CIF Dictionary type>
<A valid CIF block>
<A valid CIF file>
<Top-level functions>
<Utility functions>
<Read in a CIF file>
<CifLoopBlock class>
<API documentation flags>

CifFile

A CifFile is subclassed from a StarFile. Our StarFile class has an optional check of line length, which we use.

A CifFile object is a dictionary of CifBlock objects, accessed by block name. As the maximum line length is subject to change, we allow the length to be specified, with the current default set at 2048 characters (Cif 1.1). For reading in files, we only flag a length error if the parameter strict is true, in which case we use parameter maxinlength as our maximum line length on input. Parameter maxoutlength sets the maximum line size for output. If maxoutlength is not specified, it defaults to the maximum input length.

Note that this applies to the input only. For changing output length, you can provide an optional parameter in the WriteOut method.
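
For example (a sketch using the keyword arguments described in the overview documentation):

    outstring = cf.WriteOut(wraplength=80, maxoutlength=2048)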

<CifFile class>= (<-U)
class CifFile(StarFile.StarFile):
<Initialise data structures>

When initialising, we add those parts that are unique to the CifFile as opposed to a simple collection of blocks - i.e. reading in from a file, and some line length restrictions. We do not indent this section in this noweb file, so that our comment characters output at the beginning of the line.

<Initialise data structures>= (<-U)
    def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs):
        super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs)
        self.strict = strict
        self.header_comment = \
"""
##########################################################################
#               Crystallographic Information Format file
#               Produced by PyCifRW module
#
#  This is a CIF file.  CIF has been adopted by the International
#  Union of Crystallography as the standard for data archiving and
#  transmission.
#
#  For information on this file format, follow the CIF links at
#  http://www.iucr.org
##########################################################################
"""

Cif Block class

CifBlocks exist(ed) as a separate class in order to enforce non-nested loops and maximum dataname lengths. As nested loops have been removed completely from PyCIFRW, they are no longer necessary but kept here for backwards compatibility.

<CifBlock class>= (<-U)
class CifBlock(StarFile.StarBlock):
    """
    A class to hold a single block of a CIF file.  A `CifBlock` object can be treated as
    a Python dictionary, in particular, individual items can be accessed using square
    brackets e.g. `b['_a_dataname']`.  All other Python dictionary methods are also
    available (e.g. `keys()`, `values()`).  Looped datanames will return a list of values.

    ## Initialisation

    When provided, `data` should be another `CifBlock` whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    <Initialise Cif Block>
    <Adjust emulation of a mapping type>
    <Add a data item>
    <Return all looped names>

A CifBlock is a StarBlock with a very few restrictions.

<Initialise Cif Block>= (<-U)
def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs):
    """When provided, `data` should be another CifBlock whose contents will be copied to
    this block.

    * if `strict` is set, maximum name lengths will be enforced

    * `maxoutlength` is the maximum length for output lines

    * `wraplength` is the ideal length to make output lines

    * When set, `overwrite` allows the values of datanames to be changed (otherwise an error
    is raised).

    * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using
    the syntax `a[_dataname] = [1,2,3,4]`.  This should now be done by calling `CreateLoop`
    after setting the dataitem value.
    """
    if strict: maxnamelength=75
    else:
       maxnamelength=-1
    super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs)
    self.dictionary = None   #DDL dictionary referring to this block
    self.compat_mode = compat_mode   #old-style behaviour of setitem

def RemoveCifItem(self,itemname):
    """Remove `itemname` from the CifBlock"""
    self.RemoveItem(itemname)

The second line in the copy method switches the class of the returned object to be a CifBlock. It may not be necessary.

<Adjust emulation of a mapping type>= (<-U)
def __setitem__(self,key,value):
    self.AddItem(key,value)
    # for backwards compatibility make a single-element loop
    if self.compat_mode:
        if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList):
             # single element loop
             self.CreateLoop([key])

def copy(self):
    newblock = super(CifBlock,self).copy()
    return type(self)(newblock)   #catch inheritance (im_class is Python 2 only)

This function was added for the dictionary validation routines. It will return a list where each member is itself a list of item names, corresponding to the names in each loop of the file.

<Return all looped names>= (<-U)
def loopnames(self):
    return [self.loops[a] for a in self.loops]

Adding a data item. In the old, deprecated method we are passed a tuple with the (set) of data names at the beginning, and a (set) of values for them following.

We implement this behaviour by looping over the input datanames, and adding them to the set of keys. When we have finished, we create the loop.

We check the length of the name, and give an error if the name is greater than 75 characters, which is the CIF 1.1 maximum length.

We also check for consistency, by making sure the new item is not in the block already. If it is, we replace it (consistent with the meaning of square brackets). If it is in a loop, we replace the looped value and all other items in that loop block. This means that when adding loops, we must add them all at once if we call this routine directly.

We typecheck the data items. They can be tuples, strings or lists. If we have a list of values for a single item, the item name should also occur in a single member tuple.

<Add a data item>= (<-U)
def AddCifItem(self,data):
    """ *DEPRECATED*. Use `AddItem` instead."""
    # we accept only tuples, strings and lists!!
    if not (isinstance(data[0],(unicode,tuple,list,str))):
              raise TypeError('Cif datanames are either a string, tuple or list')
    # we catch single item loops as well...
    if isinstance(data[0],(unicode,str)):
        self.AddSingleCifItem(data[0],list(data[1]))
        if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList):  # a single element loop
            self.CreateLoop([data[0]])
        return
    # otherwise, we loop over the datanames
    keyvals = zip(data[0][0],[list(a) for a in data[1][0]])
    [self.AddSingleCifItem(a,b) for a,b in keyvals]
    # and create the loop
    self.CreateLoop(data[0][0])

def AddSingleCifItem(self,key,value):
    """*Deprecated*. Use `AddItem` instead"""
    """Add a single data item. If it is part of a loop, a separate call should be made"""
    self.AddItem(key,value)
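
As a sketch of the deprecated calling convention handled above, the following call loops two datanames at once (the names and values are invented for illustration):

    cb = CifBlock()
    cb.AddCifItem(((['_name_1','_name_2'],), (([1,2],['a','b']),)))
    # equivalent modern form:
    # cb['_name_1'] = [1,2]; cb['_name_2'] = ['a','b']
    # cb.CreateLoop(['_name_1','_name_2'])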

Reading in a file. We use the STAR grammar parser. Note that the blocks returned will be locked for changing (overwrite=False) and can be unlocked by setting block.overwrite to True.

<Read in a CIF file>= (<-U)
def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF',
            permissive=False):
    """ Read in a CIF file, returning a `CifFile` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for
    properly-formed CIF2.0 files.  Note that only Unicode characters in the basic multilingual
    plane are recognised (this will be fixed when PyCIFRW is ported to Python 3).

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines, but is not available for CIF2/STAR2 files.  Note that running PyCIFRW in
    Jython uses native Java regular expressions
    to provide a speedup regardless of this argument (and does not yet support CIF2).

    * `scoping` is only relevant where nested save frames are expected (STAR2 only).
    `instance` scoping makes nested save frames
    invisible outside their hierarchy, allowing duplicate save frame names in separate
    hierarchies. `dictionary` scoping makes all save frames within a data block visible to each
    other, thereby restricting all save frames to have unique names.
    Currently the only recognised value for `standard` is `CIF`, which when set enforces a
    maximum length of 75 characters for datanames and has no other effect. """

    finalcif = CifFile(scoping=scoping,standard=standard)
    return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype,
                             permissive=permissive)
    #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs)

Defining an error class: we simply derive 'do-nothing' classes from the built-in Exception class

<Define an error class>= (<-U)
class CifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Format error: '+ self.value

class ValidCifError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nCif Validity error: ' + self.value

class CifRecursionError(Exception):
    def __init__(self,key_value,call_stack):
        self.key_value = key_value
        self.call_stack = call_stack
    def __str__(self):
        return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack))


Dictionaries

To avoid ambiguity with the Python dictionary type, we use capital D to denote CIF Dictionaries where misinterpretation is possible.

We build our Dictionary behaviour on top of the StarFile object, which is notionally a collection of StarBlocks. A Dictionary is simply a collection of datablocks, where each datablock corresponds to a single definition. DDL1 had no category definitions.

We adopt a data model whereby the excess information in a DDL2 dictionary is absorbed into special methods (and I am thinking here of the _item_type_list.construct stuff which appears at the global level), which we initialise ourselves for a DDL1 dictionary.

The square bracket notation is repurposed to mean access to the appropriate definition, as the save frame name and the definition id may differ slightly (or completely).

<CIF Dictionary type>= (<-U)
<Dictionary block type>
class CifDic(StarFile.StarFile):
    """Create a Cif Dictionary object from the provided source, which can
    be a filename/URL or a CifFile.  Optional arguments (relevant to DDLm
    only):

    * do_minimum (Boolean):
         Do not set up the dREL system for auto-calculation or perform
         imports.  This implies do_imports=False and do_dREL=False

    * do_imports = No/Full/Contents/All:
         If not 'No', interpret _import.get statements in
         Full mode, Contents mode, or both, respectively. See also option 'heavy'

    * do_dREL = True/False:
         Parse and convert all dREL methods to Python. Implies do_imports=All

    * heavy = True/False:
         (Experimental). If True, importation overwrites definitions. If False,
         attributes are resolved dynamically.
    """
    <Initialise Cif dictionary>
    <DDL-specific initialisation routines>
    <Repurpose standard python methods>
    <Repurpose Starfile methods>
    <Obtaining semantic information from the dictionary>
    <Create alias table>
    <Create category/object table>
    <Add type information>
    <Add category information>
    <Definition manipulation methods>
    <Dictionary output routines>
    <Return a single packet by key>
    <Extract number and esd>
    <Analyse range>
    <Linkage to dREL>
    <Switch on numpy arrays>
    <Convert string to appropriate type>
    <Validation routines>

Dictionary blocks

A dictionary block is essentially identical to a StarBlock, with the extra semantics of chasing through `_import.get` calls in order to transparently return attributes defined in separate dictionaries. If the `_import_cache` is empty, this is skipped.

<Dictionary block type>= (<-U)
class DicBlock(StarFile.StarBlock):
    """A definition block within a dictionary, which allows imports
    to be transparently followed"""

    def __init__(self,*args,**kwargs):
        super(DicBlock,self).__init__(*args,**kwargs)
        self._import_cache = {}
        
    def __getitem__(self,dataname):
        value = None
        if super(DicBlock,self).has_key("_import.get") and self._import_cache:
            value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) 
        try:
            final_value = super(DicBlock,self).__getitem__(dataname)
        except KeyError:    #not there
            final_value = value
        if final_value is None:
            raise KeyError("%s not found" % dataname)
        return final_value

    def has_key(self,key):
        try:
            self[key]
        except KeyError:
            return False
        return True
    
    def add_dict_cache(self,name,cached):
        """Add a loaded dictionary to this block's cache"""
        self._import_cache[name]=cached
        
    def follow_import(self,import_info,dataname):
        """Find the dataname values from the imported dictionary. `import_info`
        is a list of import locations"""
        latest_value = None
        for import_ref in import_info:
            file_loc = import_ref["file"]
            if file_loc not in self._import_cache:
                raise ValueError("Dictionary for import %s not found" % file_loc)
            import_from = self._import_cache[file_loc]
            miss = import_ref.get('miss','Exit')
            target_key = import_ref["save"]
            try:
                import_target = import_from[target_key]
            except KeyError:
                if miss == 'Exit':
                    raise CifError('Import frame %s not found in %s' % (target_key,file_loc))
                else: continue
            # now import appropriately
            mode = import_ref.get("mode",'Contents').lower()
            if mode == "contents":   #only this is used at this level
                latest_value = import_target.get(dataname,latest_value)
        return latest_value
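
For orientation, a sketch of the structure that follow_import consumes (all values hypothetical); each row corresponds to one entry of a parsed `_import.get` attribute:

    import_info = [
        {"file": "templ_attr.cif",   # must already be present in _import_cache
         "save": "units_code",       # save frame to take attributes from
         "mode": "Contents",         # only 'Contents' is acted on at this level
         "miss": "Exit"}             # 'Exit' raises CifError if the frame is absent
    ]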
    

Initialisation

We want to be able to accept strings, giving the file name of the CIF dictionary, and pre-initialised CifFile objects. We do not accept CifDic objects. Our initialisation procedure first unifies the interface to the Dictionary, and then runs through the Dictionary producing a normalised form. Following this, type and category information can be collected for later reference.

Validation functions are listed so that it would be possible to add and remove them from the "valid set". This behaviour has not yet been implemented.

When loading DDLm dictionaries we may recursively call this initialisation function with a dictionary to be imported as the argument. In this case we do not want to do all the method derivation, as the necessary categories will be loaded into the calling dictionary rather than the currently initialising dictionary. So there is a keyword argument to stop the operations that should operate on the dictionary as a whole taking place.

The dREL methods require Numpy support, but we do not wish to introduce a global dependence on Numpy. Therefore, we introduce a 'switch' which will return Numpy arrays from the __getitem__ method instead of StarLists. It is intended that the dREL methods will turn this on only during execution, then turn it off afterwards.

Note that DDLm importation logic provides many choices. We have a choice of 'No', 'Contents', 'Full' and 'All' for the amount that is imported. If `heavy` is False, no definition material will be replaced, rather the import will be resolved dynamically.

<Initialise Cif dictionary>= (<-U)
def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True,
             grammar='auto',heavy=True,**kwargs):
    self.do_minimum = do_minimum
    if do_minimum:
        do_imports = 'No'
        do_dREL = False
    if do_dREL: do_imports = 'All'
    if heavy == 'Light' and do_imports not in ('Contents','No'):
        raise ValueError("Light imports only available for mode 'Contents'")
    self.template_cache = {}    #for DDLm imports
    self.ddlm_functions = {}    #for DDLm functions
    self.switch_numpy(False)    #no Numpy arrays returned
    super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs)
    self.standard = 'Dic'    #for correct output order
    self.scoping = 'dictionary'
    (self.dicname,self.diclang) = self.dic_determine()
    print('%s is a %s dictionary' % (self.dicname,self.diclang))
    self.scopes_mandatory = {}
    self.scopes_naughty = {}
    # rename and expand out definitions using "_name" in DDL dictionaries
    if self.diclang == "DDL1":
        self.DDL1_normalise()   #this removes any non-definition entries
    self.create_def_block_table() #From now on, [] uses definition_id
    if self.diclang == "DDL1":
        self.ddl1_cat_load()
    elif self.diclang == "DDL2":
        self.DDL2_normalise()   #iron out some DDL2 tricky bits
    elif self.diclang == "DDLm":
        self.scoping = 'dictionary'   #expose all save frames
        if do_imports != 'No':
            self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine
        self.create_alias_table()
        self.create_cat_obj_table()
        self.create_cat_key_table()
        if do_dREL:
            print('Doing full dictionary initialisation')
            self.initialise_drel()
    self.add_category_info(full=do_dREL)
    # initialise type information
    self.typedic={}
    self.primdic = {}   #typecode<->primitive type translation
    self.add_type_info()
    self.install_validation_functions()

These routines seek to impose a uniform structure on dictionaries written in DDL1, DDL2 and DDLm. Historically, the richer and more systematic DDL2 approach was used to describe DDL1 definitions. With the advent of DDLm, the DDLm paradigm is likely to overtake DDL2. When interpreting the following routines, therefore, bear in mind that they were originally written with DDL2 in mind, and are gradually shifting to DDLm.

<DDL-specific initialisation routines>= (<-U)
<Dictionary determination function>
<Deal with DDL1 differences>
<Load categories with DDL2-type information>
<Iron out DDL2 strangeness>
<Parse DDLm validity information>
<Perform DDLm imports>

This function determines whether we have a DDLm, DDL2 or DDL1 dictionary. We are built from a CifFile object. The current method looks for an on_this_dictionary block, which implies DDL1, or a single block, which implies DDL2/DDLm. This is also where we define some universal keys for uniform access to DDL attributes.

<Dictionary determination function>= (<-U)
def dic_determine(self):
    if "on_this_dictionary" in self:
        self.master_block = super(CifDic,self).__getitem__("on_this_dictionary")
        self.def_id_spec = "_name"
        self.cat_id_spec = "_category.id"   #we add this ourselves
        self.type_spec = "_type"
        self.enum_spec = "_enumeration"
        self.cat_spec = "_category"
        self.esd_spec = "_type_conditions"
        self.must_loop_spec = "_list"
        self.must_exist_spec = "_list_mandatory"
        self.list_ref_spec = "_list_reference"
        self.key_spec = "_list_mandatory"
        self.unique_spec = "_list_uniqueness"
        self.child_spec = "_list_link_child"
        self.parent_spec = "_list_link_parent"
        self.related_func = "_related_function"
        self.related_item = "_related_item"
        self.primitive_type = "_type"
        self.dep_spec = "xxx"
        self.cat_list = []   #to save searching all the time
        name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"]
        version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"]
        return (name+version,"DDL1")
    elif len(self.get_roots()) == 1:              # DDL2/DDLm
        self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0])
        # now change to dictionary scoping
        self.scoping = 'dictionary'
        name = self.master_block["_dictionary.title"]
        version = self.master_block["_dictionary.version"]
        if self.master_block.has_key("_dictionary.class"):   #DDLm
            self.enum_spec = '_enumeration_set.state'
            self.key_spec = '_category.key_id'
            self.must_exist_spec = None
            self.cat_spec = '_name.category_id'
            self.primitive_type = '_type.contents'
            self.cat_id_spec = "_definition.id"
            self.def_id_spec = "_definition.id"
            return(name+version,"DDLm")
        else:   #DDL2
            self.cat_id_spec = "_category.id"
            self.def_id_spec = "_item.name"
            self.key_spec = "_category_mandatory.name"
            self.type_spec = "_item_type.code"
            self.enum_spec = "_item_enumeration.value"
            self.esd_spec = "_item_type_conditions.code"
            self.cat_spec = "_item.category_id"
            self.loop_spec = "there_is_no_loop_spec!"
            self.must_loop_spec = "xxx"
            self.must_exist_spec = "_item.mandatory_code"
            self.child_spec = "_item_linked.child_name"
            self.parent_spec = "_item_linked.parent_name"
            self.related_func = "_item_related.function_code"
            self.related_item = "_item_related.related_name"
            self.unique_spec = "_category_key.name"
            self.list_ref_spec = "xxx"
            self.primitive_type = "_type"
            self.dep_spec = "_item_dependent.dependent_name"
            return (name+version,"DDL2")
    else:
        raise CifError("Unable to determine dictionary DDL version")

DDL1 differences. Firstly, in DDL1 you can loop a _name to get definitions of related names (e.g. x,y,z). Secondly, the data block name is missing the initial underscore, so we need to read the _name value. There is one block without a _name attribute, which we proceed to destroy (exercise for the reader: which one?).

A further complex difference is in the way that ranges are specified. A DDL2 dictionary generally loops the _item_range.maximum/minimum items, in order to specify inclusion of the endpoints of the range, whereas DDL1 dictionaries simply specify ranges as n:m. We translate these values into item_range specifications.
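
As an illustration (values hypothetical), the DDL1 range 0:10 is translated into the following DDL2-style item_range loop, in which a row with equal maximum and minimum marks an included endpoint:

    _item_range.maximum  _item_range.minimum
    10                   10     # endpoint 10 is included
    10                   0      # the interval itself
    0                    0      # endpoint 0 is included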

If the _list item is missing for a dictionary definition, it defaults to no, i.e. the item cannot be listed. We explicitly include this in our transformations.

The dictionaries also contain categories, which are used to impose constraints on groupings of items in lists. Category names in DDL2 dictionaries have no leading underscore, and the constraints are stored directly in the category definition. So, with a DDL1 dictionary, we rewrite things to match the DDL2 methods. In particular, the list_uniqueness item becomes the category_key.name attribute of the category. This may apply to _list_mandatory and/or _list_reference too, but the current specification is vague.

Also, it is possible for cross-item references (e.g. in a _list_reference) to include a whole range of items by terminating the name with an underscore. It is then understood to include anything starting with those characters. We explicitly try to expand these references out.

Note the way we convert to DDL2-style type definitions; any definition having a _type_construct regular expression triggers the definition of a whole new type, which is stored as per DDL2, for the later type dictionary construction process to find.

<Deal with DDL1 differences>= (<-U)
def DDL1_normalise(self):
    # switch off block name collision checks
    self.standard = None
    # add default type information in DDL2 style
    # initial types and constructs
    base_types = ["char","numb","null"]
    prim_types = base_types[:]
    base_constructs = [".*",
        '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.',
        "\"\" "]
    for key,value in self.items():
       newnames = [key]  #keep by default
       if "_name" in value:
           real_name = value["_name"]
           if isinstance(real_name,list):        #looped values
               for looped_name in real_name:
                  new_value = value.copy()
                  new_value["_name"] = looped_name  #only looped name
                  self[looped_name] = new_value
               newnames = real_name
           else:
                  self[real_name] = value
                  newnames = [real_name]
       # delete the old one
       if key not in newnames:
          del self[key]
    # loop again to normalise the contents of each definition
    for key,value in self.items():
       #unlock the block
       save_overwrite = value.overwrite
       value.overwrite = True
       # deal with a missing _list, _type_conditions
       if "_list" not in value: value["_list"] = 'no'
       if "_type_conditions" not in value: value["_type_conditions"] = 'none'
       # deal with enumeration ranges
       if "_enumeration_range" in value:
           max,min = self.getmaxmin(value["_enumeration_range"])
           if min == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min))))
           elif max == ".":
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min))))
           else:
               self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min))))
       #add any type construct information
       if "_type_construct" in value:
           base_types.append(value["_name"]+"_type")   #ie dataname_type
           base_constructs.append(value["_type_construct"]+"$")
           prim_types.append(value["_type"])     #keep a record
           value["_type"] = base_types[-1]   #the new type name

       #make categories conform with ddl2
       #note that we must remove everything from the last underscore
       if value.get("_category",None) == "category_overview":
            last_under = value["_name"].rindex("_")
            catid = value["_name"][1:last_under]
            value["_category.id"] = catid  #remove square brackets
            if catid not in self.cat_list: self.cat_list.append(catid)
       value.overwrite = save_overwrite
    # we now add any missing categories before filling in the rest of the
    # information
    for key,value in self.items():
        #print('processing ddl1 definition %s' % key)
        if "_category" in self[key]:
            if self[key]["_category"] not in self.cat_list:
                # rogue category, add it in
                newcat = self[key]["_category"]
                fake_name = "_" + newcat + "_[]"
                newcatdata = CifBlock()
                newcatdata["_category"] = "category_overview"
                newcatdata["_category.id"] = newcat
                newcatdata["_type"] = "null"
                self[fake_name] = newcatdata
                self.cat_list.append(newcat)
    # write out the type information in DDL2 style
    self.master_block.AddLoopItem((
        ("_item_type_list.code","_item_type_list.construct",
          "_item_type_list.primitive_code"),
        (base_types,base_constructs,prim_types)
        ))

DDL2 has a few idiosyncrasies of its own. For some reason, in the definition of a parent item, all the child items are listed and their mandatory/not mandatory status specified. This duplicates information under the child item itself, although there is something on the web indicating that this is purely cosmetic and not strictly necessary. For our purposes, we want to extract the mandatory/not mandatory nature of the current item, which appears conventionally at the top of the list (we do not assume this below). The only way of determining the actual item name is to look at the save frame name, which is a bit of a fragile tactic - especially as dictionary merge operations are supposed to look for _item.name.

So, in these cases, we have to assume the save frame name is the one we want, and find this entry in the list.

Additionally, the child entry doesn't contain the category specification, so we add this into the child entry at the same time, together with a pointer to the parent item.

Such entries then have a loop listing parents and children down the whole hierarchy, starting with the current item. We disentangle this, placing parent item attributes in the child items, moving sub-children down to their level. Sub children may not exist at all, so we create them if necessary.

To make life more interesting, PDBx dictionaries have an entry_pc placeholder in which additional (and sometimes repeated) parent-child relationships can be expressed. We cannot assume that any given parent-child relationship is stated at a single site in the file. What is more, it appears that multiple parents for a single child are defined in the _entry.pdbx_pc entry. Our changes to the file pre-checking are therefore restricted to making sure that the child contains information about the parents; we do not interfere with the parent's information about the children, even if we consider that to be superfluous. Note that we will have to add parent-child validity checks to check consistency among all these relationships.

Update: in the DDL-2.1.6 file, only the parents/children are looped, rather than the item names, so we have to check looping separately.

Next: DDL2 contains aliases to DDL1 item names, so in theory we should be able to use a DDL2 dictionary to validate a DDL1-style CIF file. We create separate definition blocks for each alias to enable this.

Also, we flatten out any single-element lists for item_name. This is simply to avoid the value of e.g. category_id being a single-element list instead of a string.

Note also that _item.category_id in DDL2 is 'implicit', meaning in this case that you can determine it from the item name. We add in the category for simplicity.

<Iron out DDL2 strangeness>= (<-U)
<Loopify parent-child relationships>

def DDL2_normalise(self):
   # use lists, not filter objects: these sequences are iterated more than once below
   listed_defs = [a for a in self.keys() if isinstance(self[a].get('_item.name'),list)]
   # now filter out all the single element lists!
   dodgy_defs = [a for a in listed_defs if len(self[a]['_item.name']) > 1]
   for item_def in dodgy_defs:
      <Repopulate child definitions>
   <Populate parent and child links correctly>
   # now flatten any single element lists
   single_defs = [a for a in listed_defs if len(self[a]['_item.name'])==1]
   for flat_def in single_defs:
       flat_keys = self[flat_def].GetLoop('_item.name').keys()
       for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0]
   # now deal with the multiple lists
   # next we do aliases
   all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')]
   for aliased in all_aliases:
      my_aliases = listify(self[aliased]['_item_aliases.alias_name'])
      for alias in my_aliases:
          self[alias] = self[aliased].copy()   #we are going to delete stuff...
          del self[alias]["_item_aliases.alias_name"]

As some DDL2 dictionaries neglect children, we repopulate the skeleton (or entirely missing) child definitions that should be present in the dictionary.

<Repopulate child definitions>= (<-U)
      # print("DDL2 norm: processing %s" % item_def)
      thisdef = self[item_def]
      packet_no = thisdef['_item.name'].index(item_def)
      realcat = thisdef['_item.category_id'][packet_no]
      realmand = thisdef['_item.mandatory_code'][packet_no]
      # first add in all the missing categories
      # we don't replace the entry in the list corresponding to the
      # current item, as that would wipe out the information we want
      for child_no in range(len(thisdef['_item.name'])):
          if child_no == packet_no: continue
          child_name = thisdef['_item.name'][child_no]
          child_cat = thisdef['_item.category_id'][child_no]
          child_mand = thisdef['_item.mandatory_code'][child_no]
          if child_name not in self:
              self[child_name] = CifBlock()
              self[child_name]['_item.name'] = child_name
          self[child_name]['_item.category_id'] = child_cat
          self[child_name]['_item.mandatory_code'] = child_mand
      self[item_def]['_item.name'] = item_def
      self[item_def]['_item.category_id'] = realcat
      self[item_def]['_item.mandatory_code'] = realmand

Populating parent and child links. The DDL2 model uses parent-child relationships to create relational database behaviour. This means that the emphasis is on simply linking two ids together directionally. This link is not necessarily inside a definition that is being linked, but we require that any parents and children are identified within the definition that they relate to. This means we have to sometimes relocate and expand links. As an item can simultaneously be both a parent and a child, we need to explicitly fill in the links even within a single definition.

<Populate parent and child links correctly>= (<-U)
target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \
                              '_item_linked.parent_name' in self[a]]
# now dodgy_defs contains all definition blocks with more than one child/parent link
for item_def in dodgy_defs: self.create_pcloop(item_def)           #regularise appearance
for item_def in dodgy_defs:
      print('Processing %s' % item_def)
      thisdef = self[item_def]
      child_list = thisdef['_item_linked.child_name']
      parents = thisdef['_item_linked.parent_name']
      # for each parent, find the list of children.
      family = list(zip(parents,child_list))
      notmychildren = family         #We aim to remove non-children
      # Loop over the parents, relocating as necessary
      while len(notmychildren):
         # get all children of first entry
         mychildren = [a for a in family if a[0]==notmychildren[0][0]]
         print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren)))
         for parent,child in mychildren:   #parent is the same for all
                  # Make sure that we simply add in the new entry for the child, not replace it,
                  # otherwise we might spoil the child entry loop structure
                  try:
                      childloop = self[child].GetLoop('_item_linked.parent_name')
                  except KeyError:
                      print('Creating new parent entry %s for definition %s' % (parent,child))
                      self[child]['_item_linked.parent_name'] = [parent]
                      childloop = self[child].GetLoop('_item_linked.parent_name')
                      childloop.AddLoopItem(('_item_linked.child_name',[child]))
                      continue
                  else:
                      # A parent loop already exists and so will a child loop due to the
                      # call to create_pcloop above
                      pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child]
                      goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent]
                      if len(goodpars)>0:   #no need to add it
                          print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child))
                          continue
                      print('Adding %s to %s entry' % (parent,child))
                      newpacket = childloop.GetPacket(0)   #essentially a copy, I hope
                      setattr(newpacket,'_item_linked.child_name',child)
                      setattr(newpacket,'_item_linked.parent_name',parent)
                      childloop.AddPacket(newpacket)
         #
         # Make sure the parent also points to the children.  We get
         # the current entry, then add our
         # new values if they are not there already
         #
         parent_name = mychildren[0][0]
         old_children = self[parent_name].get('_item_linked.child_name',[])
         old_parents = self[parent_name].get('_item_linked.parent_name',[])
         oldfamily = list(zip(old_parents,old_children))  #list: iterated repeatedly below
         newfamily = []
         print('Old parents -> %s' % repr(old_parents))
         for jj, childname in mychildren:
             alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname]
             if len(alreadythere)>0: continue
             print('Adding new child %s to parent definition at %s' % (childname,parent_name))
             old_children.append(childname)
             old_parents.append(parent_name)
         # Now output the loop, blowing away previous definitions.  If there is something
         # else in this category, we are destroying it.
         newloop = CifLoopBlock(dimension=1)
         newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
         newloop.AddLoopItem(('_item_linked.child_name',old_children))
         del self[parent_name]['_item_linked.parent_name']
         del self[parent_name]['_item_linked.child_name']
         self[parent_name].insert_loop(newloop)
         print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name']))
         # now make a new,smaller list
         notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]]

In order to handle parent-child relationships in a regular way, we want to assume that all parent-child entries occur in a loop, with both members present. This routine does that for us. If the parent is missing, it is assumed to be the currently-defined item. If the child is missing, likewise.

<Loopify parent-child relationships>= (<-U)
def create_pcloop(self,definition):
    old_children = self[definition].get('_item_linked.child_name',[])
    old_parents = self[definition].get('_item_linked.parent_name',[])
    if isinstance(old_children,unicode):
         old_children = [old_children]
    if isinstance(old_parents,unicode):
         old_parents = [old_parents]
    if (len(old_children)==0 and len(old_parents)==0) or \
       (len(old_children) > 1 and len(old_parents)>1):
         return
    if len(old_children)==0:
         old_children = [definition]*len(old_parents)
    if len(old_parents)==0:
         old_parents = [definition]*len(old_children)
    newloop = CifLoopBlock(dimension=1)
    newloop.AddLoopItem(('_item_linked.parent_name',old_parents))
    newloop.AddLoopItem(('_item_linked.child_name',old_children))
    try:
        del self[definition]['_item_linked.parent_name']
        del self[definition]['_item_linked.child_name']
    except KeyError:
        pass
    self[definition].insert_loop(newloop)


Loading the DDL1 categories with DDL2-type information. DDL2 people wisely put category-wide information in the category definition rather than spreading it out between category items. We collect this information together here.

This routine is the big time-waster in initialising a DDL1 dictionary, so we have attempted to optimize it by locally defining functions, instead of using lambdas, and making one loop through the dictionary instead of hundreds.

<Load categories with DDL2-type information>= (<-U)
def ddl1_cat_load(self):
    deflist = self.keys()       #slight optimization
    cat_mand_dic = {}
    cat_unique_dic = {}
    # a function to extract any necessary information from each definition
    def get_cat_info(single_def):
        if self[single_def].get(self.must_exist_spec)=='yes':
            thiscat = self[single_def]["_category"]
            curval = cat_mand_dic.get(thiscat,[])
            curval.append(single_def)
            cat_mand_dic[thiscat] = curval
        # now the unique items...
        # cif_core.dic throws us a curly one: the value of list_uniqueness is
        # not the same as the defined item for publ_body_label, so we have
        # to collect both together.  We assume a non-listed entry, which
        # is true for all current (May 2005) ddl1 dictionaries.
        if self[single_def].get(self.unique_spec,None)!=None:
            thiscat = self[single_def]["_category"]
            new_unique = self[single_def][self.unique_spec]
            uis = cat_unique_dic.get(thiscat,[])
            if single_def not in uis: uis.append(single_def)
            if new_unique not in uis: uis.append(new_unique)
            cat_unique_dic[thiscat] = uis

    [get_cat_info(a) for a in deflist] # apply the above function
    for cat in cat_mand_dic.keys():
        self[cat]["_category_mandatory.name"] = cat_mand_dic[cat]
    for cat in cat_unique_dic.keys():
        self[cat]["_category_key.name"] = cat_unique_dic[cat]

A dataname can appear in a file under a different name if it has been aliased. We create an alias table to speed up lookup. The table is indexed by true name, with a list of alternatives.

<Create alias table>= (<-U)
def create_alias_table(self):
    """Populate an alias table that we can look up when searching for a dataname"""
    all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]]
    self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases])
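
For example (datanames hypothetical), after running create_alias_table on a DDLm core dictionary we might have:

    self.alias_table == {'_cell.length_a': ['_cell_length_a']}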

DDLm internally refers to data items by the category.object notation, with the twist that child categories of loops can have their objects appear in the parent category. So this table prepares a complete list of (cat,obj):dataname correspondences, as the implementation of parent-child requires looking up a table each time searching for children.

The recursive expand_base_table function returns a dictionary of (name,definition_id) pairs indexing the corresponding datanames. We must catch any keys and exclude them from this process, as they are allowed to have the same object_id as their parent key in the enclosing datablock and would overwrite the entry for the parent key if left in. We also note that the example dictionary allows these types of name collisions if an item is intended to be identical (e.g. _atom_site_aniso.type_symbol and _atom_site.type_symbol), so we create a short list of possible alternative names for each (cat,obj) pair.

The create_nested_key_table stores information about which keys index child categories. This way applications can search for any loops containing these keys and expand packets for dREL accordingly.

<Create category/object table>= (<-U)
def create_cat_obj_table(self):
    """Populate a table indexed by (cat,obj) and returning the correct dataname"""
    base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                       for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
    loopable = self.get_loopable_cats()
    loopers = [self.ddlm_immediate_children(a) for a in loopable]
    print('Loopable cats:' + repr(loopable))
    loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers]
    expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
    print("Expansion list:" + repr(expand_list))
    extra_table = {}   #for debugging we keep it separate from base_table until the end
    def expand_base_table(parent_cat,child_cats):
        extra_names = []
        # first deal with all the child categories
        for child_cat in child_cats:
          nn = []
          if child_cat in expand_list:  # a nested category: grab its names
            nn = expand_base_table(child_cat,expand_list[child_cat])
            # store child names
            extra_names += nn
          # add all child names to the table
          child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                         for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
          child_names += extra_names
          # note which (cat,obj) pairs already exist before adding new entries
          repeats = [(obj,name) for obj,name in child_names if (parent_cat,obj) in extra_table]
          extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names if (parent_cat,obj) not in extra_table]))
          # and the repeated ones get appended instead
          for obj,name in repeats:
              extra_table[(parent_cat,obj)] += [name]
        # and finally, add our own names to the return list
        child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                        for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
        return child_names
    [expand_base_table(parent,child) for parent,child in expand_list.items()]
    print('Expansion cat/obj values: ' + repr(extra_table))
    # append repeated ones
    non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
    repeats = [a for a in extra_table.keys() if a in base_table]
    base_table.update(non_repeats)
    for k in repeats:
        base_table[k] += extra_table[k]
    self.cat_obj_lookup_table = base_table
    self.loop_expand_list = expand_list

def get_loopable_cats(self):
    """A short utility function which returns a list of looped categories. This
    is preferred to a fixed attribute as that fixed attribute would need to be
    updated after any edits"""
    return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

def create_cat_key_table(self):
    """Create a utility table with a list of keys applicable to each category. A key is
    a compound key, that is, it is a list"""
    self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
        [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
    def collect_keys(parent_cat,child_cats):
            kk = []
            for child_cat in child_cats:
                if child_cat in self.loop_expand_list:
                    kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                # add these keys to our list
                kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
            self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
            return kk
    for k,v in self.loop_expand_list.items():
        collect_keys(k,v)
    print('Keys for categories: ' + repr(self.cat_key_table))
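
A sketch of the resulting structures, using hypothetical core-dictionary names: the aniso category is a child loop category of atom_site, so its objects are reachable through the parent category as well:

    self.cat_obj_lookup_table[('atom_site','label')]
    #  -> ['_atom_site.label']
    self.cat_obj_lookup_table[('atom_site','u_11')]
    #  -> ['_atom_site_aniso.u_11']    (child category join)
    self.cat_key_table['atom_site']
    #  -> [['_atom_site.label'], ['_atom_site_aniso.label']]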

CIF Dictionaries use the square bracket notation to refer to the definition, as for CifFile objects, but the key is the definition itself, rather than the block name. So we have to create a lookup table. However, template dictionaries may not have a _definition.id, which means we have to revert to their blockname, so we use blockname as a default. We also completely ignore case, which is a bit liberal, as definitions themselves are case-sensitive. We catch duplicate definitions (e.g. as a result of incorrect merging).

If a definition is not found, we search any dictionaries that were imported in 'Full' mode. This means that definitions in the dictionary proper override anything in the imported dictionaries, as per definitions.

<Repurpose standard python methods>= (<-U)
def create_def_block_table(self):
    """ Create an internal table matching definition to block id """
    proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
    # now get the actual ids instead of blocks
    proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
    # remove non-definitions
    if self.diclang != "DDL1":
        top_blocks = list([a[0].lower() for a in self.get_roots()])
    else:
        top_blocks = ["on_this_dictionary"]
    # catch dodgy duplicates
    uniques = set([a[0] for a in proto_table])
    if len(uniques)<len(proto_table):
        def_names = list([a[0] for a in proto_table])
        dodgy = [a for a in def_names if def_names.count(a)>1]
        raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
    self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])

def __getitem__(self,key):
    """Access a datablock by definition id, after the lookup has been created"""
    try:
        return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
    except AttributeError:   #block_id_table not present yet
        return super(CifDic,self).__getitem__(key)
    except KeyError: # key is missing
        try: # print('Definition for %s not found, reverting to CifFile' % key)
            return super(CifDic,self).__getitem__(key)
        except KeyError: # try imports
            return self.lookup_imports(key)

def __setitem__(self,key,value):
    """Add a new definition block"""
    super(CifDic,self).__setitem__(key,value)
    try:
        self.block_id_table[value['_definition.id']]=key
    except AttributeError:   #does not exist yet
        pass

def NewBlock(self,*args,**kwargs):
    newname = super(CifDic,self).NewBlock(*args,**kwargs)
    try:
        self.block_id_table[self[newname]['_definition.id']]=newname
    except AttributeError: #no block_id table
        pass
    return newname

def __delitem__(self,key):
    """Remove a definition"""
    try:
        super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
        del self.block_id_table[key.lower()]
    except (AttributeError,KeyError):   #block_id_table not present yet
        super(CifDic,self).__delitem__(key)
        return
    # fix other datastructures
    # cat_obj table

def keys(self):
    """Return all definitions"""
    try:
        return self.block_id_table.keys()
    except AttributeError:
        return super(CifDic,self).keys()

def has_key(self,key):
    return key in self

def __contains__(self,key):
    try:
        return key.lower() in self.block_id_table
    except AttributeError:
        return super(CifDic,self).__contains__(key)

def items(self):
    """Return (key,value) pairs"""
    return list([(a,self[a]) for a in self.keys()])
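
A usage sketch of the repurposed lookup (the dictionary file is assumed to be available locally):

    cd = CifDic("cif_core.dic", do_dREL=False)   # build the definition lookup
    defblock = cd["_atom_site.label"]            # access by _definition.id, case-insensitive
    print(defblock["_description.text"])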

Any Starfile method that uses the square-bracket notation or built-in syntax (e.g. del) to access keys may fail if the set of keys it uses is not that provided by the keys() method above, as the object delegation using super() does not apply. Because our methods above 'fall through' to the underlying CifFile, the process of renaming may or may not have called our del method to remove the definition, so we check.

<Repurpose Starfile methods>= (<-U)
def unlock(self):
    """Allow overwriting of all definitions in this collection"""
    for a in self.keys():
        self[a].overwrite=True

def lock(self):
    """Disallow changes in definitions"""
    for a in self.keys():
        self[a].overwrite=False

def rename(self,oldname,newname,blockname_as_well=True):
    """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True,
    change the underlying blockname too."""
    if blockname_as_well:
        super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)
        self.block_id_table[newname.lower()]=newname
        if oldname.lower() in self.block_id_table: #not removed
           del self.block_id_table[oldname.lower()]
    else:
        self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
        del self.block_id_table[oldname.lower()]
        return

Semantic information

<Obtaining semantic information from the dictionary>= (<-U)
<Operations with semantic children>
<Get category information>
<List all items in a category>

For convenience we provide ways of interrogating the semantic tree of categories. Note that if we are passed the top-level datablock, the semantic children are the syntactic children. An additional method finds the 'dangling' definitions, which are definitions that have no category definition present - these might be definitions added by this dictionary to categories found in other dictionaries.

<Operations with semantic children>= (<-U)
def get_root_category(self):
    """Get the single 'Head' category of this dictionary"""
    root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
    if len(root_cats)>1 or len(root_cats)==0:
        raise CifError("Cannot determine a unique Head category, got" % repr(root_cats))
    return root_cats[0]

def ddlm_immediate_children(self,catname):
    """Return a list of datanames for the immediate children of catname.  These are
    semantic children (i.e. based on _name.category_id), not structural children as
    in the case of StarFile.get_immediate_children"""

    straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
    return list(straight_children)

def ddlm_all_children(self,catname):
    """Return a list of all children, including the `catname`"""
    all_children = self.ddlm_immediate_children(catname)
    cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
    for c in cat_children:
        all_children.remove(c)
        all_children += self.ddlm_all_children(c)
    return all_children + [catname]

def is_semantic_child(self,parent,maybe_child):
    """Return true if `maybe_child` is a child of `parent`"""
    all_children = self.ddlm_all_children(parent)
    return maybe_child in all_children

def ddlm_danglers(self):
    """Return a list of definitions that do not have a category defined
    for them, or are children of an unattached category"""
    top_block = self.get_root_category()
    connected = set(self.ddlm_all_children(top_block))
    all_keys = set(self.keys())
    unconnected = all_keys - connected
    return list(unconnected)

def get_ddlm_parent(self,itemname):
    """Get the parent category of itemname"""
    parent = self[itemname].get('_name.category_id','')
    if parent == '':  # no parent specified
        raise CifError("%s has no parent" % itemname)
    return parent

Some methods for interrogating categories for names.

<Get category information>= (<-U)
def expand_category_opt(self,name_list):
    """Return a list of all non-category items in a category or return the name
       if the name is not a category"""
    new_list = []
    for name in name_list:
      if self.get(name,{}).get('_definition.scope','Item') == 'Category':
        new_list += self.expand_category_opt([a for a in self.keys() if \
                 self[a].get('_name.category_id','').lower() == name.lower()])
      else:
        new_list.append(name)
    return new_list

def get_categories(self):
    """Return a list of category names"""
    return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

This method was added to facilitate running dREL scripts, which treat certain variables as having attributes which all belong to a single category. We return only the extension in keeping with dREL syntax. If names_only is true, we return only the object part of the dataname. Note that sub categories are excluded. TODO: use cat-obj table for speed.

<List all items in a category>= (<-U)
def names_in_cat(self,cat,names_only=False):
    names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
    if not names_only:
        return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
    else:
        return list([self[a]["_name.object_id"] for a in names])


DDLm introduces validity information in the enclosing datablock. It is a loop of scope, attribute values where the scope is one of dictionary (everywhere), category (whole category) and item (just the single definition). Validity can be mandatory, encouraged or not allowed. It only appears in the DDLm attributes dictionary, so this information is blank unless we are dealing with the DDLm dictionary.

<Parse DDLm validity information>= (<-U)
def ddlm_parse_valid(self):
    if "_dictionary_valid.application" not in self.master_block:
        return
    for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
        scope = getattr(scope_pack,"_dictionary_valid.application")
        valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
        if scope[1] == "Mandatory":
            self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
        elif scope[1] == "Prohibited":
            self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)

These methods were added when developing interactive editing tools, which allow shifting categories around.

<Definition manipulation methods>= (<-U)
<Changing and updating categories>
<Getting category information>

Changing a category name involves changing the _name.category_id in all children as well as the category definition itself and datablock names, then updating our internal structures.

<Changing and updating categories>= (<-U)
def change_category_name(self,oldname,newname):
    """Change the category name from [[oldname]] to [[newname]]"""
    self.unlock()
    if oldname not in self:
        raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
    if newname in self:
        raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
    child_defs = self.ddlm_immediate_children(oldname)
    self.rename(oldname,newname)   #NB no name integrity checks
    self[newname]['_name.object_id']=newname
    self[newname]['_definition.id']=newname
    for child_def in child_defs:
        self[child_def]['_name.category_id'] = newname
        if self[child_def].get('_definition.scope','Item')=='Item':
            newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
            self[child_def]['_definition.id']=newid
            self.rename(child_def,newid[1:])  #no underscore at the beginning
    self.lock()

def create_catobj_name(self,cat,obj):
    """Combine category and object in approved fashion to create id"""
    return ('_'+cat+'.'+obj)

def change_category(self,itemname,catname):
    """Move itemname into catname, return new handle"""
    defid = self[itemname]
    if defid['_name.category_id'].lower()==catname.lower():
        print('Already in category, no change')
        return itemname
    if catname not in self:    #don't have it
        print('No such category %s' % catname)
        return itemname
    self.unlock()
    objid = defid['_name.object_id']
    defid['_name.category_id'] = catname
    newid = itemname # stays the same for categories
    if defid.get('_definition.scope','Item') == 'Item':
        newid = self.create_catobj_name(catname,objid)
        defid['_definition.id']= newid
        self.rename(itemname,newid)
    self.set_parent(catname,newid)
    self.lock()
    return newid

def change_name(self,one_def,newobj):
    """Change the object_id of one_def to newobj. This is not used for
    categories, but can be used for dictionaries"""
    if '_dictionary.title' not in self[one_def]:  #a dictionary block
        newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
        self.unlock()
        self.rename(one_def,newid)
        self[newid]['_definition.id']=newid
        self[newid]['_name.object_id']=newobj
    else:
        self.unlock()
        newid = newobj
        self.rename(one_def,newobj)
        self[newid]['_dictionary.title'] = newid
    self.lock()
    return newid

# Note that our semantic parent is given by catparent, but our syntactic parent is
# always just the root block
def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
    """Add a new category to the dictionary with name [[catname]].
       If [[catparent]] is None, the category will be a child of
       the topmost 'Head' category or else the top data block. If
       [[is_loop]] is false, a Set category is created. If [[allow_dangler]]
       is true, the parent category does not have to exist."""
    if catname in self:
        raise CifError('Attempt to add existing category %s' % catname)
    self.unlock()
    syntactic_root = self.get_roots()[0][0]
    if catparent is None:
        semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
        if len(semantic_root)>0:
            semantic_root = semantic_root[0]
        else:
            semantic_root = syntactic_root
    else:
        semantic_root = catparent
    realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
    self.block_id_table[catname.lower()]=realname
    self[catname]['_name.object_id'] = catname
    if not allow_dangler or catparent is None:
        self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
    else:
        self[catname]['_name.category_id'] = catparent
    self[catname]['_definition.id'] = catname
    self[catname]['_definition.scope'] = 'Category'
    if is_loop:
        self[catname]['_definition.class'] = 'Loop'
    else:
        self[catname]['_definition.class'] = 'Set'
    self[catname]['_description.text'] = 'No definition provided'
    self.lock()
    return catname

def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
    """Add itemname to category [[catparent]]. If itemname contains periods,
    all text before the final period is ignored. If [[allow_dangler]] is True,
    no check for a parent category is made."""
    self.unlock()
    if '.' in itemname:
        objname = itemname.split('.')[-1]
    else:
        objname = itemname
    objname = objname.strip('_')
    if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
        raise CifError('No category %s in dictionary' % catparent)
    fullname = '_'+catparent.lower()+'.'+objname
    print('New name: %s' % fullname)
    syntactic_root = self.get_roots()[0][0]
    realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root) #low-level change
    # update our dictionary structures
    self.block_id_table[fullname.lower()]=realname
    self[fullname]['_definition.id']=fullname
    self[fullname]['_name.object_id']=objname
    self[fullname]['_name.category_id']=catparent
    self[fullname]['_definition.class']='Datum'
    self[fullname]['_description.text']=def_text

def remove_definition(self,defname):
    """Remove a definition from the dictionary."""
    if defname not in self:
        return
    if self[defname].get('_definition.scope')=='Category':
        children = self.ddlm_immediate_children(defname)
        [self.remove_definition(a) for a in children]
        cat_id = self[defname]['_definition.id'].lower()
    del self[defname]

The DDLm architecture identifies a data definition by (category,object) which identifies a unique textual dataname appearing in the data file. Because of category joins when nested categories are looped, a single dataname may be referred to by several different category identifiers. The get_name_by_cat_obj routine will search all loop categories within the given category hierarchy until it finds the appropriate one.

If give_default is True, the default construction '_catid.objid' is returned if nothing is found in the dictionary. This should only be used during testing, as the lack of a corresponding definition in the dictionary means that it is unlikely that anything sensible will result.

<Getting category information>= (<-U)
def get_cat_obj(self,name):
    """Return (cat,obj) tuple. [[name]] must contain only a single period"""
    cat,obj = name.split('.')
    return (cat.strip('_'),obj)

def get_name_by_cat_obj(self,category,object,give_default=False):
    """Return the dataname corresponding to the given category and object"""
    if category[0] == '_':    #accidentally left in
       true_cat = category[1:].lower()
    else:
       true_cat = category.lower()
    try:
        return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
    except KeyError:
        if give_default:
           return '_'+true_cat+'.'+object
    raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))
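
For example, continuing with the hypothetical names used earlier:

    cd.get_name_by_cat_obj('atom_site','u_11')
    #  -> '_atom_site_aniso.u_11' if the aniso category is a child of atom_site
    cd.get_name_by_cat_obj('nosuchcat','x',give_default=True)
    #  -> '_nosuchcat.x'    (default construction, testing only)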

Dictionaries have the category-wide information in the category definition area. We do not need to fill all of this in if we are not planning on running dREL.

<Add category information>= (<-U)
def add_category_info(self,full=True):
    if self.diclang == "DDLm":
        <Create category parent table>
        if full:
            <Create key hierarchy>
    else:
        self.parent_lookup = {}
        self.key_equivs = {}

This method was added for DDLm support. We are passed a category and a value, and must find a packet which has a matching key. We use the keyname as a way of finding the loop.

<Return a single packet by key>= (<-U)
def get_key_pack(self,category,value,data):
    keyname = self[category][self.unique_spec]
    onepack = data.GetPackKey(keyname,value)
    return onepack

For help in validation we create a lookup table which matches a category to its ultimate parent. This allows us to quickly check whether or not a data item is allowed to be co-looped with other data items. Note that we may have to draw in external dictionaries to do this properly, but to avoid holding the whole lot in memory, we simply stop searching up the parent tree if the parent block is missing.

<Create category parent table>= (<-U)
catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
self.parent_lookup = {}
for one_cat in looped_cats:
    parent_cat = one_cat
    parent_def = self[parent_cat]
    next_up = parent_def['_name.category_id'].lower()
    while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
        parent_def = self[next_up]
        parent_cat = next_up
        next_up = parent_def['_name.category_id'].lower()
    self.parent_lookup[one_cat] = parent_cat

The key hierarchy. This is in many ways reinventing the parent-child relationships that are laid out in DDL2 definitions. In order to access a particular packet using multiple datanames as compound keys, we need to be aware of which keys are related to which other keys. Relationships are always made explicit via the '_name.linked_item_id' attribute in DDLm, which always points to the parent. This is always present, even though it may often be inferred using Loop category parent/child relationships, as compound keys in categories might introduce ambiguity.

This datastructure allows us to provide a key, and obtain a list of equivalent keys, being all those above it in the hierarchy, that is, which it can be replaced by. If we are not doing dREL, we can afford to skip this.

<Create key hierarchy>= (<-U)
self.key_equivs = {}
for one_cat in looped_cats:   #follow them up
    lower_keys = listify(self[one_cat]['_category_key.name'])
    start_keys = lower_keys[:]
    while len(lower_keys)>0:
        this_cat = self[lower_keys[0]]['_name.category_id']
        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
        if len(parent)>1:
            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
        if len(parent)==0: break
        parent = parent[0]
        parent_keys = listify(self[parent]['_category_key.name'])
        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
        # sanity check
        if set(parent_keys) != set(linked_keys):
            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
            # now add in our information
        for parent,child in zip(linked_keys,start_keys):
            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
        lower_keys = linked_keys  #preserves order of start keys

DDLm functionality

DDLm is a far more complex dictionary standard than DDL2. We are able to import definitions in two modes, "Full" and "Contents". "Contents" simply copies the attributes found in the target definition, and is useful as a templating mechanism for commonly-seen attributes. "Full" brings in the entire definition block and all child definitions, and is useful for including entire dictionaries. As a special case, if we import a 'Head' definition into a 'Head' definition, we actually make all non-Head categories of the imported dictionary into child categories of the importing dictionary 'Head' category, and the imported 'Head' category disappears.

"Contents" and "Full" modes are implemented dynamically, that is, when the value of an attribute is requested, the dictionary resolves imports.
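
For orientation, each value of '_import.get' as consumed by the code below is a list of tables, one per import; the keys accessed are 'file', 'save', 'mode', 'dupl' and 'miss'. A purely illustrative example in Python form:

    import_list = [{'file': 'templ_enum.cif',  # location relative to this dictionary
                    'save': 'units_code',      # save frame to import
                    'mode': 'Contents',        # or 'Full'
                    'dupl': 'Ignore',          # behaviour on duplicate block name
                    'miss': 'Exit'}]           # behaviour on missing frame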

The merging method of the StarFile object is purely syntactic and so does not understand DDLm relationships. We add all blocks as the children of the top-level dictionary block, and then in the case of a new 'Head' block we simply reparent the immediate semantic children of the old 'Head' block.

<Perform DDLm imports>= (<-U)
<Get import information>
<Heavy import routine>
<Light import routine>
<Lookup imports for whole dictionary>

<Get import information>= (<-U)
def obtain_imports(self,import_mode,heavy=False):
    """Collate import information"""
    self._import_dics = []
    import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]])
    print('Import mode %s applied to following frames' % import_mode)
    print(str([a[0] for a in import_frames]))
    if import_mode != 'All':
       for i in range(len(import_frames)):
            import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()])
       print('Importing following frames in mode %s' % import_mode)
       print(str(import_frames))
    #resolve all references
    for parent_block,import_list in import_frames:
      for import_ref in import_list:
        file_loc = import_ref["file"]
        full_uri = self.resolve_path(file_loc)
        if full_uri not in self.template_cache:
            dic_as_cif = CifFile(full_uri,grammar=self.grammar)
            self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False)  #this will recurse internal imports
            print('Added %s to cached dictionaries' % full_uri)
        import_from = self.template_cache[full_uri]
        dupl = import_ref.get('dupl','Exit')
        miss = import_ref.get('miss','Exit')
        target_key = import_ref["save"]
        try:
            import_target = import_from[target_key]
        except KeyError:
            if miss == 'Exit':
               raise CifError('Import frame %s not found in %s' % (target_key,full_uri))
            else: continue
        # now import appropriately
        mode = import_ref.get("mode",'Contents').lower()
        if target_key in self and mode=='full':  #so blockname will be duplicated
            if dupl == 'Exit':
                raise CifError('Import frame %s already in dictionary' % target_key)
            elif dupl == 'Ignore':
                continue
        if heavy:
            self.ddlm_import(parent_block,import_from,import_target,target_key,mode)
        else:
            self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode)
            

The original way of doing imports was to completely merge the information from the imported file. This is slightly more efficient if information about import statements is not required.

<Heavy import routine>= (<-U)
def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'):
        """Import other dictionaries in place"""
        if mode == 'contents':   #merge attributes only
            self[parent_block].merge(import_target)
        elif mode =="full":
            # Do the syntactic merge
            syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting
            from_cat_head = import_target['_name.object_id']
            child_frames = import_from.ddlm_all_children(from_cat_head)
             # Check for Head merging Head
            if self[parent_block].get('_definition.class','Datum')=='Head' and \
               import_target.get('_definition.class','Datum')=='Head':
                  head_to_head = True
            else:
                  head_to_head = False
                  child_frames.remove(from_cat_head)
            # As we are in syntax land, we call the CifFile methods
            child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames])
            child_blocks = super(CifDic,import_from).makebc(child_blocks)
            # Prune out any datablocks that have identical definitions
            from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()])
            double_defs = list([b for b in from_defs.items() if self.has_key(b[1])])
            print('Definitions for %s superseded' % repr(double_defs))
            for b in double_defs:
                del child_blocks[b[0]]
            super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head)      #
            print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames),
               mode,len(self)))
            # Now the semantic merge
            # First expand our definition <-> blockname tree
            self.create_def_block_table()
            merging_cat = self[parent_block]['_name.object_id']      #new parent
            if head_to_head:
                child_frames = self.ddlm_immediate_children(from_cat_head)    #old children
                #the new parent is the importing category for all old children
                for f in child_frames:
                    self[f].overwrite = True
                    self[f]['_name.category_id'] = merging_cat
                    self[f].overwrite = False
                # remove the old head
                del self[from_cat_head]
                print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat))
            else:  #imported category is only child
                from_frame = import_from[target_key]['_definition.id'] #so we can find it
                child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0]
                self[child_frame]['_name.category_id'] = merging_cat
                print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat))
        # it will never happen again...
        del self[parent_block]["_import.get"]

def resolve_path(self,file_loc):
    url_comps = urlparse(file_loc)
    if url_comps[0]: return file_loc    #already full URI
    new_url = urljoin(self.my_uri,file_loc)
    #print("Transformed %s to %s for import " % (file_loc,new_url))
    return new_url
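
A sketch of the resolution behaviour (URIs hypothetical): with self.my_uri set to 'file:///dics/cif_core.dic',

    cdic.resolve_path('templ_attr.cif')
    # -> 'file:///dics/templ_attr.cif'
    cdic.resolve_path('http://example.org/t.cif')
    # -> 'http://example.org/t.cif' (already a full URI, returned unchanged)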

It is possible to not perform imports at reading time, but simply to register the links and resolve the imports if and when a definition is accessed.

<Light import routine>= (<-U)
def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'):
    """Register the imported dictionaries but do not alter any definitions. `parent_block`
    contains the id of the block that is importing. `import_target` is the block that
    should be imported. `import_from` is the CifFile that contains the definitions."""
    if mode == 'contents':   #merge attributes only
        self[parent_block].add_dict_cache(file_loc,import_from)
    elif mode =="full":
         # Check for Head merging Head
        if self[parent_block].get('_definition.class','Datum')=='Head' and \
           import_target.get('_definition.class','Datum')=='Head':
               head_to_head = True
        else:
               head_to_head = False
        # Figure out the actual definition ID
        head_id = import_target["_definition.id"]
        # Adjust parent information
        merging_cat = self[parent_block]['_name.object_id']
        from_cat_head = import_target['_name.object_id']
        if not head_to_head:   # imported category is only child
            import_target["_name.category_id"]=merging_cat
        self._import_dics = [(import_from,head_id)]+self._import_dics #prepend

Lightweight importation simply records the import information without performing the import, and then when keys are accessed it checks through the imported dictionaries. The semantics are such that the last dictionary imported should be the first dictionary checked, as imports overwrite any definitions in preceding imports.

<Lookup imports for whole dictionary>= (<-U)
def lookup_imports(self,key):
    """Check the list of imported dictionaries for this definition"""
    for one_dic,head_def in self._import_dics:
        from_cat_head = one_dic[head_def]['_name.object_id']
        possible_keys = one_dic.ddlm_all_children(from_cat_head)
        if key in possible_keys:
            return one_dic[key]
    raise KeyError("%s not found in import dictionaries" % key)
    

Merging a whole dictionary. A dictionary is a collection of categories for the purposes of merging (later we may want to keep some audit information).

<Add another DDLM dictionary>=
def get_whole_dict(self,source_dict,on_dupl,on_miss):
    for source_cat in source_dict.get_categories():
        self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)

Merging a single category. If this category does not exist, we simply add the category block and any members of the category. If it does exist, we use the 'on_dupl' flag to resolve our behaviour, either ignoring, replacing, or dying a horrible death.

If the specified block is missing in the external dictionary, we either skip it or die a horrible death.

<Add an external DDLM category>=
def get_one_cat(self,source_dict,source_cat,on_dupl,on_miss):
    ext_cat = source_dict.get(source_cat,"")
    this_cat = self.get(source_cat,"")
    print("Adding category %s" % source_cat)
    if not ext_cat:
        if on_miss == "Ignore":
           pass
        else:
           raise CifError("Missing category %s" % source_cat)
    else:
        all_ext_defns = source_dict.keys()
        cat_list = list(filter(lambda a:source_dict[a].get("_name.category_id","").lower()==source_cat.lower(),
                           all_ext_defns))
        print("Items: %s" % repr(cat_list))
        if this_cat:     # The category block itself is duplicated
            if on_dupl=="Ignore":
                pass
            elif on_dupl == "Exit":
                raise CifError("Duplicate category %s" % source_cat)
            else:
                self[source_cat] = ext_cat
        else:
            self[source_cat] = ext_cat
        # now do all member definitions
        for cat_defn in cat_list:
            self.add_one_defn(source_dict,cat_defn,on_dupl)

def add_one_defn(self,source_dict,cat_defn,on_dupl):
    if cat_defn in self:
       if on_dupl == "Ignore": pass
       elif on_dupl == "Exit":
               raise CifError("Duplicate definition %s" % cat_defn)
       else: self[cat_defn] = source_dict[cat_defn]
    else: self[cat_defn] = source_dict[cat_defn]
    print("    "+cat_defn)

This actually follows the children of the category down. We get a list of child categories and add them one by one recursively.

<Add an external DDLM category with children>=
def get_one_cat_with_children(self,source_dict,source_cat,on_dupl,on_miss):
    self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)
    child_cats = [a for a in source_dict.get_categories() if source_dict[a]["_category.parent_id"]==source_dict[source_cat]["_definition.id"]]
    for child_cat in child_cats: self.get_one_cat(source_dict,child_cat,on_dupl,on_miss)

Importing into definitions. We are adjusting only the attributes of a single definition.

<Add attributes to definitions>=
def import_attributes(self,mykey,source_dict,source_def,on_dupl,on_miss):
    # process missing
    if source_def not in source_dict:
        if on_miss == 'Exit':
            raise CifError('Missing definition for import %s' % source_def)
        else: return          #nothing else to do
    # now do the import
    print('Adding attributes from %s to %s' % (source_def,mykey))
    self[mykey].merge(source_dict[source_def],mode='replace',match_att= \
          ['_definition.id','_name.category_id','_name.object_id'])

def import_loop(self,mykey,source_dict,source_def,loop_name,on_miss):
    # process missing
    if source_def not in source_dict:
        if on_miss == 'Exit':
            raise CifError('Missing definition for import %s' % source_def)
        else: return          #nothing else to do
    print('Adding %s attributes from %s to %s' % (loop_name,source_def,mykey))
    state_loop = source_dict[source_def].GetLoop(loop_name)
    self[mykey].insert_loop(state_loop)

Validation

A DDL provides lots of information that can be used to check a datafile or dictionary for consistency. Currently, the DDL-appropriate routines are installed at initialisation time.

<Validation routines>= (<-U)
<Install validation functions>
<Item-level validation>
<Loop-level validation>
<Cross-item validation>
<Block-level validation>
<Run validation tests>
<Optimisation on/off>

Each dictionary has a set of validation functions associated with it based on the information contained in the DDL. The following function is called on initialisation.

<Install validation functions>= (<-U)
def install_validation_functions(self):
    """Install the DDL-appropriate validation checks"""
    if self.diclang != 'DDLm':
        # functions which check conformance
        self.item_validation_funs = [
            self.validate_item_type,
            self.validate_item_esd,
            self.validate_item_enum,
            self.validate_enum_range,
            self.validate_looping
        ]
        # functions checking loop values
        self.loop_validation_funs = [
            self.validate_loop_membership,
            self.validate_loop_key,
            self.validate_loop_references
        ]
        # where we need to look at other values
        self.global_validation_funs = [
            self.validate_exclusion,
            self.validate_parent,
            self.validate_child,
            self.validate_dependents,
            self.validate_uniqueness
        ]
        # where only a full block will do
        self.block_validation_funs = [
            self.validate_mandatory_category
        ]
        # removal is quicker with special checks
        self.global_remove_validation_funs = [
            self.validate_remove_parent_child
        ]
    elif self.diclang == 'DDLm':
        self.item_validation_funs = [
            self.validate_item_enum,
            self.validate_item_esd_ddlm,
            ]
        self.loop_validation_funs = [
            self.validate_looping_ddlm,
            self.validate_loop_key_ddlm,
            self.validate_loop_membership
            ]
        self.global_validation_funs = []
        self.block_validation_funs = [
            self.check_mandatory_items,
            self.check_prohibited_items
            ]
        self.global_remove_validation_funs = []
    self.optimize = False        # default value
    self.done_parents = []
    self.done_children = []
    self.done_keys = []

Some things are independent of where an item occurs in the file; we check those things here. All functions are expected to return a dictionary with at least one key: "result", as well as optional keys depending on the type of error.

<Item-level validation>= (<-U)
<Validate the type of an item (DDL1/2)>
<Validate the type of an item (DDLm)>
<Validate esd presence>
<Validate enumeration range>
<Validate an enumeration>
<Validate looping properties>

Validate the type of an item

We use the type expressions that we have available to check that the type of the item passed to us matches up. We may have a list of items, so be aware of that. We define a tiny matching function so that we do not have to match twice to catch the non-matching case, in which match() returns None and an immediate attempt to call group() would raise an AttributeError.

Note also that none of the extant dictionaries use the 'none' or 'seq' values for type. The seq value in particular would complicate matters.

<Validate the type of an item (DDL1/2)>= (<-U)
def validate_item_type(self,item_name,item_value):
    def mymatch(m,a):
        res = m.match(a)
        if res != None: return res.group()
        else: return ""
    target_type = self[item_name].get(self.type_spec)
    if target_type == None:          # e.g. a category definition
        return {"result":True}                  # not restricted in any way
    matchexpr = self.typedic[target_type]
    item_values = listify(item_value)
    #for item in item_values:
        #print("Type match " + item_name + " " + item + ":",)
    #skip dots and question marks
    check_all = [a for a in item_values if a !="." and a != "?"]
    check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
    if len(check_all)>0: return {"result":False,"bad_values":check_all}
    else: return {"result":True}

DDLm types are far more nuanced, and we are not provided with prepacked regular expressions in order to check them. We have identified the following checks: that the type is in the correct container; that the contents are as described in _type.contents; that 'State' purpose datanames have a list of enumerated states; that 'Link' purpose datanames have '_name.linked_item_id' in the same definition; and that 'SU' purpose datanames also have the above.

<Validate the type of an item (DDLm)>= (<-U)
def decide(self,result_list):
    """Construct the return list"""
    if len(result_list)==0:
           return {"result":True}
    else:
           return {"result":False,"bad_values":result_list}

def validate_item_container(self, item_name,item_value):
    container_type = self[item_name]['_type.container']
    item_values = listify(item_value)
    if container_type == 'Single':
       okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
       return self.decide(okcheck)
    if container_type in ('Multiple','List'):
       okcheck = [a for a in item_values if not isinstance(a,StarList)]
       return self.decide(okcheck)
    if container_type == 'Array':    #A list with numerical values
       okcheck = [a for a in item_values if not isinstance(a,StarList)]
       first_check = self.decide(okcheck)
       if not first_check['result']: return first_check
       # a check that all elements are numerical is left unimplemented, e.g.
       #num_check = [a for a in item_values if len([b for b in a if not isinstance(b,(int,float))])>0]
       return first_check

Esds. Numbers are sometimes not allowed to have esds appended. The default is that esds are not OK, and we should also skip anything that has character type, as that is automatically not a candidate for esds.

Note that we make use of the primitive type here; there are some cases where a string type looks like an esd, so unless we know we have a number we ignore these cases.

DDLm requires an esd if _type.purpose is Measurand, and should not have an esd if _type.purpose is Number.

<Validate esd presence>= (<-U)
def validate_item_esd(self,item_name,item_value):
    if self[item_name].get(self.primitive_type) != 'numb':
        return {"result":None}
    can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
    if can_esd: return {"result":True}         #must be OK!
    item_values = listify(item_value)
    check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
    if len(check_all)>0: return {"result":False,"bad_values":check_all}
    return {"result":True}

def validate_item_esd_ddlm(self,item_name,item_value):
    if self[item_name].get(self.primitive_type) not in \
    ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
        return {"result":None}
    can_esd = True
    if self[item_name].get('_type.purpose') != 'Measurand':
        can_esd = False
    item_values = listify(item_value)
    check_all = [get_number_with_esd(a)[1] for a in item_values]
    check_all = [v for v in check_all if (can_esd and v == None) or \
             (not can_esd and v != None)]
    if len(check_all)>0: return {"result":False,"bad_values":check_all}
    return {"result":True}

Enumeration ranges. Our dictionary has been prepared as for a DDL2 dictionary, where loops are used to specify closed or open ranges: if an entry exists where maximum and minimum values are equal, this means that this value is included in the range; otherwise, ranges are open. Our value is already numerical.
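
For example (values illustrative), a DDL2-style range 0 <= x < 5 arrives as minima ['0.0','0.0'] and maxima ['0.0','5.0'], so that

    rangelist = [(0.0, 0.0), (0.0, 5.0)]

where the equal pair admits the endpoint 0.0 itself and the unequal pair admits the open interval between its bounds.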

<Validate enumeration range>= (<-U)
def validate_enum_range(self,item_name,item_value):
    if "_item_range.minimum" not in self[item_name] and \
       "_item_range.maximum" not in self[item_name]:
        return {"result":None}
    minvals = self[item_name].get("_item_range.minimum",default = ["."])
    maxvals = self[item_name].get("_item_range.maximum",default = ["."])
    def makefloat(a):
        if a == ".": return a
        else: return float(a)
    maxvals = map(makefloat, maxvals)
    minvals = map(makefloat, minvals)
    rangelist = list(zip(minvals,maxvals))
    item_values = listify(item_value)
    def map_check(rangelist,item_value):
        if item_value == "?" or item_value == ".": return True
        iv,esd = get_number_with_esd(item_value)
        if iv==None: return None  #shouldn't happen as is numb type
        for lower,upper in rangelist:
            #check the minima
            if lower == ".": lower = iv - 1
            if upper == ".": upper = iv + 1
            if iv > lower and iv < upper: return True
            if upper == lower and iv == upper: return True
        # debug
        # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
        return False
    check_all = [a for a in item_values if map_check(rangelist,a) != True]
    if len(check_all)>0: return {"result":False,"bad_values":check_all}
    else: return {"result":True}

Note that we must make a copy of the enum list, otherwise when we add in our ? and . they will modify the Cif in place, very sneakily, and next time we have a loop length check, e.g. in writing out, we will probably have a mismatch.

<Validate an enumeration>= (<-U)
def validate_item_enum(self,item_name,item_value):
    try:
        enum_list = self[item_name][self.enum_spec][:]
    except KeyError:
        return {"result":None}
    enum_list.append(".")   #default value
    enum_list.append("?")   #unknown
    item_values = listify(item_value)
    #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
    check_all = [a for a in item_values if a not in enum_list]
    if len(check_all)>0: return {"result":False,"bad_values":check_all}
    else: return {"result":True}

Check that something can be looped. For DDL1 we have yes, no and both; for DDL2 there is no explicit restriction on looping beyond membership in a category. Note that the DDL1 language specifies a default value of 'no' for this item, so when not explicitly allowed by the dictionary, listing is prohibited. In DDLm, only members of 'Loop' categories allow looping. As we transition the whole setup to DDLm-type data structures, the two calls below will merge and move to the looping checks rather than the single item checks.

<Validate looping properties>= (<-U)
def validate_looping(self,item_name,item_value):
    try:
        must_loop = self[item_name][self.must_loop_spec]
    except KeyError:
        return {"result":None}
    if must_loop == 'yes' and isinstance(item_value,(unicode,str)): # not looped
        return {"result":False}      #this could be triggered
    if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
        return {"result":False}
    return {"result":True}

def validate_looping_ddlm(self,loop_names):
    """Check that all names are loopable"""
    truly_loopy = self.get_final_cats(loop_names)
    if len(truly_loopy)<len(loop_names):  #some are bad
        categories = [(a,self[a][self.cat_spec].lower()) for a in loop_names]
        not_looped = [a[0] for a in categories if a[1] not in self.parent_lookup.keys()]
        return {"result":False,"bad_items":not_looped}
    return {"result":True}

And some things are related to the group structure. Note that these functions do not require knowledge of the item values.

<Loop-level validation>= (<-U)
<Validate loop membership>
<Validate loop key>
<Validate loop key DDLm>
<Validate loop mandatory items>
<Get alternative item names>

Loop membership. The most common constraints on a loop are that all items are from the same category, and that loops of a certain category must contain a certain key to be valid. The latter test should be performed after the former test.

DDLm allows nested loop categories, so an item from a child category can appear in a parent category loop if both are from 'Loop' categories.

<Validate loop membership>= (<-U)
def validate_loop_membership(self,loop_names):
    final_cat = self.get_final_cats(loop_names)
    bad_items =  [a for a in final_cat if a != final_cat[0]]
    if len(bad_items)>0:
        return {"result":False,"bad_items":bad_items}
    else: return {"result":True}

def get_final_cats(self,loop_names):
    """Return a list of the uppermost parent categories for the loop_names. Names
    that are not from loopable categories are ignored."""
    try:
        categories = [self[a][self.cat_spec].lower() for a in loop_names]
    except KeyError:       #category_id is mandatory
        raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
    truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
    return [self.parent_lookup[a] for a in truly_looped]
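
A hedged illustration (datanames hypothetical): if 'atom_site_aniso' is a child Loop category of 'atom_site', both names below map to the same ultimate parent and may therefore share a loop:

    cdic.get_final_cats(['_atom_site.label', '_atom_site_aniso.u_11'])
    # -> ['atom_site', 'atom_site']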

The items specified by _list_mandatory (DDL1) must be present in a loop containing items of a given category (and it follows that only one loop in a given data block is available for any category containing such an item). This has been explicitly described as a key in DDL2. In DDLm, any key from a parent looped category is acceptable as well as the key of the given category itself.

<Validate loop key>= (<-U)
def validate_loop_key(self,loop_names):
    category = self[loop_names[0]][self.cat_spec]
    # find any unique values which must be present
    key_spec = self[category].get(self.key_spec,[])
    for names_to_check in key_spec:
        if isinstance(names_to_check,unicode):   #only one
            names_to_check = [names_to_check]
        for loop_key in names_to_check:
            if loop_key not in loop_names:
                #is this one of those dang implicit items?
                if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                    continue          #it is virtually there...
                alternates = self.get_alternates(loop_key)
                if alternates == []:
                    return {"result":False,"bad_items":loop_key}
                for alt_names in alternates:
                    alt = [a for a in alt_names if a in loop_names]
                    if len(alt) == 0:
                        return {"result":False,"bad_items":loop_key}  # no alternates
    return {"result":True}

Validating keys in DDLm. We move everything to the uppermost parent category, and then lookup what keys can be used. If any of these are present, we are happy. This might miss some subtleties in mixed or unmixed loops?

<Validate loop key DDLm>= (<-U)
def validate_loop_key_ddlm(self,loop_names):
    """Make sure at least one of the necessary keys are available"""
    final_cats = self.get_final_cats(loop_names)
    if len(final_cats)>0:
        poss_keys = self.cat_key_table[final_cats[0]][0]
        found_keys = [a for a in poss_keys if a in loop_names]
        if len(found_keys)>0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":poss_keys}
    else:
        return {"result":True}

The _list_reference value specifies data names which must co-occur with the defined data name. We check that this is indeed the case for all items in the loop. We trace through alternate values as well. In DDL1 dictionaries, a name terminating with an underscore indicates that any(?) corresponding name is suitable.

<Validate loop mandatory items>= (<-U)
def validate_loop_references(self,loop_names):
    must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
    must_haves = [a for a in must_haves if a != None]
    # build a flat list.  For efficiency we don't remove duplicates,as
    # we expect no more than the order of 10 or 20 looped names.
    def flat_func(a,b):
        if isinstance(b,unicode):
           a.append(b)       #single name
        else:
           a.extend(b)       #list of names
        return a
    flat_mh = []
    [flat_func(flat_mh,a) for a in must_haves]
    group_mh = filter(lambda a:a[-1]=="_",flat_mh)
    single_mh = filter(lambda a:a[-1]!="_",flat_mh)
    res = [a for a in single_mh if a not in loop_names]
    def check_gr(s_item, name_list):
        nl = map(lambda a:a[:len(s_item)],name_list)
        if s_item in nl: return True
        return False
    res_g = [a for a in group_mh if check_gr(a,loop_names)]
    if len(res) == 0 and len(res_g) == 0: return {"result":True}
    # construct alternate list
    alternates = list(map(lambda a: (a,self.get_alternates(a)),res))
    # next line purely for error reporting: those with no alternates at all
    missing_alts = [a[0] for a in alternates if a[1] == []]
    alternates = [a for a in alternates if a[1] != []]
    if len(alternates) != len(res):
       return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                   #doesn't have an altern
    #loop over alternates
    for orig_name,alt_names in alternates:
         alt = [a for a in alt_names if a in loop_names]
         if len(alt) == 0: return {"result":False,"bad_items":orig_name}# no alternates
    return {"result":True}        #found alternates

A utility function to return a list of alternate names given a main name. In DDL2 we have to deal with aliases. Each aliased item appears in our normalised dictionary independently, so there is no need to resolve aliases when looking up a data name. However, the original definition using DDL2-type names is simply copied to this aliased name during normalisation, so all references to other item names (e.g. _item_dependent) have to be resolved using the present function.

These aliases are returned in any case, so if we had a data file which mixed DDL1 and DDL2 style names, it may turn out to be valid, and what's more, we wouldn't necessarily detect an error if a data name and its alias were present - need to ponder this.

The exclusive_only option will only return items which must not co-exist with the item name in the same datablock. This includes aliases, and allows us to do a check that items and their aliases are not present at the same time in a data file.

<Get alternative item names>= (<-U)
def get_alternates(self,main_name,exclusive_only=False):
    alternates = self[main_name].get(self.related_func,None)
    alt_names = []
    if alternates != None:
        alt_names =  self[main_name].get(self.related_item,None)
        if isinstance(alt_names,unicode):
            alt_names = [alt_names]
            alternates = [alternates]
        together = zip(alt_names,alternates)
        if exclusive_only:
            alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                                         or a[1]=="replace"]
        else:
            alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
        alt_names = list([a[0] for a in alt_names])
    # now do the alias thing
    alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
    alt_names.extend(alias_names)
    # print("Alternates for {}: {!r}".format(main_name, alt_names))
    return alt_names
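
A hedged usage sketch (the dataname and its DDL1-style alias are hypothetical):

    cdic.get_alternates('_cell.volume')
    # -> ['_cell_volume'] if an _item_aliases.alias_name entry exists
    cdic.get_alternates('_cell.volume', exclusive_only=True)
    # -> aliases plus any 'alternate_exclusive' or 'replace' related names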

Some checks require access to the entire data block. These functions take both a provisional dictionary and a global dictionary; the provisional dictionary includes items which will go into the dictionary together with the current item, and the global dictionary includes items which apply to all data blocks (this is for validation of DDL1/2 dictionaries).

<Cross-item validation>= (<-U)
<Validate exclusion rules>
<Validate parent child relations>
<Validate presence of dependents>
<Validate list uniqueness>

DDL2 dictionaries introduce the "alternate exclusive" category for related items. We also unilaterally include items listed in aliases as acting in this way.

<Validate exclusion rules>= (<-U)
def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
   alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
   item_name_list = [a.lower() for a in whole_block.keys()]
   item_name_list.extend([a.lower() for a in provisional_items.keys()])
   bad = [a for a in alternates if a in item_name_list]
   if len(bad)>0:
       print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
       return {"result":False,"bad_items":bad}
   else: return {"result":True}

When validating parent/child relations, we check the parent link to the children, and separately check that parents exist for any children present. Switching on optimisation will remove the redundancy in this procedure, but only if no changes are made to the relevant data items between the two checks.

It appears that DDL2 dictionaries allow parents to be absent if children take only unspecified values (i.e. dot or question mark). We catch this case.

The provisional items dictionary includes items that are going to be included with the present item (in a single loop structure) so the philosophy of inclusion must be all or nothing.

When validating DDL2 dictionaries themselves, we are allowed access to other definition blocks in order to resolve parent-child pointers. We will be able to find these save frames inside the globals dictionary (they will in this case be collected inside a CifBlock object).

When removing, we look at the item to make sure that no child items require it to be present.

<Validate parent child relations>= (<-U)
# validate that parent exists and contains matching values
def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
    parent_item = self[item_name].get(self.parent_spec)
    if not parent_item: return {"result":None}   #no parent specified
    if isinstance(parent_item,list):
        parent_item = parent_item[0]
    if self.optimize:
        if parent_item in self.done_parents:
            return {"result":None}
        else:
            self.done_parents.append(parent_item)
            print("Done parents %s" % repr(self.done_parents))
    # initialise parent/child values
    if isinstance(item_value,unicode):
        child_values = [item_value]
    else: child_values = item_value[:]    #copy for safety
    # track down the parent
    # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
    # if globals contains the parent values, we are doing a DDL2 dictionary, and so
    # we have collected all parent values into the global block - so no need to search
    # for them elsewhere.
    # print("Looking for {!r}".format(parent_item))
    parent_values = globals.get(parent_item)
    if not parent_values:
        parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
    if not parent_values:
        # go for alternates
        namespace = whole_block.keys()
        namespace.extend(provisional_items.keys())
        namespace.extend(globals.keys())
        alt_names = filter_present(self.get_alternates(parent_item),namespace)
        if len(alt_names) == 0:
            if len([a for a in child_values if a != "." and a != "?"])>0:
                return {"result":False,"parent":parent_item}#no parent available -> error
            else:
                return {"result":None}       #maybe True is more appropriate??
        parent_item = alt_names[0]           #should never be more than one??
        parent_values = provisional_items.get(parent_item,whole_block.get(parent_item))
        if not parent_values:   # check global block
            parent_values = globals.get(parent_item)
    if isinstance(parent_values,unicode):
        parent_values = [parent_values]
    #print("Checking parent %s against %s, values %r/%r" % (parent_item,
    #                                          item_name, parent_values, child_values))
    missing = self.check_parent_child(parent_values,child_values)
    if len(missing) > 0:
        return {"result":False,"bad_values":missing,"parent":parent_item}
    return {"result":True}

def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}):
    try:
        child_items = self[item_name][self.child_spec][:]  #copy
    except KeyError:
        return {"result":None}    #not relevant
    # special case for dictionaries  -> we check parents of children only
    if item_name in globals:  #dictionary so skip
        return {"result":None}
    if isinstance(child_items,unicode): # only one child
        child_items = [child_items]
    if isinstance(item_value,unicode): # single value
        parent_values = [item_value]
    else: parent_values = item_value[:]
    # expand child list with list of alternates
    for child_item in child_items[:]:
        child_items.extend(self.get_alternates(child_item))
    # now loop over the children
    for child_item in child_items:
        if self.optimize:
            if child_item in self.done_children:
                return {"result":None}
            else:
                self.done_children.append(child_item)
                print("Done children %s" % repr(self.done_children))
        if child_item in provisional_items:
            child_values = provisional_items[child_item][:]
        elif child_item in whole_block:
            child_values = whole_block[child_item][:]
        else:  continue
        if isinstance(child_values,unicode):
            child_values = [child_values]
            # print("Checking child %s against %s, values %r/%r" % (child_item,
            #       item_name, child_values, parent_values))
        missing = self.check_parent_child(parent_values,child_values)
        if len(missing)>0:
            return {"result":False,"bad_values":missing,"child":child_item}
    return {"result":True}       #could mean that no child items present

#a generic checker: all child vals should appear in parent_vals
def check_parent_child(self,parent_vals,child_vals):
    # shield ourselves from dots and question marks
    pv = parent_vals[:]
    pv.extend([".","?"])
    res =  [a for a in child_vals if a not in pv]
    #print("Missing: %s" % res)
    return res

def validate_remove_parent_child(self,item_name,whole_block):
    try:
        child_items = self[item_name][self.child_spec]
    except KeyError:
        return {"result":None}
    if isinstance(child_items,unicode): # only one child
        child_items = [child_items]
    for child_item in child_items:
        if child_item in whole_block:
            return {"result":False,"child":child_item}
    return {"result":True}

The DDL2 _item_dependent attribute at first glance appears to be the same as _list_reference, however the dependent item does not have to appear in a loop at all, and neither does the other item name. Perhaps this behaviour was intended to be implied by having looped _names in DDL1 dictionaries, but we can't be sure and so don't implement this yet.

<Validate presence of dependents>= (<-U)
def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}):
    try:
        dep_items = self[item_name][self.dep_spec][:]
    except KeyError:
        return {"result":None}    #not relevant
    if isinstance(dep_items,unicode):
        dep_items = [dep_items]
    actual_names = whole_block.keys()
    actual_names.extend(prov.keys())
    actual_names.extend(globals.keys())
    missing = [a for a in dep_items if a not in actual_names]
    if len(missing) > 0:
        alternates = map(lambda a:[self.get_alternates(a),a],missing)
        # compact way to get a list of alternative items which are
        # present
        have_check = [(filter_present(b[0],actual_names),
                                   b[1]) for b in alternates]
        have_check = list([a for a in have_check if len(a[0])==0])
        if len(have_check) > 0:
            have_check = [a[1] for a in have_check]
            return {"result":False,"bad_items":have_check}
    return {"result":True}

The _list_uniqueness attribute permits specification of a single item or multiple items which must have a unique combined value. Currently it is only used in the powder dictionary to indicate that peaks must have a unique index, and in the core dictionary to indicate that a publication section name together with its label must be unique; however, it would appear to implicitly apply to any index-type value in any dictionary. This attribute is used precisely once in the cif_core dictionary in a non-intuitive manner, but we code for it here: the value of _list_uniqueness can actually refer to another data name, which together with the defined name must be unique.

DDL2 dictionaries do away with separate _list_mandatory and _list_uniqueness attributes, instead using a _category_key. If multiple keys are specified, they must be unique in combination, in accordance with standard relational database behaviour.

<Validate list uniqueness>= (<-U)
def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                                                              globals={}):
    category = self[item_name].get(self.cat_spec)
    if category == None:
        print("No category found for %s" % item_name)
        return {"result":None}
    # print("Category {!r} for item {}".format(category, item_name))
    # we make a copy in the following as we will be removing stuff later!
    unique_i = self[category].get("_category_key.name",[])[:]
    if isinstance(unique_i,unicode):
        unique_i = [unique_i]
    if item_name not in unique_i:       #no need to verify
        return {"result":None}
    if isinstance(item_value,unicode):  #not looped
        return {"result":None}
    # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
    # check that we can't optimize by not doing this check
    if self.optimize:
        if unique_i in self.done_keys:
            return {"result":None}
        else:
            self.done_keys.append(unique_i)
    val_list = []
    # get the matching data from any other data items
    unique_i.remove(item_name)
    other_data = []
    if len(unique_i) > 0:            # i.e. do have others to think about
       for other_name in unique_i:
       # we look for the value first in the provisional dict, then the main block
       # the logic being that anything in the provisional dict overrides the
       # main block
           if other_name in provisional_items:
               other_data.append(provisional_items[other_name])
           elif other_name in whole_block:
               other_data.append(whole_block[other_name])
           elif self[other_name].get(self.must_exist_spec)=="implicit":
               other_data.append([item_name]*len(item_value))  #placeholder
           else:
               return {"result":False,"bad_items":other_name}#missing data name
    # ok, so we go through all of our values
    # this works by comparing lists of strings to one other, and
    # so could be fooled if you think that '1.' and '1' are
    # identical
    for i in range(len(item_value)):
        #print("Value no. %d" % i, end=" ")
        this_entry = item_value[i]
        for j in range(len(other_data)):
            this_entry = " ".join([this_entry,other_data[j][i]])
        #print("Looking for {!r} in {!r}: ".format(this_entry, val_list))
        if this_entry in val_list:
            return {"result":False,"bad_values":this_entry}
        val_list.append(this_entry)
    return {"result":True}

<Block-level validation>= (<-U)
<Validate category presence>
<Process DDLm mandatory information>
<Process DDLm prohibited information>

DDL2 introduces a new idea, that of a mandatory category, items of which must be present. We check only this particular fact, and leave the checks for mandatory items within the category, keys etc. to the relevant routines. This would appear to be applicable to dictionaries only.

Also, although the natural meaning for a DDL2 dictionary would be that items from these categories must appear in every definition block, this is not what happens in practice, as category definitions do not have anything from the (mandatory) _item_description category. We therefore adopt the supremely useless meaning that mandatory categories in a dictionary context mean only that somewhere, maybe in only one save frame, an item from this category exists. This interpretation is forced by using the "fake_mand" argument, which then assumes that the alternative routine will be used to set the error information on a dictionary-wide basis.

<Validate category presence>= (<-U)
def validate_mandatory_category(self,whole_block):
    mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"]
    if len(mand_cats) == 0:
        return {"result":True}
    # print("Mandatory categories - {!r}".format(mand_cats)
    # find which categories each of our datanames belongs to
    all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()]
    missing = set(mand_cats) - set(all_cats)
    if len(missing) > 0:
        return {"result":False,"bad_items":repr(missing)}
    return {"result":True}

Processing DDLm mandatory categories/items

DDLm manages mandatory items by providing a table in the DDLm dictionary which classifies datanames into mandatory/recommended/prohibited for dictionary, category or item scopes. Note that the following check might fail for categories and dictionaries if '_definition.scope' or '_dictionary.title' is missing.

<Process DDLm mandatory information>= (<-U)
def check_mandatory_items(self,whole_block,default_scope='Item'):
    """Return an error if any mandatory items are missing"""
    if len(self.scopes_mandatory)== 0: return {"result":True}
    if default_scope == 'Datablock':
        return {"result":True}     #is a data file
    scope = whole_block.get('_definition.scope',default_scope)
    if '_dictionary.title' in whole_block:
       scope = 'Dictionary'
    missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block])
    if len(missing)==0:
        return {"result":True}
    else:
        return {"result":False,"bad_items":missing}

<Process DDLm prohibited information>= (<-U)
def check_prohibited_items(self,whole_block,default_scope='Item'):
    """Return an error if any prohibited items are present"""
    if len(self.scopes_naughty)== 0: return {"result":True}
    if default_scope == 'Datablock':
        return {"result":True}     #is a data file
    scope = whole_block.get('_definition.scope',default_scope)
    if '_dictionary.title' in whole_block:
       scope = 'Dictionary'
    present = list([a for a in self.scopes_naughty[scope] if a in whole_block])
    if len(present)==0:
        return {"result":True}
    else:
        return {"result":False,"bad_items":present}

These validation checks are intended to be called externally. They return a dictionary keyed by item name with value being a list of the results of the check functions. The individual functions return a dictionary which contains at least the key "result", and in case of error relevant keys relating to the error.

<Run validation tests>= (<-U)
def run_item_validation(self,item_name,item_value):
    return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])}

def run_loop_validation(self,loop_names):
    return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])}

def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}):
    results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs])
    return {item_name:results}

def run_block_validation(self,whole_block,block_scope='Item'):
    results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs])
    # fix up the return values
    return {"whole_block":results}

Optimisation: the dictionary validation routines normally retain no history of what has been checked, as they are executed on a per-item basis. This leads to duplication of the uniqueness check, when there is more than one key, and duplication of the parent-child check, once for the parent and once for the child. By switching on optimisation, a record is kept and these checks will not be repeated. This is safe only if none of the relevant items is altered while optimisation is on, and optimisation should be switched off as soon as all the checks are finished.
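
A minimal sketch of the intended calling pattern (the data block iteration is hypothetical):

    cdic.optimize_on()
    try:
        for name in cif_block.keys():
            results = cdic.run_item_validation(name, cif_block[name])
    finally:
        cdic.optimize_off()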

<Optimisation on/off>= (<-U)
def optimize_on(self):
    self.optimize = True
    self.done_keys = []
    self.done_children = []
    self.done_parents = []

def optimize_off(self):
    self.optimize = False
    self.done_keys = []
    self.done_children = []
    self.done_parents = []

Preparing our type expressions

In DDL2 dictionaries our type expressions are given in the main block as POSIX regexps, so we can pass them on to the re package. For DDL1 dictionaries we could get them from the DDL1 language definition, but for now we just hard code them. Essentially only the number definition is important, as the syntax check during reading/writing will catch any char violations.

Note that the python re engine is not POSIX compliant in that it will not return the longest leftmost match, but rather the first leftmost match. John Bollinger suggested an obvious fix: we append a $ to force a full match.
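
A two-line demonstration of the problem and the fix:

    import re
    re.match(r"[0-9]+", "123abc").group()   # -> '123': a prefix match would wrongly pass
    re.match(r"[0-9]+$", "123abc")          # -> None: the anchored form correctly fails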

In other regexp editing, the \{ sequence inside the character sets of some of the regexps is actually interpreted as an escaped bracket, so the backslash vanishes. We add it back in by doing a very hackish and ugly substitution which substitutes these two characters anywhere that they occur inside square brackets. A final change is to insert a \r wherever we find a \n - it seems that this has been left out. After these changes, and appending default expressions as well, we can now work with DDL2 expressions directly.

We keep the primitive code for the single reason that we need to know when we are dealing with a number that has an esd appended, and this is flagged by the primitive code being of type 'numb'.

<Add type information>= (<-U)
def add_type_info(self):
    if "_item_type_list.construct" in self.master_block:
        types = self.master_block["_item_type_list.code"]
        prim_types = self.master_block["_item_type_list.primitive_code"]
        constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
        # add in \r wherever we see \n, and change \{ to \\{
        def regex_fiddle(mm_regex):
            brack_match = r"((.*\[.+)(\\{)(.*\].*))"
            ret_match = r"((.*\[.+)(\\n)(.*\].*))"
            fixed_regexp = mm_regex[:]  #copy
            # fix the brackets
            bm = re.match(brack_match,mm_regex)
            if bm != None:
                fixed_regexp = bm.expand(r"\2\\\\{\4")
            # fix missing \r
            rm = re.match(ret_match,fixed_regexp)
            if rm != None:
                fixed_regexp = rm.expand(r"\2\3\\r\4")
            #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
            return fixed_regexp
        constructs = map(regex_fiddle,constructs)
        for typecode,construct in zip(types,constructs):
            self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
        # now make a primitive <-> type construct mapping
        for typecode,primtype in zip(types,prim_types):
            self.primdic[typecode] = primtype

Linkage to dREL

The drel_ast_yacc package will generate an Abstract Syntax Tree, which we then convert to a Python function using py_from_ast.make_function. We use it during initialisation to transform all methods to python expressions, and then the derive_item method will use this to try to derive the expression. Note that newline is the only recognised statement separator in dREL, so we make sure all lines are separated in this way. We also allow multiple 'Evaluation' methods, which is an enhancement of the current standard.

The make_function function requires dictionary information to be supplied regarding looped categories and keys.

If we were really serious about dictionary-driven software, the attribute lookups that follow would not use get(), but square brackets and allow default values to be returned. However, that would require assigning a dictionary to the dictionary and consequent automated searches which I cannot be bothered to do at this stage. Just be aware that the default value in the get() statement is the _enumeration.default specified in ddl.dic.

<Linkage to dREL>= (<-U)
<Initialise dREL functions>
<Transform drel to python>
<Store dREL functions>
<Derive item information>
<Storing a dREL-derived value>
<Construct a category>
<Insert category items from pullback information>
<Storing a whole new dREL-derived category>
<Generating default packets>

Full initialisation. This can take some time so we optionally skip it, but can call this function separately at a later stage if needed.

<Initialise dREL functions>= (<-U)
def initialise_drel(self):
    """Parse drel functions and prepare data structures in dictionary"""
    self.ddlm_parse_valid() #extract validity information from data block
    self.transform_drel()   #parse the drel functions
    self.add_drel_funcs()   #put the drel functions into the namespace

<Transform drel to python>= (<-U)
def transform_drel(self):
    from .drel import drel_ast_yacc
    from .drel import py_from_ast
    import traceback
    parser = drel_ast_yacc.parser
    lexer = drel_ast_yacc.lexer
    my_namespace = self.keys()
    my_namespace = dict(zip(my_namespace,my_namespace))
    # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...})
    loopable_cats = self.get_loopable_cats()
    loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
    loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
    cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
    loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
    # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")]
    derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \
                          and self[a].get("_name.category_id","")!= "function"]
    for derivable in derivable_list:
        target_id = derivable
        # reset the list of visible names for parser
        special_ids = [dict(zip(self.keys(),self.keys()))]
        print("Target id: %s" % derivable)
        drel_exprs = self[derivable]["_method.expression"]
        drel_purposes = self[derivable]["_method.purpose"]
        all_methods = []
        if not isinstance(drel_exprs,list):
            drel_exprs = [drel_exprs]
            drel_purposes = [drel_purposes]
        for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs):
            if drel_purpose != 'Evaluation':
                continue
            drel_expr = "\n".join(drel_expr.splitlines())
            # print("Transforming %s" % drel_expr)
            # List categories are treated differently...
            try:
                meth_ast = parser.parse(drel_expr+"\n",lexer=lexer)
            except:
                print('Syntax error in method for %s; leaving as is' % derivable)
                a,b = sys.exc_info()[:2]
                print((repr(a),repr(b)))
                print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout))
                # reset the lexer
                lexer.begin('INITIAL')
                continue
            # Construct the python method
            cat_meth = False
            if self[derivable].get('_definition.scope','Item') == 'Category':
                cat_meth = True
            pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id,
                                                                       loopable=loop_info,
                                                         cif_dic = self,cat_meth=cat_meth)
            all_methods.append(pyth_meth)
        if len(all_methods)>0:
            save_overwrite = self[derivable].overwrite
            self[derivable].overwrite = True
            self[derivable]["_method.py_expression"] = all_methods
            self[derivable].overwrite = save_overwrite
        #print("Final result:\n " + repr(self[derivable]["_method.py_expression"]))

dREL functions are all stored in category 'functions' in our final dictionary. We want to convert them to executable Python code and store them in an appropriate namespace which we can then pass to our individual item methods. As dREL accepts only linefeed as a statement terminator, we convert the input text as required.

<Store dREL functions>= (<-U)
def add_drel_funcs(self):
    from .drel import drel_ast_yacc
    from .drel import py_from_ast
    funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function']
    funcnames = [(self[a]["_name.object_id"],
                  getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist]
    # create executable python code...
    parser = drel_ast_yacc.parser
    # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...})
    loopable_cats = self.get_loopable_cats()
    loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats]
    loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys]
    cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats]
    loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names)))
    for funcname,funcbody in funcnames:
        newline_body = "\n".join(funcbody.splitlines())
        parser.target_id = funcname
        res_ast = parser.parse(newline_body)
        py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self)
        #print('dREL library function ->\n' + py_function)
        global_table = globals()
        exec(py_function, global_table)    #add to namespace
    #print('Globals after dREL functions added:' + repr(globals()))
    self.ddlm_functions = globals()  #for outside access

When a dictionary is available during CIF file access, we can resolve a missing dataname in three ways: (1) check if it is defined under an alias; (2) use a dREL method to calculate the value; (3) use a default value if one is defined. We resolve in this priority order. Note that we also convert to the appropriate type. A refinement of (2) is that, if the entire category is missing, we can use either DDLm category construction information or a category method to find our values; we only do this if no items in the category are present. We raise a StarDerivationError if we cannot derive the item, and internally we set result to None as we work through the various ways of deriving the item.

The store_value flag asks us to update the ciffile object with the new value. We remove any numpy dependencies before doing this, which means that we must recreate the numpy type when returning it.

The allow_defaults flag allows default values to be derived. In a situation where multiple methods are available for deriving an item, a calculation that accepts default values will return incorrect values in any situation where an alternative calculation method would have given correct values. For example, if the default value of axis.vector[n] is 0, but I can use an alternative derivation for axis.vector from a different setting, then a calculation that creates axis.vector from the components will give the wrong answer as it will fill in default values when the components are missing. The track_recursion decorator code handles this by propagating the initial value of allow_defaults to nested calls.
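As a sketch of the intended usage pattern (the dataname and file names are illustrative assumptions):

    from CifFile import CifDic, CifFile, StarFile

    cdic = CifDic("cif_core_ddlm.dic")
    cb = CifFile("data.cif").first_block()
    try:
        # refuse defaults first, so an alternative derivation is preferred
        val = cdic.derive_item("_axis.vector[1]", cb, allow_defaults=False)
    except StarFile.StarDerivationError:
        # fall back to accepting _enumeration.default values
        val = cdic.derive_item("_axis.vector[1]", cb, allow_defaults=True)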

<Derive item information>= (<-U)
@track_recursion
def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True):
    key = start_key   #starting value
    result = None     #success is a non-None value
    default_result = False #we have not used a default value
    <Resolve using aliases>
    the_category = self[key]["_name.category_id"]
    cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
    has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
    # store any default value in case we have a problem
    def_val = self[key].get("_enumeration.default","")
    def_index_val = self[key].get("_enumeration.def_index_id","")
    if len(has_cat_names)==0: # try category method
        <Populate a category>
    # Recalculate in case it actually worked
    has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
    the_funcs = self[key].get('_method.py_expression',"")
    if the_funcs:   #attempt to calculate it
        <Execute pythonised dREL method>
    if result is None and allow_defaults:   # try defaults
        <Work out default value of dataname>
    # read it in
    if result is None:   #can't do anything else
        print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
        raise StarFile.StarDerivationError(start_key)
    <Adjust value to be appropriate length>
    # now try to insert the new information into the right place
    # find if items of this category already appear...
    # Never cache empty values
    if not (isinstance(result,list) and len(result)==0) and\
      store_value:
        if self[key].get("_definition.scope","Item")=='Item':
            if is_looped:
                result = self.store_new_looped_value(key,cifdata,result,default_result)
            else:
                result = self.store_new_unlooped_value(key,cifdata,result)
        else:
            self.store_new_cat_values(cifdata,result,the_category)
    return result

Adjusting our calculated value. If we have used a default value or we have None, we need to make the dimension match the currently-existing length of the category.

<Adjust value to be appropriate length>= (<-U)
is_looped = False
if self[the_category].get('_definition.class','Set')=='Loop':
    is_looped = True
    if len(has_cat_names)>0:   #this category already exists
        if result is None or default_result: #need to create a list of values
            loop_len = len(cifdata[has_cat_names[0]])
            out_result = [result]*loop_len
            result = out_result
    else:   #nothing exists in this category, we can't store this at all
        print('Resetting result %s for %s to null list as category is empty' % (repr(result),key))
        result = []

Storing a dREL-derived value back into our CifFile. The dREL value (or potentially a simple default value) may correspond to an entire column, or even an entire loop for category methods. We have to distinguish between list values that are StarLists, that is, a single CIF value, and list values that correspond to a column of a loop. Additionally, testing has revealed that we cannot judge the type of elements in a list by the first element (e.g. could be a plain list, then a numpy array).

The conv_from_numpy mini-functions are designed to handle arbitrary numpy arrays quickly.
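
The following standalone sketch reproduces the conversion performed by conv_from_numpy (duplicated here for illustration; the real code below wraps lists in StarFile.StarList):

    import numpy as np

    def conv_from_numpy(elem):
        # non-numpy values pass through, with lists converted recursively
        if not hasattr(elem, 'dtype'):
            if isinstance(elem, (list, tuple)):
                return [conv_from_numpy(a) for a in elem]
            return elem
        # multi-element arrays become nested lists, scalars become plain values
        if elem.size > 1:
            return [conv_from_numpy(a) for a in elem.tolist()]
        return elem.item(0)

    print(conv_from_numpy(np.array([[1, 2], [3, 4]])))  # [[1, 2], [3, 4]]
    print(conv_from_numpy(np.array(3.5)))               # 3.5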

<Storing a dREL-derived value>= (<-U)
def store_new_looped_value(self,key,cifdata,result,default_result):
      """Store a looped value from the dREL system into a CifFile"""
      # try to change any matrices etc. to lists
      the_category = self[key]["_name.category_id"]
      out_result = result
      if result is not None and not default_result:
              # find any numpy arrays
              def conv_from_numpy(one_elem):
                  if not hasattr(one_elem,'dtype'):
                     if isinstance(one_elem,(list,tuple)):
                        return StarFile.StarList([conv_from_numpy(a) for a in one_elem])
                     return one_elem
                  if one_elem.size > 1:   #so is not a float
                     return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()])
                  else:
                      try:
                        return one_elem.item(0)
                      except:
                        return one_elem
              out_result = [conv_from_numpy(a) for a in result]
      # so out_result now contains a value suitable for storage
      cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category]
      has_cat_names = [a for a in cat_names if a in cifdata]
      print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names))
      if len(has_cat_names)>0:   #this category already exists
          cifdata[key] = out_result      #lengths must match or else!!
          cifdata.AddLoopName(has_cat_names[0],key)
      else:
          cifdata[key] = out_result
          cifdata.CreateLoop([key])
      print('Loop info:' + repr(cifdata.loops))
      return out_result

def store_new_unlooped_value(self,key,cifdata,result):
      """Store a single value from the dREL system"""
      if result is not None and hasattr(result,'dtype'):
          if result.size > 1:
              out_result = StarFile.StarList(result.tolist())
              cifdata[key] = out_result
          else:
              cifdata[key] = result.item(0)
      else:
          cifdata[key] = result
      return result

Storing category results. dREL allows 'category methods', which initialise an entire category. The dREL system that we have written returns a dictionary of lists, with the dictionary keys being item names. It is sufficient for us to extract each of these names and pass them to our normal storage routine. If some of the values in the category key are duplicated, we bail, as we may overwrite previous values. We also bail if we do not have exactly the same datanames available, as we are too lazy to insert 'unknown' in the non-matching positions.
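
For concreteness, a category method result handled below has the following shape, one equal-length list per dataname (datanames and values are purely illustrative):

    result = {
        '_model_site.id':     ['C1', 'C2'],
        '_model_site.symop':  ['1_555', '2_555'],
    }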

<Storing a whole new dREL-derived category>= (<-U)
def store_new_cat_values(self,cifdata,result,the_category):
    """Store the values in [[result]] into [[cifdata]]"""
    the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
    double_names = [a for a in result.keys() if a in cifdata]
    if len(double_names)>0:
        already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
        if set(already_present) != set(result.keys()):
            print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
            return
        #check key values
        old_keys = set(cifdata[the_key[0]])
        common_keys = old_keys & set(result[the_key[0]])
        if len(common_keys)>0:
            print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
            return
        #extend result values with old values
        for one_name,one_value in result.items():
            result[one_name].extend(cifdata[one_name])
    for one_name, one_value in result.items():
        try:
            self.store_new_looped_value(one_name,cifdata,one_value,False)
        except StarFile.StarError:
            print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
    #put the key as the first item
    print('Fixing item order for {}'.format(repr(the_key)))
    for one_key in the_key:  #should only be one
        cifdata.ChangeItemOrder(one_key,0)


Executing a dREL method. The execution defines a function, 'pyfunc' which is then itself executed in global scope. This has caused us some grief in order to get the bindings right (e.g. having StarList in scope). Essentially, anything that the method might refer to should be in scope at this point, otherwise the way Python works it will be too late to have things in scope within the enclosing routine that calls this function. Importing the necessary modules at the beginning of the module file (as done here) seems to be a reliable way to go.
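A minimal standalone sketch of the pattern used below: executing the generated source in globals() makes module-level imports visible to the new function.

    # hypothetical generated source for illustration only
    src = "def pyfunc(datablock):\n    return datablock['x'] * 2\n"
    exec(src, globals())        # "pyfunc" is now bound in module globals
    print(pyfunc({'x': 21}))    # 42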

<Execute pythonised dREL method>= (<-U)
#global_table = globals()
#global_table.update(self.ddlm_functions)
for one_func in the_funcs:
    print('Executing function for %s:' % key)
    #print(one_func)
    exec(one_func, globals())  #will access dREL functions, puts "pyfunc" in scope
    # print('in following global environment: ' + repr(global_table))
    stored_setting = cifdata.provide_value
    cifdata.provide_value = True
    try:
        result = pyfunc(cifdata)
    except CifRecursionError as s:
        print(s)
        result = None
    except StarFile.StarDerivationError as s:
        print(s)
        result = None
    finally:
        cifdata.provide_value = stored_setting
    if result is not None:
        break
    #print("Function returned {!r}".format(result))

Creating categories

A category can be created from scratch (i.e. its key identifiers produced) if the appropriate DDLm attributes are defined - currently, experimental 'category_construct_local' attributes are included in the test dictionaries for this purpose. They define two types of 'pullback' (see any category theory textbook), which we can use to create a category. If these attributes are absent, we can instead execute a category method. We only add new category items calculated in this way if the category does not yet exist, or if (i) the category key values are not already present and (ii) the set of datanames calculated exactly matches the set of datanames already present.

<Populate a category>= (<-U)
cat_result = {}
pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]]
pulled_from_cats = [(k,[
                      self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']]
                   ) for k in pulled_from_cats]
pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]]
if '_category_construct_local.type' in self[the_category]:
    print("**Now constructing category %s using DDLm attributes**" % the_category)
    try:
        cat_result = self.construct_category(the_category,cifdata,store_value=True)
    except (CifRecursionError,StarFile.StarDerivationError):
        print('** Failed to construct category %s (error)' % the_category)
# Trying a pull-back when the category is partially populated
# will not work, hence we test that cat_result has no keys
if len(pulled_to_cats)>0 and len(cat_result)==0:
    print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats)))
    try:
        cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True)
    except (CifRecursionError,StarFile.StarDerivationError):
        print('** Failed to construct category %s from pullback information (error)' % the_category)
if '_method.py_expression' in self[the_category] and key not in cat_result:
    print("**Now applying category method for %s in search of %s**" % (the_category,key))
    cat_result = self.derive_item(the_category,cifdata,store_value=True)
print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
# do we now have our value?
if key in cat_result:
    return cat_result[key]

Constructing categories using DDLm attributes. We have defined local attributes that describe category construction using mathematical 'pullbacks'. We can use these to fill a category, but also to populate a category if the pullback category is available. We use list to coerce all values to a list in case we are passed a numpy array, which does not have an 'index' method.
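A standalone sketch of the pullback-by-equality computed below: key values from two categories are paired wherever their mapped values agree (all values are illustrative).

    cat_values = [['a1', 'a2'], ['b1', 'b2', 'b3']]   # category key values
    morph_values = [['x', 'y'], ['y', 'x', 'x']]      # mapped dataname values
    pullback_ids = [(x, y) for x in cat_values[0] for y in cat_values[1]
                    if morph_values[0][cat_values[0].index(x)] ==
                       morph_values[1][cat_values[1].index(y)]]
    print(pullback_ids)   # [('a1', 'b2'), ('a1', 'b3'), ('a2', 'b1')]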

<Construct a category>= (<-U)
def construct_category(self,category,cifdata,store_value=True):
    """Construct a category using DDLm attributes"""
    con_type = self[category].get('_category_construct_local.type',None)
    if con_type == None:
        return {}
    if con_type == 'Pullback' or con_type == 'Filter':
        morphisms  = self[category]['_category_construct_local.components']
        morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat
        cats = [self[a]['_name.category_id'] for a in morphisms]
        cat_keys = [self[a]['_category.key_id'] for a in cats]
        cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat
        if con_type == 'Filter':
            int_filter = self[category].get('_category_construct_local.integer_filter',None)
            text_filter = self[category].get('_category_construct_local.text_filter',None)
            if int_filter is not None:
                morph_values.append([int(a) for a in int_filter])
            if text_filter is not None:
                morph_values.append(text_filter)
            cat_values.append(range(len(morph_values[-1])))
        # create the mathematical product filtered by equality of dataname values
        pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \
                        if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]]
        # now prepare for return
        if len(pullback_ids)==0:
            return {}
        newids = self[category]['_category_construct_local.new_ids']
        fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids]
        if con_type == 'Pullback':
            final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]}
            final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
            final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids))
        elif con_type == 'Filter':   #simple filter
            final_results = {fullnewids[0]:[x[0] for x in pullback_ids]}
            final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids))
        if store_value:
            self.store_new_cat_values(cifdata,final_results,category)
        return final_results

Going the other way. If we have the pulled-back category, we can populate the pulled-from categories with their identifier items using projections from the pulled-back category. In the special case that we have a pullback that uses a filter function with a single element, we can automatically populate the whole commutative square. We also by default populate identically-named datanames.

The projection datanames are given in _category_construct_local.new_ids, and they always map to the key of the projected-to category.

<Insert category items from pullback information>= (<-U)
def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True):
    """Each of the categories in source_categories are pullbacks that include
    the target_category"""
    target_key = self[target_category]['_category.key_id']
    result = {target_key:[]}
    first_time = True
    # for each source category, determine which element goes to the target
    for sc in source_categories:
        components = self[sc]['_category_construct_local.components']
        comp_cats = [self[c]['_name.category_id'] for c in components]
        new_ids = self[sc]['_category_construct_local.new_ids']
        source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids]
        if len(components) == 2:  # not a filter
            element_pos = comp_cats.index(target_category)
            old_id = source_ids[element_pos]
            print('Using %s to populate %s' % (old_id,target_key))
            result[target_key].extend(cifdata[old_id])
            # project through all identical names
            extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key])
            # we only include keys that are common to all categories
            if first_time:
                result.update(extra_result)
            else:
                for k in extra_result.keys():
                    if k in result:
                        print('Updating %s: was %s' % (k,repr(result[k])))
                        result[k].extend(extra_result[k])
        else:
            extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids)
            if len(extra_result)>0 or source_ids[0] in cifdata:  #something is present
                result[target_key].extend(cifdata[source_ids[0]])
                for k in extra_result.keys():
                    if k in result:
                        print('Reverse filter: Updating %s: was %s' % (k,repr(result[k])))
                        result[k].extend(extra_result[k])
                    else:
                        result[k]=extra_result[k]
                # Bonus derivation if there is a singleton filter
                if self[sc]['_category_construct_local.type'] == 'Filter':
                    int_filter = self[sc].get('_category_construct_local.integer_filter',None)
                    text_filter = self[sc].get('_category_construct_local.text_filter',None)
                    if int_filter is not None:
                        filter_values = int_filter
                    else:
                        filter_values = text_filter
                    if len(filter_values)==1:    #a singleton
                        extra_dataname = self[sc]['_category_construct_local.components'][0]
                        if int_filter is not None:
                            new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]])
                        else:
                            new_value = filter_values * len(cifdata[source_ids[0]])
                        if extra_dataname not in result:
                            result[extra_dataname] = new_value
                        else:
                            result[extra_dataname].extend(new_value)
                else:
                    raise ValueError('Unexpected category construct type: ' + self[sc]['_category_construct_local.type'])
        first_time = False
    # check for sanity - all dataname lengths must be identical
    datalen = len(set([len(a) for a in result.values()]))
    if datalen != 1:
        raise AssertionError('Failed to construct equal-length category items,'+ repr(result))
    if store_value:
        print('Now storing ' + repr(result))
        self.store_new_cat_values(cifdata,result,target_category)
    return result

def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]):
    """Copy across datanames for which the from_category key equals [[key_vals]]"""
    result = {}
    s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
    t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
    can_project = s_names_in_cat & t_names_in_cat
    can_project -= set(skip_names)  #already dealt with
    source_key = self[from_category]['_category.key_id']
    print('Source dataname set: ' + repr(s_names_in_cat))
    print('Target dataname set: ' + repr(t_names_in_cat))
    print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
    for project_name in can_project:
        full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
        full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
        if key_vals is None:
            try:
                result[full_to_name] = cifdata[full_from_name]
            except StarFile.StarDerivationError:
                pass
        else:
            all_key_vals = cifdata[source_key]
            filter_pos = [all_key_vals.index(a) for a in key_vals]
            try:
                all_data_vals = cifdata[full_from_name]
            except StarFile.StarDerivationError:
                continue
            result[full_to_name] = [all_data_vals[i] for i in filter_pos]
    return result

Aliases. If we have this item under a different name, find it and return it immediately after converting it to the correct type. We could be passed either the dictionary-defined dataname, or any of its previous names. Aliases are stored as a table indexed by the dictionary-defined dataname, so that old datanames can be translated to new ones. Once we find a dataname that is present in the datafile, we return it. There are two types of check: given an old-style dataname, we must find the new (or another old) form, which means checking the keys of the table; given the latest form of the dataname, we must check for older names in the datafile. The latter is the dREL situation, so we have optimised for it by checking it first and making the modern datanames the table keys. Note that this section of code occurs first in the derive_item routine and will change the value of key to the dictionary-defined name even if nothing is available in the datafile, thereby enabling the other derivation routes.
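
For concreteness, the alias table has the following shape (datanames are illustrative), with the dictionary-defined modern name as the key:

    alias_table = {'_cell.volume': ['_cell_volume']}
    datafile_names = ['_cell_volume', '_cell_length_a']   # names in the file
    # dREL direction: given the modern name, look for an old form in the file
    found_it = [k for k in alias_table.get('_cell.volume', [])
                if k in datafile_names]
    print(found_it)   # ['_cell_volume']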

<Resolve using aliases>= (<-U)
# check for aliases
# check for an older form of a new value
found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata]
if len(found_it)>0:
    corrected_type = self.change_type(key,cifdata[found_it[0]])
    return corrected_type
# now do the reverse check - any alternative form
alias_name = [a for a in self.alias_table.items() if key in a[1]]
print('Aliases for %s: %s' % (key,repr(alias_name)))
if len(alias_name)==1:
    key = alias_name[0][0]   #actual definition name
    if key in cifdata: return self.change_type(key,cifdata[key])
    found_it = [k for k in alias_name[0][1] if k in cifdata]
    if len(found_it)>0:
        return self.change_type(key,cifdata[found_it[0]])
elif len(alias_name)>1:
    raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name))

Using the defaults system. We also check for any default values which we could return in case of error. Note that DDLm adds _enumeration.def_index_id as an alternative way to derive a value from a table. During development, we deliberately allow errors arising from the method to propagate so that we can see anything that might be wrong.

If we are using default values, we need to fill in the whole column of a looped category. This is taken care of at the end of the derivation function, so we simply set a flag to say that this is necessary.
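A worked sketch of index-based defaults (all values are illustrative):

    index_vals = ['c', 'n', 'o']    # _enumeration_default.index, lower-cased
    default_vals = [4, 3, 2]        # _enumeration_default.value
    file_value = 'N'                # value of the indexing dataname
    print(default_vals[index_vals.index(file_value.lower())])   # 3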

<Work out default value of dataname>= (<-U)
if def_val:
    result = self.change_type(key,def_val)
    default_result = True
elif def_index_val:            #derive a default value
    lcase_comp = False
    index_vals = self[key]["_enumeration_default.index"]
    val_to_index = cifdata[def_index_val]     #what we are keying on
    if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
        lcase_comp = True
        index_vals = [a.lower() for a in index_vals]
    # Handle loops
    if isinstance(val_to_index,list):
        if lcase_comp:
            val_to_index = [a.lower() for a in val_to_index]
        keypos = [index_vals.index(a) for a in val_to_index]
        result = [self[key]["_enumeration_default.value"][a]  for a in keypos]
    else:
        if lcase_comp:
            val_to_index = val_to_index.lower()
        keypos = index_vals.index(val_to_index)   #value error if no such value available
        result = self[key]["_enumeration_default.value"][keypos]
        default_result = True   #flag that it must be extended
    result = self.change_type(key,result)
    print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))

If a key is missing, we may sometimes fill in default values for it, for example, a missing atom type may be assumed to have a number in cell of 0.

<Generating default packets>= (<-U)
def generate_default_packet(self,catname,catkey,keyvalue):
    """Return a StarPacket with items from ``catname`` and a key value
    of ``keyvalue``"""
    newpack = StarPacket()
    for na in self.names_in_cat(catname):
        def_val = self[na].get("_enumeration.default","")
        if def_val:
            final_val = self.change_type(na,def_val)
            newpack.extend(final_val)
            setattr(newpack,na,final_val)
    if len(newpack)>0:
        newpack.extend(keyvalue)
        setattr(newpack,catkey,keyvalue)
    return newpack

In the single case of executing dREL methods, we wish to return numpy arrays from our __getitem__ so that mathematical operations proceed as expected for matrices and similar objects. This needs to be reimplemented: currently numpy must be installed for 'numerification' to work.

<Switch on numpy arrays>= (<-U)
def switch_numpy(self,to_val):
    pass

This function converts the string-valued items returned from the parser into types that correspond to the dictionary specifications. For DDLm it must also deal with potentially complex structures containing both strings and numbers. We have tried to avoid introducing a general dependence on Numpy in PyCIFRW, but once we get into the realm of DDLm we require Numpy arrays in order to handle the various processing tasks. This routine is the one that creates the arrays from the StarList types, so it needs access to numpy. However, it is only called if a DDLm dictionary has been provided, so we still have no Numpy dependence for non-DDLm cases.

For safety, we check that our object is really string-valued. In practice, this means that it is either a string, a list of strings, or a list of StarLists as these are the only datastructures that an as-parsed file will contain.

<Convert string to appropriate type>= (<-U)
def change_type(self,itemname,inval):
    if inval == "?": return inval
    change_function = convert_type(self[itemname])
    if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
        newval = list([change_function(a) for a in inval])
    else:
        newval = change_function(inval)
    return newval

We may be passed float values which have esds appended. We catch this case by searching for an opening round bracket.

<Convert value to float, ignore esd>= (U->)
def float_with_esd(inval):
    if isinstance(inval,unicode):
        j = inval.find("(")
        if j>=0:  return float(inval[:j])
    return float(inval)
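
For example:

    float_with_esd("4.392(3)")   # -> 4.392
    float_with_esd("4.392")      # -> 4.392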



This function analyses a DDL1-type range expression, returning a maximum and minimum value. If the number format ever changes, this regular expression must be updated to match.

<Analyse range>= (<-U)
def getmaxmin(self,rangeexp):
    regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
    regexp = regexp + ":" + regexp
    regexp = re.match(regexp,rangeexp)
    minimum = maximum = None
    try:
        minimum = regexp.group(1)
        maximum = regexp.group(7)
    except AttributeError:
        print("Can't match %s" % rangeexp)
    if minimum == None: minimum = "."
    else: minimum = float(minimum)
    if maximum == None: maximum = "."
    else: maximum = float(maximum)
    return maximum,minimum
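
For example (assuming cdic is a CifDic instance):

    cdic.getmaxmin("0.0:90.0")   # -> (90.0, 0.0)
    cdic.getmaxmin(":90.0")      # -> (90.0, '.')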

Outputting dictionaries

We would like dictionary blocks to be output in a readable order, that is, parent categories before their child definitions. The base BlockCollection output routines have no knowledge of save frame interrelations, so we have to supply the output block order ourselves; the get_full_child_list routine below computes it.

<Dictionary output routines>= (<-U)
def WriteOut(self,**kwargs):
    myblockorder = self.get_full_child_list()
    self.set_grammar(self.grammar)
    self.standard = 'Dic'
    return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs)

def get_full_child_list(self):
    """Return a list of definition blocks in order parent-child-child-child-parent-child..."""
    top_block = self.get_roots()[0][0]
    root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head']
    if len(root_cat) == 1:
        all_names = [top_block] + self.recurse_child_list(root_cat[0])
        unrooted = self.ddlm_danglers()
        double_names =  set(unrooted).intersection(set(all_names))
        if len(double_names)>0:
            raise CifError('Names are children of internal and external categories:%s' % repr(double_names))
        remaining = unrooted[:]
        for no_root in unrooted:
            if self[no_root].get('_definition.scope','Item')=='Category':
                all_names += [no_root]
                remaining.remove(no_root)
                these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()]
                all_names += these_children
                [remaining.remove(n) for n in these_children]
        # now sort by category
        ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining])
        for e in ext_cats:
            cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e]
            [remaining.remove(n) for n in cat_items]
            all_names += cat_items
        if len(remaining)>0:
            print('WARNING: following items do not seem to belong to a category??')
            print(repr(remaining))
            all_names += remaining
        print('Final block order: ' + repr(all_names))
        return all_names
    raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead')

def cat_from_name(self,one_name):
    """Guess the category from the name. This should be used only when this is not important semantic information,
    for example, when printing out"""
    (cat,obj) = one_name.split(".")
    if cat[0] == "_": cat = cat[1:]
    return cat

def recurse_child_list(self,parentname):
    """Recursively expand the logical child list of [[parentname]]"""
    final_list = [parentname]
    child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
    child_blocks.sort()    #we love alphabetical order
    child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
    final_list += child_items
    child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
    for child_cat in child_cats:
        final_list += self.recurse_child_list(child_cat)
    return final_list



Valid CIFs

A whole new can of worms is opened up when we require that a CIF is not only syntactically correct, but valid according to the specified dictionary.

A valid CIF is essentially a collection of valid CIF blocks. It may be the case in the future that inter-block relationships need to be checked, so we define a separate ValidCifFile class.

<A valid CIF block>= (<-U)
class ValidCifBlock(CifBlock):
    """A `CifBlock` that is valid with respect to a given CIF dictionary.  Methods
    of `CifBlock` are overridden where necessary to disallow addition of invalid items to the
    `CifBlock`.

    ## Initialisation

    * `dic` is a `CifDic` object to be used for validation.

    """
    <Initialise with dictionary>
    <Run data checks>
    <Check input data>
    <Redefine item adding and removing>
    <Validation report>

The dic argument contains a previously initialised dictionary. We can alternatively provide a list of filenames/CifFile objects in diclist, which are merged according to mergemode. If both are provided, diclist is ignored with a warning.

<Initialise with dictionary>= (<-U)
def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords):
    CifBlock.__init__(self,*args,**kwords)
    if dic and diclist:
        print("Warning: diclist argument ignored when initialising ValidCifBlock")
    if isinstance(dic,CifDic):
        self.fulldic = dic
    elif dic is not None:
        raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument")
    elif len(diclist)==0:
        raise ValidCifError( "At least one dictionary must be specified")
    else:
        self.fulldic = merge_dic(diclist,mergemode)
    if not self.run_data_checks()[0]:
        raise ValidCifError( self.report())
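
A minimal usage sketch (import path and file names are assumptions for illustration):

    from CifFile import CifFile, CifDic, ValidCifBlock

    cdic = CifDic("cif_core.dic")
    vb = ValidCifBlock(dic=cdic, data=CifFile("data.cif").first_block())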

Run all of these data checks. The dictionary validation methods return a list of tuples (validation function name, result) for each item. When checking a full data block, we can make use of the optimisation facilities provided in the CifDic object.

<Run data checks>= (<-U)
def run_data_checks(self,verbose=False):
    self.v_result = {}
    self.fulldic.optimize_on()
    for dataname in self.keys():
        update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname]))
        update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self))
    for loop_names in self.loops.values():
        update_value(self.v_result,self.fulldic.run_loop_validation(loop_names))
    # now run block-level checks
    update_value(self.v_result,self.fulldic.run_block_validation(self))
    # return false and list of baddies if anything didn't match
    self.fulldic.optimize_off()
    all_keys = list(self.v_result.keys()) #dictionary will change
    for test_key in all_keys:
        #print("%s: %r" % (test_key, self.v_result[test_key]))
        self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False]
        if len(self.v_result[test_key]) == 0:
            del self.v_result[test_key]
    isvalid = len(self.v_result)==0
    #if not isvalid:
    #    print("Baddies: {!r}".format(self.v_result))
    return isvalid,self.v_result

Report back. We summarise the contents of v_result, listing each failing item together with the names of the tests it failed.

<Validation report>= (<-U)
def report(self):
   outstr = StringIO()
   outstr.write( "Validation results\n")
   outstr.write( "------------------\n")
   print("%d invalid items found\n" % len(self.v_result))
   for item_name,val_func_list in self.v_result.items():
       outstr.write("%s fails following tests:\n" % item_name)
       for val_func in val_func_list:
           outstr.write("\t%s\n")
   return outstr.getvalue()

It is not a mistake for a data name to be absent from any of the specified dictionaries, so we have to check that we have a match before running any data checks, rather than simply raising an error immediately.

<Check input data>= (<-U)
def single_item_check(self,item_name,item_value):
    #self.match_single_item(item_name)
    if item_name not in self.fulldic:
        result = {item_name:[]}
    else:
        result = self.fulldic.run_item_validation(item_name,item_value)
    baddies = list([a for a in result[item_name] if a[1]["result"]==False])
    # if even one false one is found, this should trigger
    isvalid = (len(baddies) == 0)
    # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
    return isvalid,baddies

def loop_item_check(self,loop_names):
    in_dic_names = list([a for a in loop_names if a in self.fulldic])
    if len(in_dic_names)==0:
        baddies = []
    else:
        result = self.fulldic.run_loop_validation(in_dic_names)
        baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False])
    # if even one false one is found, this should trigger
    isvalid = (len(baddies) == 0)
    # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies))
    return isvalid,baddies

def global_item_check(self,item_name,item_value,provisional_items={}):
    if item_name not in self.fulldic:
        result = {item_name:[]}
    else:
        result = self.fulldic.run_global_validation(item_name,
           item_value,self,provisional_items = provisional_items)
    baddies = list([a for a in result[item_name] if a[1]["result"] is False])
    # if even one false one is found, this should trigger
    isvalid = (len(baddies) == 0)
    # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
    return isvalid,baddies

def remove_global_item_check(self,item_name):
    if item_name not in self.fulldic:
        result = {item_name:[]}
    else:
        result = self.fulldic.run_remove_global_validation(item_name,self,False)
    baddies = list([a for a in result[item_name] if a[1]["result"]==False])
    # if even one false one is found, this should trigger
    isvalid = (len(baddies) == 0)
    # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies))
    return isvalid,baddies

We need to override the base class methods here to prevent addition of an item that would render an object invalid.

<Redefine item adding and removing>= (<-U)
<Add to looped data with validity checks>
<Add straight data>

<Add straight data>= (<-U)
def AddCifItem(self,data):
    if isinstance(data[0],(unicode,str)):   # single item
        valid,problems = self.single_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
        valid,problems = self.global_item_check(data[0],data[1])
        self.report_if_invalid(valid,problems,data[0])
    elif isinstance(data[0],tuple) or isinstance(data[0],list):
        paired_data = list(zip(data[0],data[1]))
        for name,value in paired_data:
            valid,problems = self.single_item_check(name,value)
            self.report_if_invalid(valid,problems,name)
        valid,problems = self.loop_item_check(data[0])
        self.report_if_invalid(valid,problems,data[0])
        prov_dict = {}            # for storing temporary items
        for name,value in paired_data: prov_dict[name]=value
        for name,value in paired_data:
            del prov_dict[name]   # remove temporarily
            valid,problems = self.global_item_check(name,value,prov_dict)
            prov_dict[name] = value  # add back in
            self.report_if_invalid(valid,problems,name)
    else:
        raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item")
    super(ValidCifBlock,self).AddCifItem(data)

def AddItem(self,key,value,**kwargs):
    """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary"""
    valid,problems = self.single_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    valid,problems = self.global_item_check(key,value)
    self.report_if_invalid(valid,problems,key)
    super(ValidCifBlock,self).AddItem(key,value,**kwargs)

# utility function
def report_if_invalid(self,valid,bad_list,data_name):
    if not valid:
        bad_tests = [a[0] for a in bad_list]
        error_string = ",".join(bad_tests)
        error_string = repr(data_name) + " fails following validity checks: "  + error_string
        raise ValidCifError( error_string)

def __delitem__(self,key):
    # we don't need to run single item checks; we do need to run loop and
    # global checks.
    if key in self:
        try:
            loop_items = self.GetLoop(key)
        except TypeError:
            loop_items = []
        if loop_items:             #need to check loop conformance
            loop_names = [a[0] for a in loop_items if a[0] != key]
            valid,problems = self.loop_item_check(loop_names)
            self.report_if_invalid(valid,problems,key)
        valid,problems = self.remove_global_item_check(key)
        self.report_if_invalid(valid,problems,key)
    self.RemoveCifItem(key)

Adding to a loop. We find the loop containing the dataname that we have been passed, and then append all of the (key, value) pairs that we are passed in [[loopdata]], which is a dictionary. We expect that the data have been sorted out for us, unlike in AddCifItem, where unlooped and looped data can occur together in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance.

<Add to looped data with validity checks>= (<-U)
def AddToLoop(self,dataname,loopdata):
    # single item checks
    paired_data = loopdata.items()
    for name,value in paired_data:
        valid,problems = self.single_item_check(name,value)
        self.report_if_invalid(valid,problems,name)
    # loop item checks; merge with current loop
    found = 0
    for aloop in self.loops.values():
        if dataname in aloop:
            loopnames = list(aloop)
            for new_name in loopdata.keys():
                if new_name not in loopnames: loopnames.append(new_name)
            valid,problems = self.loop_item_check(loopnames)
            self.report_if_invalid(valid,problems,dataname)
    prov_dict = loopdata.copy()
    for name,value in paired_data:
        del prov_dict[name]   # remove temporarily
        valid,problems = self.global_item_check(name,value,prov_dict)
        prov_dict[name] = value  # add back in
        self.report_if_invalid(valid,problems,name)
    CifBlock.AddToLoop(self,dataname,loopdata)

Note that a dictionary must be specified in order to create a valid Cif file. This dictionary is then passed to any blocks. If they were already ValidCifBlocks, they will be reinitialised. Note that, as reading a dictionary takes time, we do it immediately to save doing it later.

As a convenience, we handle lists of filenames/CifFiles which are supposed to be dictionaries, and pass them directly to the ValidCifBlock object which will merge as necessary.

Note that we have to set bigdic before calling __init__. The various calls down through the inheritance hierarchy end up calling ValidCifBlock with self.bigdic as one of the arguments. Also, this __init__ procedure could be called from within StarFile.__init__ if given a filename to read from, so we allow that bigdic might already have been set - and check for its existence before setting it again!

<A valid CIF file>= (<-U)
class ValidCifFile(CifFile):
    """A CIF file for which all datablocks are valid.  Argument `dic` to
    initialisation specifies a `CifDic` object to use for validation."""
    <Initialise valid CIF>
    <Redefine add new block>

<Initialise valid CIF>= (<-U)
def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs):
    if not diclist and not dic and not hasattr(self,'bigdic'):
        raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object")
    if not dic and diclist:     #merge here for speed
        self.bigdic = merge_dic(diclist,mergemode)
    elif dic and not diclist:
        self.bigdic = dic
    CifFile.__init__(self,*args,**kwargs)
    for blockname in self.keys():
        self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic)

Whenever a new block is added, we have to additionally update our match array and perform a validation run. This definition shadows the definition in the parent class.

<Redefine add new block>= (<-U)
def NewBlock(self,blockname,blockcontents,**kwargs):
    CifFile.NewBlock(self,blockname,blockcontents,**kwargs)
    # dictionary[blockname] is now a CifBlock object.  We
    # turn it into a ValidCifBlock object
    self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic,
                                     data=self.dictionary[blockname])

We provide some functions for straight validation. These serve as an example of the use of the CifDic class with the CifFile class.

<Top-level functions>= (<-U)
<ValidationResult class>
<Validate against the given dictionaries>
<Run dictionary validation checks>

A convenient wrapper class for dealing with the structure returned by validation. Perhaps a more elegant approach would be to return one of these objects from validation rather than wrap the validation routines inside.

<ValidationResult class>= (<-U)
class ValidationResult:
    """Represents validation result. It is initialised with """
    def __init__(self,results):
        """results is return value of validate function"""
        self.valid_result, self.no_matches = results

    def report(self,use_html):
        """Return string with human-readable description of validation result"""
        return validate_report((self.valid_result, self.no_matches),use_html)

    def is_valid(self,block_name=None):
        """Return True for valid CIF file, otherwise False"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.valid_result.keys()
        for block_name in block_names:
            if not self.valid_result[block_name] == (True,{}):
                valid = False
                break
            else:
                valid = True
        return valid

    def has_no_match_items(self,block_name=None):
        """Return true if some items are not found in dictionary"""
        if block_name is not None:
            block_names = [block_name]
        else:
            block_names = self.no_matches.keys()
        for block_name in block_names:
            if self.no_matches[block_name]:
                has_no_match_items = True
                break
            else:
                has_no_match_items = False
        return has_no_match_items



We provide a function to do straight validation, using the built-in methods of the dictionary type. We need to create a single dictionary from the multiple dictionaries we are passed, before doing our check. Also, we allow validation of dictionaries themselves, by passing a special flag isdic. This should only be used for DDL2/DDLm dictionaries, and simply makes save frames visible as ordinary blocks. DDL1 dictionaries validate OK if (any) global block is deleted.

<Validate against the given dictionaries>= (<-U)
def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing,
    to the results of merging the `CifDic` objects in `diclist` according to `mergemode`.  Flag
    `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be
    accessed for validation and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = list(check_file.keys())[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                    print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
           block_scope = 'Dictionary'
        elif isdic:
           block_scope = 'Item'
        else:
           block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
             del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches

def validate_report(val_result,use_html=False):
    valid_result,no_matches = val_result
    outstr = StringIO()
    if use_html:
        outstr.write("<h2>Validation results</h2>")
    else:
        outstr.write( "Validation results\n")
        outstr.write( "------------------\n")
    if len(valid_result) > 10:
        suppress_valid = True         #don't clutter with valid messages
        if use_html:
           outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>")
    else:
        suppress_valid = False
    for block in valid_result.keys():
        block_result = valid_result[block]
        if block_result[0]:
            out_line = "Block '%s' is VALID" % block
        else:
            out_line = "Block '%s' is INVALID" % block
        if use_html:
            if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]:
                outstr.write( "<h3>%s</h3><p>" % out_line)
        else:
            outstr.write( "\n %s\n" % out_line)
        if len(no_matches[block])!= 0:
            if use_html:
                outstr.write( "<p>The following items were not found in the dictionary")
                outstr.write(" (note that this does not invalidate the data block):</p>")
                outstr.write("<p><table>\n")
                [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]]
                outstr.write("</table>\n")
            else:
                outstr.write( "\n The following items were not found in the dictionary:\n")
                outstr.write("Note that this does not invalidate the data block\n")
                [outstr.write("%s\n" % it) for it in no_matches[block]]
        # now organise our results by type of error, not data item...
        error_type_dic = {}
        for error_item, error_list in block_result[1].items():
            for func_name,bad_result in error_list:
                bad_result.update({"item_name":error_item})
                try:
                    error_type_dic[func_name].append(bad_result)
                except KeyError:
                    error_type_dic[func_name] = [bad_result]
        # make a table of test name, test message
        info_table = {\
        'validate_item_type':\
            "The following data items had badly formed values",
        'validate_item_esd':\
            "The following data items should not have esds appended",
        'validate_enum_range':\
            "The following data items have values outside permitted range",
        'validate_item_enum':\
            "The following data items have values outside permitted set",
        'validate_looping':\
            "The following data items violate looping constraints",
        'validate_loop_membership':\
            "The following looped data names are of different categories to the first looped data name",
        'validate_loop_key':\
            "A required dataname for this category is missing from the loop\n containing the dataname",
        'validate_loop_key_ddlm':\
            "A loop key is missing for the category containing the dataname",
        'validate_loop_references':\
            "A dataname required by the item is missing from the loop",
        'validate_parent':\
            "A parent dataname is missing or contains different values",
        'validate_child':\
            "A child dataname contains different values to the parent",
        'validate_uniqueness':\
            "One or more data items do not take unique values",
        'validate_dependents':\
            "A dataname required by the item is missing from the data block",
        'validate_exclusion': \
            "Both dataname and exclusive alternates or aliases are present in data block",
        'validate_mandatory_category':\
            "A required category is missing from this block",
        'check_mandatory_items':\
            "A required data attribute is missing from this block",
        'check_prohibited_items':\
            "A prohibited data attribute is present in this block"}

        for test_name,test_results in error_type_dic.items():
           if use_html:
               outstr.write(html_error_report(test_name,info_table[test_name],test_results))
           else:
               outstr.write(error_report(test_name,info_table[test_name],test_results))
               outstr.write("\n\n")
    return outstr.getvalue()

# A function to lay out a single error report.  We are passed
# the name of the error (one of our validation functions), the
# explanation to print out, and a dictionary with the error
# information.  We print no more than 50 characters of each
# offending value.

def error_report(error_name,error_explanation,error_dics):
   retstring = "\n\n " + error_explanation + ":\n\n"
   headstring = "%-32s" % "Item name"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "%-20s" % "Bad value(s)"
   if "bad_items" in error_dics[0]:
      headstring += "%-20s" % "Bad dataname(s)"
   if "child" in error_dics[0]:
      headstring += "%-20s" % "Child"
   if "parent" in error_dics[0]:
      headstring += "%-20s" % "Parent"
   headstring +="\n"
   for error in error_dics:
      bodystring += "\n%-32s" % error["item_name"]
      if "bad_values" in error:
          out_vals = [repr(a)[:50] for a in error["bad_values"]]
          bodystring += "%-20s" % out_vals
      if "bad_items" in error:
          bodystring += "%-20s" % repr(error["bad_items"])
      if "child" in error:
          bodystring += "%-20s" % repr(error["child"])
      if "parent" in error:
          bodystring += "%-20s" % repr(error["parent"])
   return retstring + headstring + bodystring
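
For illustration, a minimal sketch of calling this function directly; the dictionary keys are those attached by the validation machinery above, and the values shown are invented:

sample_errors = [{"item_name": "_cell_length_a", "bad_values": ["-5.0"]}]
print(error_report("validate_enum_range",
                   "The following data items have values outside permitted range",
                   sample_errors))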

#  This lays out an HTML error report

def html_error_report(error_name,error_explanation,error_dics,annotate=()):  # 'annotate' is currently unused
   retstring = "<h4>" + error_explanation + ":</h4>"
   retstring = retstring + "<table cellpadding=5><tr>"
   headstring = "<th>Item name</th>"
   bodystring = ""
   if "bad_values" in error_dics[0]:
      headstring += "<th>Bad value(s)</th>"
   if "bad_items" in error_dics[0]:
      headstring += "<th>Bad dataname(s)</th>"
   if "child" in error_dics[0]:
      headstring += "<th>Child</th>"
   if "parent" in error_dics[0]:
      headstring += "<th>Parent</th>"
   headstring +="</tr>\n"
   for error in error_dics:
      bodystring += "<tr><td><tt>%s</tt></td>" % error["item_name"]
      if "bad_values" in error:
          bodystring += "<td>%s</td>" % error["bad_values"]
      if "bad_items" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["bad_items"]
      if "child" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["child"]
      if "parent" in error:
          bodystring += "<td><tt>%s</tt></td>" % error["parent"]
      bodystring += "</tr>\n"
   return retstring + headstring + bodystring + "</table>\n"

This function executes validation checks provided in the CifDic. The validation calls create a dictionary containing the test results for each item name. Each item has a list of (test name,result) tuples. After running the tests, we contract these lists to contain only false results, and then remove all items containing no false results.

<Run dictionary validation checks>= (<-U)
def run_data_checks(check_block,fulldic,block_scope='Item'):
    v_result = {}
    for key in check_block.keys():
        update_value(v_result, fulldic.run_item_validation(key,check_block[key]))
        update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block))
    for loopnames in check_block.loops.values():
        update_value(v_result, fulldic.run_loop_validation(loopnames))
    update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope))
    # return false and list of baddies if anything didn't match
    all_keys = list(v_result.keys())
    for test_key in all_keys:
        v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False]
        if len(v_result[test_key]) == 0:
            del v_result[test_key]
    # the block is valid only if no failed results remain
    # print("Baddies: {!r}".format(v_result))
    isvalid = len(v_result)==0
    return isvalid,v_result
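
A usage sketch, where [[cf]] is a CifFile read from a hypothetical data file and [[fulldic]] a CifDic built from a hypothetical dictionary file:

cf = ReadCif("mystructure.cif")        # hypothetical data file
fulldic = CifDic("cif_core.dic")       # hypothetical dictionary
isvalid, problems = run_data_checks(cf.first_block(), fulldic)
if not isvalid:
    print("Failed tests: " + repr(list(problems.keys())))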

<Utility functions>= (<-U)
<Extract number and esd>
<Convert value to float, ignore esd>
<Conversions to dictionary types>
<Append update>
<Transpose data>
<Merge dictionaries as CIFs>
<Get topmost parent>

This support function uses re capturing to work out the number's value. The re contains ten capturing groups, which the code unpacks as follows: group 1 is the entire numeric expression; group 2 is the number prior to any esd brackets; group 3 is the alternation between the forms with and without a decimal point; group 4 is the form with a decimal point, within which group 5 captures the digits after the decimal point; group 6 is the digits of the form without a decimal point; group 7 is the esd bracket contents; group 8 is the exponent; and groups 9 and 10 match a bare question mark or full stop respectively (the CIF 'unknown' and 'not applicable' values).

The esd should be returned as an independent number. We count the number of digits after the decimal point, create the esd in terms of this, and then, if necessary, apply the exponent.

<Extract number and esd>= (<-U <-U)
def get_number_with_esd(numstring):
    numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    our_match = re.match(numb_re,numstring)
    if our_match:
        a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
        # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    else:
        return None,None
    if dot or q: return None,None     #a dot or question mark
    if exp:          #has exponent
       exp = exp.replace("d","e")     # mop up old fashioned numbers
       exp = exp.replace("D","e")
       base_num = base_num + exp
    # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    base_num = float(base_num)
    # work out esd, if present.
    if esd:
        esd = float(esd[1:-1])    # no brackets
        if dad:                   # decimal point + digits
            esd = esd * (10 ** (-1* len(dad)))
        if exp:
            esd = esd * (10 ** (float(exp[1:])))
    return base_num,esd
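
Some worked examples: in '4.5(2)' the bracketed 2 applies to the final decimal place, giving an esd of 0.2, while a number without a decimal point takes its esd at face value, and any exponent scales both the base number and the esd:

assert get_number_with_esd("4.5(2)") == (4.5, 0.2)
assert get_number_with_esd("12(3)") == (12.0, 3.0)
assert get_number_with_esd("?") == (None, None)   # CIF 'unknown' value
get_number_with_esd("1.5(3)e2")   # -> (150.0, approximately 30.0)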

For dREL operations we require that all numerical types actually appear as numerical types rather than strings. This function takes a datablock and a dictionary and converts all the datablock contents to numerical values according to the dictionary specifications.

Note that as written we are happy to interpret a floating point string as an integer. We are therefore assuming that the value has been validated.

<Conversions to dictionary types>= (<-U)
<Overall conversion>
<Convert a single value>
<Convert a list value>
<Convert a matrix value>
<Parse the structure specification>

Instead of returning a value, we return a function that can be used to convert the values. This saves time reconstructing the conversion function for every value in a loop.

<Overall conversion>= (<-U)
def convert_type(definition):
    """Convert value to have the type given by definition"""
    #extract the actual required type information
    container = definition['_type.container']
    dimension = definition.get('_type.dimension',StarFile.StarList([]))
    structure = interpret_structure(definition['_type.contents'])
    if container == 'Single':   #a single value to convert
        return convert_single_value(structure)
    elif container == 'List':   #lots of the same value
        return convert_list_values(structure,dimension)
    elif container == 'Multiple': #not currently handled
        return None
    elif container in ('Array','Matrix'): #numpy array
        return convert_matrix_values(structure)
    return lambda a:a    #unable to convert

<Convert a single value>= (<-U)
def convert_single_value(type_spec):
    """Convert a single item according to type_spec"""
    if type_spec == 'Real':
        return float_with_esd
    if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
        return int
    if type_spec == 'Complex':
        return complex
    if type_spec == 'Imag':
        return lambda a:complex(0,a)
    if type_spec in ('Code','Name','Tag'):  #case-insensitive -> lowercase
        return lambda a:a.lower()
    return lambda a:a   #can't do anything numeric
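
For example, the returned converters behave as follows:

to_int = convert_single_value('Count')        # the int builtin
to_code = convert_single_value('Code')        # case-insensitive, so lowercased
assert to_int('42') == 42
assert to_code('Monoclinic') == 'monoclinic'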

Convert a whole DDLm list. A 'List' type implies a repetition of the types given in the 'type.contents' entry. We get all fancy and build a function to decode each entry in our input list. This function is then mapped over the List, and in the case of looped List values, it can be mapped over the dataname value as well. However, in the case of a single repetition, files are allowed to drop one level of enclosing brackets. We account for that here by detecting a one-element list and *not* mapping the conversion function. TODO: Note that we do not yet handle the case that we are supposed to convert to a Matrix, rather than a list. TODO: handle arbitrary dimension lists, rather than special-casing the character sequence '[1]'.

<Convert a list value>= (<-U)
class convert_simple_list(object):

    """\
    Callable object that converts values in a simple list according
    to the specified element structure.
    """

    def __init__(self, structure):
        self.converters = [convert_single_value(tp) for tp in structure]
        return

    def __call__(self, element):
        if len(element) != len(self.converters):
            emsg = "Expected iterable of %i values, got %i." % (
                len(self.converters), len(element))
            raise ValueError(emsg)
        rv = [f(e) for f, e in zip(self.converters, element)]
        return rv

# End of class convert_simple_list

def convert_list_values(structure, dimension):
    """Convert the values according to the element
       structure given in [[structure]]"""
    # simple repetition
    if isinstance(structure, (unicode, str)):
        fcnv = convert_single_value(structure)
    # assume structure is a list of types
    else:
        fcnv = convert_simple_list(structure)
    rv = fcnv
    # setup nested conversion function when dimension differs from 1.
    if len(dimension) > 0 and int(dimension[0]) != 1:
        rv = lambda args : [fcnv(a) for a in args]
    return rv
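
A brief sketch of both cases: a single repeated type with dimension 3 (so the conversion is mapped over the incoming values), and a heterogeneous two-element structure with dimension 1 (so it is not):

triplet = convert_list_values('Integer', ['3'])
assert triplet(['1', '2', '3']) == [1, 2, 3]
pair = convert_list_values(['Count', 'Code'], ['1'])
assert pair(['6', 'ABC']) == [6, 'abc']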

When storing a matrix/array value as a result of a calculation, we remove the numpy information and instead store as a StarList. The following routine will work transparently for either string or number-valued Star Lists, so we do not have to worry.

<Convert a matrix value>= (<-U)
def convert_matrix_values(valtype):
    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
    # first convert to numpy array, then let numpy do the work
    try:
        import numpy
    except ImportError:
        return lambda a:a   #cannot do it
    if valtype == 'Real':
        dtype = float
    elif valtype == 'Integer':
        dtype = int
    elif valtype == 'Complex':
        dtype = complex
    else:
        raise ValueError('Unknown matrix value type')
    fcnv = lambda a : numpy.asarray(a, dtype=dtype)
    return fcnv
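
For example, assuming numpy is available:

to_real = convert_matrix_values('Real')
m = to_real([[1, 2], [3, 4]])    # numpy array with float entries
assert m.shape == (2, 2)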

DDLm specifies List element composition using a notation of form 'cont(el,el,el...)' where 'cont' refers to a container constructor (list or matrix so far) and 'el' is a simple element type. If 'cont' is missing, the sequence of elements is a sequence of elements in a simple list. We have written a simple parser to interpret this.

<Parse the structure specification>= (<-U)
def interpret_structure(struc_spec):
    """Interpret a DDLm structure specification"""
    from . import TypeContentsParser as t
    p = t.TypeParser(t.TypeParserScanner(struc_spec))
    return getattr(p,"input")()
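
A usage sketch with a hypothetical contents specification; the parsed structure is whatever the generated TypeContentsParser grammar produces, and [[convert_type]] above simply dispatches on it:

structure = interpret_structure('List(Real,Real,Real)')   # hypothetical specification string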

<Append update>= (<-U)
# A utility function to append to item values rather than replace them
def update_value(base_dict,new_items):
    for new_key in new_items.keys():
        if new_key in base_dict:
            base_dict[new_key].extend(new_items[new_key])
        else:
            base_dict[new_key] = new_items[new_key]
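
For example:

base = {'_a': [1]}
update_value(base, {'_a': [2], '_b': [3]})
assert base == {'_a': [1, 2], '_b': [3]}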

<Transpose data>= (<-U)
#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = []
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl

# listify strings - used surprisingly often
def listify(item):
    if isinstance(item,(unicode,str)): return [item]
    else: return item

# given a list of search items, return a list of items
# actually contained in the given data block
def filter_present(namelist,datablocknames):
    return [a for a in namelist if a in datablocknames]

# Make an item immutable, used if we want a list to be a key
def make_immutable(values):
    """Turn list of StarList values into a list of immutable items"""
    if not isinstance(values[0],StarList):
        return values
    else:
        return [tuple(a) for a in values]
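
Brief examples of these helpers:

assert transpose([[1, 2, 3], [4, 5, 6]]) == [[1, 4], [2, 5], [3, 6]]
assert listify('_single.name') == ['_single.name']
assert filter_present(['_a', '_b'], ['_b', '_c']) == ['_b']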

Decorators. The following decorator keeps track of calls in order to detect recursion. We raise a special recursion error to allow the derive_item method to act accordingly. We also propagate the first-seen value of 'allow_defaults' recursively, so that the original call can control whether or not default values are used. Typically a derivation is first attempted without, and then with, default values, to ensure that all possibilities for deriving the item from actual data are tried first.

<Decorators>= (<-U)
def track_recursion(in_this_func):
    """Keep an eye on a function call to make sure that the key argument hasn't been
    seen before"""
    def wrapper(*args,**kwargs):
        key_arg = args[1]
        if key_arg in wrapper.called_list:
            print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg)))
            raise CifRecursionError( key_arg,wrapper.called_list[:])    #failure
        if len(wrapper.called_list) == 0:   #first time
            wrapper.stored_use_defaults = kwargs.get("allow_defaults",False)
            print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults))
        else:
            kwargs["allow_defaults"] = wrapper.stored_use_defaults
        wrapper.called_list.append(key_arg)
        print('Recursion watch: call stack: ' + repr(wrapper.called_list))
        try:
            result = in_this_func(*args,**kwargs)
        except StarFile.StarDerivationError as s:
            if len(wrapper.called_list) == 1: #no more
                raise StarFile.StarDerivationFailure(wrapper.called_list[0])
            else:
                raise
        finally:
            wrapper.called_list.pop()
            if len(wrapper.called_list) == 0:
                wrapper.stored_use_defaults = 'error'
        return result
    wrapper.called_list = []
    return wrapper
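
A usage sketch: the decorator assumes, as for the real derive_item method, that the wrapped callable is a method whose second positional argument is the key being derived (the class below is invented for illustration):

class Deriver(object):
    @track_recursion
    def derive(self, key, allow_defaults=False):
        # a derivation that called itself with the same key would
        # raise CifRecursionError via the wrapper
        return "value for %s" % key

d = Deriver()
print(d.derive("_cell.volume"))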

This uses the CifFile merge method to merge a list of filenames, with an initial check to determine DDL1/DDL2 merge style. For DDL1 we merge data blocks, whereas for DDL2/DDLm we merge save frames within a single data block; these operate at different levels of the file.

Note that the data block name is passed to specify the parts of each object to be merged, rather than the objects themselves (not doing this was a bug that was caught a while ago).

<Merge dictionaries as CIFs>= (<-U)
# merge ddl dictionaries.  We should be passed filenames or CifFile
# objects
def merge_dic(diclist,mergemode="replace",ddlspec=None):
    dic_as_cif_list = []
    for dic in diclist:
        if not isinstance(dic,CifFile) and \
           not isinstance(dic,(unicode,str)):
               raise TypeError("Require list of CifFile names/objects for dictionary merging")
        if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic))
        else: dic_as_cif_list.append(dic)
    # we now merge left to right
    basedic = dic_as_cif_list[0]
    if "on_this_dictionary" in basedic:   #DDL1 style only
        for dic in dic_as_cif_list[1:]:
           basedic.merge(dic,mode=mergemode,match_att=["_name"])
    elif len(basedic.keys()) == 1:                     #One block: DDL2/m style
        base_block_name = list(basedic.keys())[0]      #keys() is a view in Python 3
        for dic in dic_as_cif_list[1:]:
           new_block_name = list(dic.keys())[0]
           basedic.merge(dic,mode=mergemode,
                         single_block=[base_block_name,new_block_name],
                         match_att=["_item.name"],match_function=find_parent)
    return CifDic(basedic)

Find the main item from a parent-child list. We are asked to find the topmost parent in a DDL2 definition block containing multiple item names. We use the insight that the parent item will be the one item that does not also appear in the list of children. If there are no item names, we are dealing with something like a category (whether categories can be merged is an open question).

<Get topmost parent>= (<-U)
def find_parent(ddl2_def):
    if "_item.name" not in ddl2_def:
       return None
    if isinstance(ddl2_def["_item.name"],unicode):
        return ddl2_def["_item.name"]
    if "_item_linked.child_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no child_names")
    if "_item_linked.parent_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no parent_names")
    result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]])
    if len(result)>1 or len(result)==0:
        raise CifError("Unable to find single unique parent data item")
    return result[0]
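
A sketch using a plain dictionary in place of a definition block; the layout mirrors a DDL2 parent definition with one parent and one child:

ddl2_def = {"_item.name": ["_parent.id", "_child.parent_id"],
            "_item_linked.parent_name": ["_parent.id"],
            "_item_linked.child_name": ["_child.parent_id"]}
assert find_parent(ddl2_def) == "_parent.id"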

Cif Loop block class

With the removal (by PyCIFRW) of nested loops, this class is now unnecessary. It is kept only for backwards compatibility, as a trivial subclass of StarFile.LoopBlock.

<CifLoopBlock class>= (<-U)
class CifLoopBlock(StarFile.LoopBlock):
    def __init__(self,data=(),**kwargs):
        super(CifLoopBlock,self).__init__(data,**kwargs)

<API documentation flags>= (<-U)
#No documentation flags
pycifrw-4.4/src/CifFile_module.nw000066400000000000000000006177621345362224200170750ustar00rootroot00000000000000<>= __copyright = """ PYCIFRW License Agreement (Python License, Version 2) ----------------------------------------------------- 1. This LICENSE AGREEMENT is between the Australian Nuclear Science and Technology Organisation ("ANSTO"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("PyCIFRW") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. """ @ \section{Introduction} This file implements a general CIF reading/writing utility. The basic objects ([[CifFile/CifBlock]]) read and write syntactically correct CIF 1.1 files including save frames. Objects for validating CIFs are built on these basic objects: A [[CifDic]] object is derived from a [[CifFile]] created from a DDL1/2 dictionary; and the [[ValidCifFile/ValidCifBlock]] objects allow creation/checking of CIF files against a list of CIF dictionaries. The [[CifFile]] class is initialised with either no arguments (a new CIF file) or with the name of an already existing CIF file. Data items are accessed/changed/added using the python mapping type ie to get [[dataitem]] you would type [[value = cf[blockname][dataitem]]]. Note also that a CifFile object can be accessed as a mapping type, ie using square brackets. Most mapping operations have been implemented (see below). We build upon the objects defined in the StarFile class, by imposing a few extra restrictions where necessary. 
<<*>>= # To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import try: from cStringIO import StringIO except ImportError: from io import StringIO # Python 2,3 compatibility try: from urllib import urlopen # for arbitrary opening from urlparse import urlparse, urljoin except: from urllib.request import urlopen from urllib.parse import urlparse, urljoin # The unicode type does not exist in Python3 as the str type # encompasses unicode. PyCIFRW tests for 'unicode' would fail # Suggestions for a better approach welcome. if isinstance(u"abc",str): #Python3 unicode = str <> import re,sys from . import StarFile from .StarFile import StarList #put in global scope for exec statement try: import numpy #put in global scope for exec statement from .drel import drel_runtime #put in global scope for exec statement except ImportError: pass #will fail when using dictionaries for calcs from copy import copy #must be in global scope for exec statement <> <> <> <> <> <> <> <> <> <> <> <> @ \section{CifFile} A CifFile is subclassed from a StarFile. Our StarFile class has an optional check of line length, which we use. A CifFile object is a dictionary of CifBlock objects, accessed by block name. As the maximum line length is subject to change, we allow the length to be specified, with the current default set at 2048 characters (Cif 1.1). For reading in files, we only flag a length error if the parameter [[strict]] is true, in which case we use parameter [[maxinlength]] as our maximum line length on input. Parameter [[maxoutlength]] sets the maximum line size for output. If [[maxoutlength]] is not specified, it defaults to the maximum input length. Note that this applies to the input only. For changing output length, you can provide an optional parameter in the [[WriteOut]] method. <>= class CifFile(StarFile.StarFile): <> @ When initialising, we add those parts that are unique to the CifFile as opposed to a simple collection of blocks - i.e. reading in from a file, and some line length restrictions. We do not indent this section in this noweb file, so that our comment characters output at the beginning of the line. <>= def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs): super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs) self.strict = strict self.header_comment = \ """ ########################################################################## # Crystallographic Information Format file # Produced by PyCifRW module # # This is a CIF file. CIF has been adopted by the International # Union of Crystallography as the standard for data archiving and # transmission. # # For information on this file format, follow the CIF links at # http://www.iucr.org ########################################################################## """ @ \section{Cif Block class} CifBlocks exist(ed) as a separate class in order to enforce non-nested loops and maximum dataname lengths. As nested loops have been removed completely from PyCIFRW, they are no longer necessary but kept here for backwards compatibility. <>= class CifBlock(StarFile.StarBlock): """ A class to hold a single block of a CIF file. A `CifBlock` object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. `b['_a_dataname']`. All other Python dictionary methods are also available (e.g. `keys()`, `values()`). 
Looped datanames will return a list of values. ## Initialisation When provided, `data` should be another `CifBlock` whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ <> <> <> <> @ A CifBlock is a StarBlock with a very few restrictions. <>= def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs): """When provided, `data` should be another CifBlock whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ if strict: maxnamelength=75 else: maxnamelength=-1 super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs) self.dictionary = None #DDL dictionary referring to this block self.compat_mode = compat_mode #old-style behaviour of setitem def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname) @ The second line in the copy method switches the class of the returned object to be a CifBlock. It may not be necessary. <>= def __setitem__(self,key,value): self.AddItem(key,value) # for backwards compatibility make a single-element loop if self.compat_mode: if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList): # single element loop self.CreateLoop([key]) def copy(self): newblock = super(CifBlock,self).copy() return self.copy.im_class(newblock) #catch inheritance @ This function was added for the dictionary validation routines. It will return a list where each member is itself a list of item names, corresponding to the names in each loop of the file. <>= def loopnames(self): return [self.loops[a] for a in self.loops] @ Adding a data item. In the old, deprecated method we are passed a tuple with the (set) of data names at the beginning, and a (set) of values for them following. We implement this behaviour by looping over the input datanames, and adding them to the set of keys. When we have finished, we create the loop. We check the length of the name, and give an error if the name is greater than 75 characters, which is the CIF 1.1 maximum length. We also check for consistency, by making sure the new item is not in the block already. If it is, we replace it (consistent with the meaning of square brackets). If it is in a loop, we replace the looped value and all other items in that loop block. This means that when adding loops, we must add them all at once if we call this routine directly. We typecheck the data items. They can be tuples, strings or lists. If we have a list of values for a single item, the item name should also occur in a single member tuple. <>= def AddCifItem(self,data): """ *DEPRECATED*. 
Use `AddItem` instead.""" # we accept only tuples, strings and lists!! if not (isinstance(data[0],(unicode,tuple,list,str))): raise TypeError('Cif datanames are either a string, tuple or list') # we catch single item loops as well... if isinstance(data[0],(unicode,str)): self.AddSingleCifItem(data[0],list(data[1])) if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop self.CreateLoop([data[0]]) return # otherwise, we loop over the datanames keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) [self.AddSingleCifItem(a,b) for a,b in keyvals] # and create the loop self.CreateLoop(data[0][0]) def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead""" """Add a single data item. If it is part of a loop, a separate call should be made""" self.AddItem(key,value) @ Reading in a file. We use the STAR grammar parser. Note that the blocks returned will be locked for changing ([[overwrite=False]]) and can be unlocked by setting block.overwrite to True. <>= def ReadCif(filename,grammar='auto',scantype='standard',scoping='instance',standard='CIF', permissive=False): """ Read in a CIF file, returning a `CifFile` object. * `filename` may be a URL, a file path on the local system, or any object with a `read` method. * `grammar` chooses the CIF grammar variant. `1.0` is the original 1992 grammar and `1.1` is identical except for the exclusion of square brackets as the first characters in undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will read files according to the STAR2 publication. If grammar is `None`, autodetection will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for properly-formed CIF2.0 files. Note that only Unicode characters in the basic multilingual plane are recognised (this will be fixed when PyCIFRW is ported to Python 3). * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using fast C routines, but is not available for CIF2/STAR2 files. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument (and does not yet support CIF2). * `scoping` is only relevant where nested save frames are expected (STAR2 only). `instance` scoping makes nested save frames invisible outside their hierarchy, allowing duplicate save frame names in separate hierarchies. `dictionary` scoping makes all save frames within a data block visible to each other, thereby restricting all save frames to have unique names. Currently the only recognised value for `standard` is `CIF`, which when set enforces a maximum length of 75 characters for datanames and has no other effect. 
""" finalcif = CifFile(scoping=scoping,standard=standard) return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype, permissive=permissive) #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs) @ Defining an error class: we simply derive a 'nothing' class from the root Python class <>= class CifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Format error: '+ self.value class ValidCifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Validity error: ' + self.value class CifRecursionError(Exception): def __init__(self,key_value,call_stack): self.key_value = key_value self.call_stack = call_stack def __str__(self): return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack)) @ \section {Dictionaries} To avoid ambiguity with the Python dictionary type, we use capital D to denote CIF Dictionaries where misinterpretation is possible. We build our Dictionary behaviour on top of the StarFile object, which is notionally a collection of StarBlocks. A Dictionary is simply a collection of datablocks, where each datablock corresponds to a single definition. DDL1 had no category definitions. We adopt a data model whereby the excess information in a DDL2 dictionary is absorbed into special methods (and I am thinking here of the [[_item_type_list.construct]] stuff which appears at the global level), which we initialise ourselves for a DDL1 dictionary. The square bracket notation is repurposed to mean access to the appropriate definition, as the save frame name and the definition may be slightly (or completely) different. <>= <> class CifDic(StarFile.StarFile): """Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only): * do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False * do_imports = No/Full/Contents/All: If not 'No', intepret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy' * do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All * heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically. """ <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> @ \subsection {Dictionary blocks} A dictionary block is essentially identical to a StarBlock, with the extra semantics of chasing through `_import.get` calls in order to transparently return attributes defined in separate dictionaries. If the `_import_cache` is empty, this is skipped. 
<>= class DicBlock(StarFile.StarBlock): """A definition block within a dictionary, which allows imports to be transparently followed""" def __init__(self,*args,**kwargs): super(DicBlock,self).__init__(*args,**kwargs) self._import_cache = {} def __getitem__(self,dataname): value = None if super(DicBlock,self).has_key("_import.get") and self._import_cache: value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) try: final_value = super(DicBlock,self).__getitem__(dataname) except KeyError: #not there final_value = value if final_value is None: raise KeyError("%s not found" % dataname) return final_value def has_key(self,key): try: self[key] except KeyError: return False return True def add_dict_cache(self,name,cached): """Add a loaded dictionary to this block's cache""" self._import_cache[name]=cached def follow_import(self,import_info,dataname): """Find the dataname values from the imported dictionary. `import_info` is a list of import locations""" latest_value = None for import_ref in import_info: file_loc = import_ref["file"] if file_loc not in self._import_cache: raise ValueError("Dictionary for import %s not found" % file_loc) import_from = self._import_cache[file_loc] miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,file_loc)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if mode == "contents": #only this is used at this level latest_value = import_target.get(dataname,latest_value) return latest_value @ \subsection {Initialisation} We want to be able to accept strings, giving the file name of the CIF dictionary, and pre-initialised [[CifFile]] objects. We do not accept [[CifDic]] objects. Our initialisation procedure first unifies the interface to the Dictionary, and then runs through the Dictionary producing a normalised form. Following this, type and category information can be collected for later reference. Validation functions are listed so that it would be possible to add and remove them from the "valid set". This behaviour has not yet been implemented. When loading DDLm dictionaries we may recursively call this initialisation function with a dictionary to be imported as the argument. In this case we do not want to do all the method derivation, as the necessary categories will be loaded into the calling dictionary rather than the currently initialising dictionary. So there is a keyword argument to stop the operations that should operate on the dictionary as a whole taking place. The dREL methods require Numpy support, but we do not wish to introduce a global dependence on Numpy. Therefore, we introduce a 'switch' which will return Numpy arrays from the __getitem__ method instead of StarLists. It is intended that the dREL methods will turn this on only during execution, then turn it off afterwards. Note that DDLm importation logic provides many choices. We have a choice of 'No', 'Contents', 'Full' and 'All' for the amount that is imported. If `heavy` is False, no definition material will be replaced, rather the import will be resolved dynamically. 
<>= def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True, grammar='auto',heavy=True,**kwargs): self.do_minimum = do_minimum if do_minimum: do_imports = 'No' do_dREL = False if do_dREL: do_imports = 'All' if heavy == 'Light' and do_imports not in ('contents','No'): raise(ValueError,"Light imports only available for mode 'contents'") self.template_cache = {} #for DDLm imports self.ddlm_functions = {} #for DDLm functions self.switch_numpy(False) #no Numpy arrays returned super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs) self.standard = 'Dic' #for correct output order self.scoping = 'dictionary' (self.dicname,self.diclang) = self.dic_determine() print('%s is a %s dictionary' % (self.dicname,self.diclang)) self.scopes_mandatory = {} self.scopes_naughty = {} # rename and expand out definitions using "_name" in DDL dictionaries if self.diclang == "DDL1": self.DDL1_normalise() #this removes any non-definition entries self.create_def_block_table() #From now on, [] uses definition_id if self.diclang == "DDL1": self.ddl1_cat_load() elif self.diclang == "DDL2": self.DDL2_normalise() #iron out some DDL2 tricky bits elif self.diclang == "DDLm": self.scoping = 'dictionary' #expose all save frames if do_imports is not 'No': self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine self.create_alias_table() self.create_cat_obj_table() self.create_cat_key_table() if do_dREL: print('Doing full dictionary initialisation') self.initialise_drel() self.add_category_info(full=do_dREL) # initialise type information self.typedic={} self.primdic = {} #typecode<->primitive type translation self.add_type_info() self.install_validation_functions() @ These routines seek to impose a uniform structure on dictionaries written in DDL1, DDL2 and DDLm. Historically, the richer and more systematic DDL2 approach was used to describe DDL1 definitions. With the advent of DDLm, the DDLm paradigm is likely to overtake DDL2. When interpreting the following routines, therefore, bear in mind that they were originally written with DDL2 in mind, and are gradually shifting to DDLm. <>= <> <> <> <> <> <> @ This function determines whether we have a DDLm, DDL2 or DDL1 dictionary. We are built from a [[CifFile]] object. The current method looks for an [[on_this_dictionary]] block, which implies DDL1, or a single block, which implies DDL2/DDLM. This is also where we define some universal keys for uniform access to DDL attributes. 
<>= def dic_determine(self): if "on_this_dictionary" in self: self.master_block = super(CifDic,self).__getitem__("on_this_dictionary") self.def_id_spec = "_name" self.cat_id_spec = "_category.id" #we add this ourselves self.type_spec = "_type" self.enum_spec = "_enumeration" self.cat_spec = "_category" self.esd_spec = "_type_conditions" self.must_loop_spec = "_list" self.must_exist_spec = "_list_mandatory" self.list_ref_spec = "_list_reference" self.key_spec = "_list_mandatory" self.unique_spec = "_list_uniqueness" self.child_spec = "_list_link_child" self.parent_spec = "_list_link_parent" self.related_func = "_related_function" self.related_item = "_related_item" self.primitive_type = "_type" self.dep_spec = "xxx" self.cat_list = [] #to save searching all the time name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"] version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"] return (name+version,"DDL1") elif len(self.get_roots()) == 1: # DDL2/DDLm self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0]) # now change to dictionary scoping self.scoping = 'dictionary' name = self.master_block["_dictionary.title"] version = self.master_block["_dictionary.version"] if self.master_block.has_key("_dictionary.class"): #DDLm self.enum_spec = '_enumeration_set.state' self.key_spec = '_category.key_id' self.must_exist_spec = None self.cat_spec = '_name.category_id' self.primitive_type = '_type.contents' self.cat_id_spec = "_definition.id" self.def_id_spec = "_definition.id" return(name+version,"DDLm") else: #DDL2 self.cat_id_spec = "_category.id" self.def_id_spec = "_item.name" self.key_spec = "_category_mandatory.name" self.type_spec = "_item_type.code" self.enum_spec = "_item_enumeration.value" self.esd_spec = "_item_type_conditions.code" self.cat_spec = "_item.category_id" self.loop_spec = "there_is_no_loop_spec!" self.must_loop_spec = "xxx" self.must_exist_spec = "_item.mandatory_code" self.child_spec = "_item_linked.child_name" self.parent_spec = "_item_linked.parent_name" self.related_func = "_item_related.function_code" self.related_item = "_item_related.related_name" self.unique_spec = "_category_key.name" self.list_ref_spec = "xxx" self.primitive_type = "_type" self.dep_spec = "_item_dependent.dependent_name" return (name+version,"DDL2") else: raise CifError("Unable to determine dictionary DDL version") @ DDL1 differences. Firstly, in DDL1 you can loop a [[_name]] to get definitions of related names (e.g. x,y,z). Secondly, the data block name is missing the initial underscore, so we need to read the [[_name]] value. There is one block without a [[_name]] attribute, which we proceed to destroy (exercise for the reader: which one?). A further complex difference is in the way that ranges are specified. A DDL2 dictionary generally loops the [[_item_range.maximum/minimum]] items, in order to specify inclusion of the endpoints of the range, whereas DDL1 dictionaries simply specify ranges as [[n:m]]. We translate these values into [[item_range]] specifications. If the [[_list]] item is missing for a dictionary definition, it defaults to no, i.e. the item cannot be listed. We explicitly include this in our transformations. The dictionaries also contain categories, which are used to impose constraints on groupings of items in lists. Category names in DDL2 dictionaries have no leading underscore, and the constraints are stored directly in the category definition. 
So, with a DDL1 dictionary, we rewrite things to match the DDL2 methods. In particular, the [[list_uniqueness]] item becomes the [[category_key.name]] attribute of the category. This may apply to [[_list_mandatory]] and /or [[_list_reference]] to, but the current specification is vague. Also, it is possible for cross-item references (e.g. in a [[_list_reference]]) to include a whole range of items by terminating the name with an underscore. It is then understood to include anything starting with those characters. We explicitly try to expand these references out. Note the way we convert to DDL2-style type definitions; any definition having a _type_construct regular expression triggers the definition of a whole new type, which is stored as per DDL2, for the later type dictionary construction process to find. <>= def DDL1_normalise(self): # switch off block name collision checks self.standard = None # add default type information in DDL2 style # initial types and constructs base_types = ["char","numb","null"] prim_types = base_types[:] base_constructs = [".*", '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', "\"\" "] for key,value in self.items(): newnames = [key] #keep by default if "_name" in value: real_name = value["_name"] if isinstance(real_name,list): #looped values for looped_name in real_name: new_value = value.copy() new_value["_name"] = looped_name #only looped name self[looped_name] = new_value newnames = real_name else: self[real_name] = value newnames = [real_name] # delete the old one if key not in newnames: del self[key] # loop again to normalise the contents of each definition for key,value in self.items(): #unlock the block save_overwrite = value.overwrite value.overwrite = True # deal with a missing _list, _type_conditions if "_list" not in value: value["_list"] = 'no' if "_type_conditions" not in value: value["_type_conditions"] = 'none' # deal with enumeration ranges if "_enumeration_range" in value: max,min = self.getmaxmin(value["_enumeration_range"]) if min == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) elif max == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) else: self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) #add any type construct information if "_type_construct" in value: base_types.append(value["_name"]+"_type") #ie dataname_type base_constructs.append(value["_type_construct"]+"$") prim_types.append(value["_type"]) #keep a record value["_type"] = base_types[-1] #the new type name #make categories conform with ddl2 #note that we must remove everything from the last underscore if value.get("_category",None) == "category_overview": last_under = value["_name"].rindex("_") catid = value["_name"][1:last_under] value["_category.id"] = catid #remove square bracks if catid not in self.cat_list: self.cat_list.append(catid) value.overwrite = save_overwrite # we now add any missing categories before filling in the rest of the # information for key,value in self.items(): #print('processing ddl1 definition %s' % key) if "_category" in self[key]: if self[key]["_category"] not in self.cat_list: # rogue category, add it in newcat = self[key]["_category"] fake_name = "_" + newcat + "_[]" newcatdata = CifBlock() newcatdata["_category"] = "category_overview" newcatdata["_category.id"] = newcat newcatdata["_type"] = "null" self[fake_name] = newcatdata self.cat_list.append(newcat) # write out 
the type information in DDL2 style self.master_block.AddLoopItem(( ("_item_type_list.code","_item_type_list.construct", "_item_type_list.primitive_code"), (base_types,base_constructs,prim_types) )) @ DDL2 has a few idiosyncracies of its own. For some reason, in the definition of a parent item, all the child items are listed and their mandatory/not mandatory status specified. This duplicates information under the child item itself, although there is something on the web indicating that this is purely cosmetic and not strictly necessary. For our purposes, we want to extract the mandatory/not mandatory nature of the current item, which appears to be conventionally at the top of the list (we do not assume this below). The only way of determining what the actual item name is is to look at the save frame name, which is a bit of a fragile tactic - especially as dictionary merge operations are supposed to look for _item.name. So, in these cases, we have to assume the save frame name is the one we want, and find this entry in the list. Additionally, the child entry doesn't contain the category specification, so we add this into the child entry at the same time, together with a pointer to the parent item. Such entries then have a loop listing parents and children down the whole hierarchy, starting with the current item. We disentangle this, placing parent item attributes in the child items, moving sub-children down to their level. Sub children may not exist at all, so we create them if necessary. To make life more interesting, the PDBX have an entry_pc placeholder in which additional (and sometimes repeated) parent-child relationships can be expressed. We cannot assume that any given parent-child relationship is stated at a single site in the file. What is more, it appears that multiple parents for a single child are defined in the _entry.pdbx_pc entry. Our changes to the file pre-checking are therefore restricted to making sure that the child contains information about the parents; we do not interfere with the parent's information about the children, even if we consider that to be superfluous. Note that we will have to add parent-child validity checks to check consistency among all these relationships. Update: in the DDL-2.1.6 file, only the parents/children are looped, rather than the item names, so we have to check looping separately. Next: DDL2 contains aliases to DDL1 item names, so in theory we should be able to use a DDL2 dictionary to validate a DDL1-style CIF file. We create separate definition blocks for each alias to enable this. Also, we flatten out any single-element lists for item_name. This is simply to avoid the value of e.g. category_id being a single-element list instead of a string. Note also that _item.category_id in DDL2 is 'implicit', meaning in this case that you can determine it from the item name. We add in the category for simplicity. <>= <> def DDL2_normalise(self): listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) # now filter out all the single element lists! 
dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) for item_def in dodgy_defs: <> <> # now flatten any single element lists single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) for flat_def in single_defs: flat_keys = self[flat_def].GetLoop('_item.name').keys() for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] # now deal with the multiple lists # next we do aliases all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] for aliased in all_aliases: my_aliases = listify(self[aliased]['_item_aliases.alias_name']) for alias in my_aliases: self[alias] = self[aliased].copy() #we are going to delete stuff... del self[alias]["_item_aliases.alias_name"] @ As some DDL2 dictionaries neglect children, we repopulate the skeleton or non-existent definitions that may be provided in the dictionary. <>= # print("DDL2 norm: processing %s" % item_def) thisdef = self[item_def] packet_no = thisdef['_item.name'].index(item_def) realcat = thisdef['_item.category_id'][packet_no] realmand = thisdef['_item.mandatory_code'][packet_no] # first add in all the missing categories # we don't replace the entry in the list corresponding to the # current item, as that would wipe out the information we want for child_no in range(len(thisdef['_item.name'])): if child_no == packet_no: continue child_name = thisdef['_item.name'][child_no] child_cat = thisdef['_item.category_id'][child_no] child_mand = thisdef['_item.mandatory_code'][child_no] if child_name not in self: self[child_name] = CifBlock() self[child_name]['_item.name'] = child_name self[child_name]['_item.category_id'] = child_cat self[child_name]['_item.mandatory_code'] = child_mand self[item_def]['_item.name'] = item_def self[item_def]['_item.category_id'] = realcat self[item_def]['_item.mandatory_code'] = realmand @ Populating parent and child links. The DDL2 model uses parent-child relationships to create relational database behaviour. This means that the emphasis is on simply linking two ids together directionally. This link is not necessarily inside a definition that is being linked, but we require that any parents and children are identified within the definition that they relate to. This means we have to sometimes relocate and expand links. As an item can simultaneously be both a parent and a child, we need to explicitly fill in the links even within a single definition. <>= target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ '_item_linked.parent_name' in self[a]] # now dodgy_defs contains all definition blocks with more than one child/parent link for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance for item_def in dodgy_defs: print('Processing %s' % item_def) thisdef = self[item_def] child_list = thisdef['_item_linked.child_name'] parents = thisdef['_item_linked.parent_name'] # for each parent, find the list of children. 
family = list(zip(parents,child_list)) notmychildren = family #We aim to remove non-children # Loop over the parents, relocating as necessary while len(notmychildren): # get all children of first entry mychildren = [a for a in family if a[0]==notmychildren[0][0]] print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) for parent,child in mychildren: #parent is the same for all # Make sure that we simply add in the new entry for the child, not replace it, # otherwise we might spoil the child entry loop structure try: childloop = self[child].GetLoop('_item_linked.parent_name') except KeyError: print('Creating new parent entry %s for definition %s' % (parent,child)) self[child]['_item_linked.parent_name'] = [parent] childloop = self[child].GetLoop('_item_linked.parent_name') childloop.AddLoopItem(('_item_linked.child_name',[child])) continue else: # A parent loop already exists and so will a child loop due to the # call to create_pcloop above pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] if len(goodpars)>0: #no need to add it print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) continue print('Adding %s to %s entry' % (parent,child)) newpacket = childloop.GetPacket(0) #essentially a copy, I hope setattr(newpacket,'_item_linked.child_name',child) setattr(newpacket,'_item_linked.parent_name',parent) childloop.AddPacket(newpacket) # # Make sure the parent also points to the children. We get # the current entry, then add our # new values if they are not there already # parent_name = mychildren[0][0] old_children = self[parent_name].get('_item_linked.child_name',[]) old_parents = self[parent_name].get('_item_linked.parent_name',[]) oldfamily = zip(old_parents,old_children) newfamily = [] print('Old parents -> %s' % repr(old_parents)) for jj, childname in mychildren: alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] if len(alreadythere)>0: continue 'Adding new child %s to parent definition at %s' % (childname,parent_name) old_children.append(childname) old_parents.append(parent_name) # Now output the loop, blowing away previous definitions. If there is something # else in this category, we are destroying it. newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) del self[parent_name]['_item_linked.parent_name'] del self[parent_name]['_item_linked.child_name'] self[parent_name].insert_loop(newloop) print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) # now make a new,smaller list notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] @ In order to handle parent-child relationships in a regular way, we want to assume that all parent-child entries occur in a loop, with both members present. This routine does that for us. If the parent is missing, it is assumed to be the currently-defined item. If the child is missing, likewise. 
<>= def create_pcloop(self,definition): old_children = self[definition].get('_item_linked.child_name',[]) old_parents = self[definition].get('_item_linked.parent_name',[]) if isinstance(old_children,unicode): old_children = [old_children] if isinstance(old_parents,unicode): old_parents = [old_parents] if (len(old_children)==0 and len(old_parents)==0) or \ (len(old_children) > 1 and len(old_parents)>1): return if len(old_children)==0: old_children = [definition]*len(old_parents) if len(old_parents)==0: old_parents = [definition]*len(old_children) newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) try: del self[definition]['_item_linked.parent_name'] del self[definition]['_item_linked.child_name'] except KeyError: pass self[definition].insert_loop(newloop) @ Loading the DDL1 categories with DDL2-type information. DDL2 people wisely put category-wide information in the category definition rather than spreading it out between category items. We collect this information together here. This routine is the big time-waster in initialising a DDL1 dictionary, so we have attempted to optimize it by locally defining functions, instead of using lambdas, and making one loop through the dictionary instead of hundreds. <>= def ddl1_cat_load(self): deflist = self.keys() #slight optimization cat_mand_dic = {} cat_unique_dic = {} # a function to extract any necessary information from each definition def get_cat_info(single_def): if self[single_def].get(self.must_exist_spec)=='yes': thiscat = self[single_def]["_category"] curval = cat_mand_dic.get(thiscat,[]) curval.append(single_def) cat_mand_dic[thiscat] = curval # now the unique items... # cif_core.dic throws us a curly one: the value of list_uniqueness is # not the same as the defined item for publ_body_label, so we have # to collect both together. We assume a non-listed entry, which # is true for all current (May 2005) ddl1 dictionaries. if self[single_def].get(self.unique_spec,None)!=None: thiscat = self[single_def]["_category"] new_unique = self[single_def][self.unique_spec] uis = cat_unique_dic.get(thiscat,[]) if single_def not in uis: uis.append(single_def) if new_unique not in uis: uis.append(new_unique) cat_unique_dic[thiscat] = uis [get_cat_info(a) for a in deflist] # apply the above function for cat in cat_mand_dic.keys(): self[cat]["_category_mandatory.name"] = cat_mand_dic[cat] for cat in cat_unique_dic.keys(): self[cat]["_category_key.name"] = cat_unique_dic[cat] @ A dataname can appear in a file under a different name if it has been aliased. We create an alias table to speed up lookup. The table is indexed by true name, with a list of alternatives. <>= def create_alias_table(self): """Populate an alias table that we can look up when searching for a dataname""" all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]] self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases]) @ DDLm internally refers to data items by the category.object notation, with the twist that child categories of loops can have their objects appear in the parent category. So this table prepares a complete list of (cat,obj):dataname correspondences, as the implementation of parent-child requires looking up a table each time searching for children. The recursive [[expand_base_table]] function returns a dictionary of (name,definition_id) pairs indexing the corresponding datanames. 
We must catch any keys and exclude them from this process, as they are allowed to
have the same [[object_id]] as their parent key in the enclosing datablock and will
overwrite the entry for the parent key if left in. We also note that the example
dictionary allows these types of name collisions if an item is intended to be
identical (e.g. _atom_site_aniso.type_symbol and _atom_site.type_symbol), so we create
a short list of possible alternative names for each (cat,obj) pair. The
[[create_cat_key_table]] method stores information about which keys index child
categories. This way applications can search for any loops containing these keys and
expand packets for dREL accordingly.

<>=
def create_cat_obj_table(self):
    """Populate a table indexed by (cat,obj) and returning the correct dataname"""
    base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \
                       for a in self.keys() if self[a].get('_definition.scope','Item')=='Item'])
    loopable = self.get_loopable_cats()
    loopers = [self.ddlm_immediate_children(a) for a in loopable]
    print('Loopable cats:' + repr(loopable))
    loop_children = [[b for b in a if b.lower() in loopable] for a in loopers]
    expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0])
    print("Expansion list:" + repr(expand_list))
    extra_table = {}   #for debugging we keep it separate from base_table until the end
    def expand_base_table(parent_cat,child_cats):
        extra_names = []
        # first deal with all the child categories
        for child_cat in child_cats:
            nn = []
            if child_cat in expand_list:  # a nested category: grab its names
                nn = expand_base_table(child_cat,expand_list[child_cat])
                # store child names
                extra_names += nn
            # add all child names to the table
            child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                           for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
            child_names += extra_names
            # names already present get appended to; everything else gets a new entry
            repeats = [(obj,name) for obj,name in child_names if (parent_cat,obj) in extra_table]
            extra_table.update(dict([((parent_cat,obj),[name]) for obj,name in child_names \
                                     if (parent_cat,obj) not in extra_table]))
            # and the repeated ones get appended instead
            for obj,name in repeats:
                extra_table[(parent_cat,obj)] += [name]
        # and finally, add our own names to the return list
        child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                        for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
        return child_names
    [expand_base_table(parent,child) for parent,child in expand_list.items()]
    print('Expansion cat/obj values: ' + repr(extra_table))
    # merge in the expansion table, appending to any repeated keys
    non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
    repeats = [a for a in extra_table.keys() if a in base_table]
    base_table.update(non_repeats)
    for k in repeats:
        base_table[k] += extra_table[k]
    self.cat_obj_lookup_table = base_table
    self.loop_expand_list = expand_list

def get_loopable_cats(self):
    """A short utility function which returns a list of looped categories. This is
    preferred to a fixed attribute as that fixed attribute would need to be updated
    after any edits"""
    return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

def create_cat_key_table(self):
    """Create a utility table with a list of keys applicable to each category. A key
    is a compound key, that is, it is a list"""
    self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
        [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
    def collect_keys(parent_cat,child_cats):
        kk = []
        for child_cat in child_cats:
            if child_cat in self.loop_expand_list:
                kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
            # add these keys to our list
            kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
        self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
        return kk
    for k,v in self.loop_expand_list.items():
        collect_keys(k,v)
    print('Keys for categories: ' + repr(self.cat_key_table))

@ CIF Dictionaries use the square bracket notation to refer to the definition, as for
CifFile objects, but the key is the definition itself, rather than the block name. So
we have to create a lookup table. However, template dictionaries may not have a
_definition.id, which means we have to revert to their blockname, so we use blockname
as a default. We also completely ignore case, which is a bit liberal, as definitions
themselves are case-sensitive. We catch duplicate definitions (e.g. as a result of
incorrect merging). If a definition is not found, we search any dictionaries that were
imported in 'Full' mode. This means that definitions in the dictionary proper override
anything in the imported dictionaries, as the import semantics require.

<>=
def create_def_block_table(self):
    """ Create an internal table matching definition to block id """
    proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()]
    # now get the actual ids instead of blocks
    proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table])
    # remove non-definitions
    if self.diclang != "DDL1":
        top_blocks = list([a[0].lower() for a in self.get_roots()])
    else:
        top_blocks = ["on_this_dictionary"]
    # catch dodgy duplicates
    uniques = set([a[0] for a in proto_table])
    if len(uniques) < len(proto_table):
        def_names = list([a[0] for a in proto_table])
        dodgy = [a for a in proto_table if def_names.count(a[0]) > 1]
        raise CifError('Duplicate definitions in dictionary:' + repr(dodgy))
    self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in proto_table if a[1].lower() not in top_blocks])

def __getitem__(self,key):
    """Access a datablock by definition id, after the lookup has been created"""
    try:
        return super(CifDic,self).__getitem__(self.block_id_table[key.lower()])
    except AttributeError:   #block_id_table not present yet
        return super(CifDic,self).__getitem__(key)
    except KeyError:         # key is missing
        try:
            # print('Definition for %s not found, reverting to CifFile' % key)
            return super(CifDic,self).__getitem__(key)
        except KeyError:     # try imports
            return self.lookup_imports(key)

def __setitem__(self,key,value):
    """Add a new definition block"""
    super(CifDic,self).__setitem__(key,value)
    try:
        self.block_id_table[value['_definition.id'].lower()]=key
    except AttributeError:   #does not exist yet
        pass

def NewBlock(self,*args,**kwargs):
    newname = super(CifDic,self).NewBlock(*args,**kwargs)
    try:
        self.block_id_table[self[newname]['_definition.id'].lower()]=newname
    except AttributeError:   #no block_id table
        pass
    return newname

def __delitem__(self,key):
    """Remove a definition"""
    try:
        super(CifDic,self).__delitem__(self.block_id_table[key.lower()])
        del self.block_id_table[key.lower()]
    except (AttributeError,KeyError):   #block_id_table not present yet
        super(CifDic,self).__delitem__(key)
        return
    # fix other datastructures
    # cat_obj table

def keys(self):
    """Return all definitions"""
    try:
        return self.block_id_table.keys()
    except AttributeError:
        return super(CifDic,self).keys()

def has_key(self,key):
    return key in self

def __contains__(self,key):
    try:
        return key.lower() in self.block_id_table
    except AttributeError:
        return super(CifDic,self).__contains__(key)

def items(self):
    """Return (key,value) pairs"""
    return list([(a,self[a]) for a in self.keys()])

@ Any StarFile method that uses the square-bracket notation or built-in syntax
(e.g. del) to access keys may fail if the set of keys it uses is not that provided by
the keys() method above, as the object delegation using super() does not apply. As we
have set up our methods above to 'fall through' to the underlying CifFile, the process
of renaming may or may not have called our del method to remove the definition, so we
check.

<>=
def unlock(self):
    """Allow overwriting of all definitions in this collection"""
    for a in self.keys():
        self[a].overwrite=True

def lock(self):
    """Disallow changes in definitions"""
    for a in self.keys():
        self[a].overwrite=False

def rename(self,oldname,newname,blockname_as_well=True):
    """Change a _definition.id from oldname to newname, and if `blockname_as_well`
    is True, change the underlying blockname too."""
    if blockname_as_well:
        super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname)
        self.block_id_table[newname.lower()]=newname.lower()
        if oldname.lower() in self.block_id_table:   #not removed
            del self.block_id_table[oldname.lower()]
    else:
        self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()]
        del self.block_id_table[oldname.lower()]
    return

@ \subsection{Semantic information}

<>=
<>
<>
<>

@ For convenience we provide ways of interrogating the semantic tree of categories.
Note that if we are passed the top-level datablock, the semantic children are the
syntactic children. An additional method finds the 'dangling' definitions, which are
definitions that have no category definition present - these might be definitions
added by this dictionary to categories found in other dictionaries.

<>=
def get_root_category(self):
    """Get the single 'Head' category of this dictionary"""
    root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head']
    if len(root_cats)>1 or len(root_cats)==0:
        raise CifError("Cannot determine a unique Head category, got %s" % repr(root_cats))
    return root_cats[0]
def ddlm_immediate_children(self,catname):
    """Return a list of datanames for the immediate children of catname.  These are
    semantic children (i.e. based on _name.category_id), not structural children as
    in the case of StarFile.get_immediate_children"""
    straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()]
    return list(straight_children)

def ddlm_all_children(self,catname):
    """Return a list of all children, including the `catname`"""
    all_children = self.ddlm_immediate_children(catname)
    cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category']
    for c in cat_children:
        all_children.remove(c)
        all_children += self.ddlm_all_children(c)
    return all_children + [catname]

def is_semantic_child(self,parent,maybe_child):
    """Return true if `maybe_child` is a child of `parent`"""
    all_children = self.ddlm_all_children(parent)
    return maybe_child in all_children

def ddlm_danglers(self):
    """Return a list of definitions that do not have a category defined for them,
    or are children of an unattached category"""
    top_block = self.get_root_category()
    connected = set(self.ddlm_all_children(top_block))
    all_keys = set(self.keys())
    unconnected = all_keys - connected
    return list(unconnected)

def get_ddlm_parent(self,itemname):
    """Get the parent category of itemname"""
    parent = self[itemname].get('_name.category_id','')
    if parent == '':   # no parent category specified at all
        raise CifError("%s has no parent" % itemname)
    return parent

@ Some methods for interrogating categories for names.

<>=
def expand_category_opt(self,name_list):
    """Return a list of all non-category items in a category or return the name
    if the name is not a category"""
    new_list = []
    for name in name_list:
        if self.get(name,{}).get('_definition.scope','Item') == 'Category':
            new_list += self.expand_category_opt([a for a in self.keys() if \
                        self[a].get('_name.category_id','').lower() == name.lower()])
        else:
            new_list.append(name)
    return new_list

def get_categories(self):
    """Return a list of category names"""
    return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category'])

@ This method was added to facilitate running dREL scripts, which treat certain
variables as having attributes which all belong to a single category. We return only
the extension in keeping with dREL syntax. If [[names_only]] is true, we return only
the object part of the dataname. Note that sub categories are excluded. TODO: use
cat-obj table for speed.

<>=
def names_in_cat(self,cat,names_only=False):
    names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()]
    if not names_only:
        return list([a for a in names if self[a].get('_definition.scope','Item')=='Item'])
    else:
        return list([self[a]["_name.object_id"] for a in names])

@ DDLm introduces validity information in the enclosing datablock. It is a loop of
(scope, attribute) values, where the scope is one of dictionary (everywhere), category
(whole category) and item (just the single definition). Validity can be mandatory,
encouraged or not allowed. It only appears in the DDLm attributes dictionary, so this
information is blank unless we are dealing with the DDLm dictionary.
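Once parsed, [[scopes_mandatory]] and [[scopes_naughty]] are simply tables from scope
('Dictionary', 'Category' or 'Item') to lists of attribute datanames. A hedged sketch
of their use (the filename, grammar choice and the particular attribute checked are
assumptions):

    attr_dic = CifDic('ddl.dic', grammar='2.0')         # the DDLm attribute dictionary
    must_have = attr_dic.scopes_mandatory.get('Item', [])
    print('_definition.id' in must_have)                # presumably True

The parsing itself follows.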
<>=
def ddlm_parse_valid(self):
    if "_dictionary_valid.application" not in self.master_block:
        return
    for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"):
        scope = getattr(scope_pack,"_dictionary_valid.application")
        valid_info = getattr(scope_pack,"_dictionary_valid.attributes")
        if scope[1] == "Mandatory":
            self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info)
        elif scope[1] == "Prohibited":
            self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info)

@ These methods were added when developing interactive editing tools, which allow
shifting categories around.

<>=
<>
<>

@ Changing a category name involves changing the [[_name.category_id]] in all children
as well as the category definition itself and datablock names, then updating our
internal structures.

<>=
def change_category_name(self,oldname,newname):
    """Change the category name from [[oldname]] to [[newname]]"""
    self.unlock()
    if oldname not in self:
        raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
    if newname in self:
        raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
    child_defs = self.ddlm_immediate_children(oldname)
    self.rename(oldname,newname)   #NB no name integrity checks
    self[newname]['_name.object_id']=newname
    self[newname]['_definition.id']=newname
    for child_def in child_defs:
        self[child_def]['_name.category_id'] = newname
        if self[child_def].get('_definition.scope','Item')=='Item':
            newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
            self[child_def]['_definition.id']=newid
            self.rename(child_def,newid[1:])   #no underscore at the beginning
    self.lock()

def create_catobj_name(self,cat,obj):
    """Combine category and object in approved fashion to create id"""
    return ('_'+cat+'.'+obj)

def change_category(self,itemname,catname):
    """Move itemname into catname, return new handle"""
    defid = self[itemname]
    if defid['_name.category_id'].lower()==catname.lower():
        print('Already in category, no change')
        return itemname
    if catname not in self:    #don't have it
        print('No such category %s' % catname)
        return itemname
    self.unlock()
    objid = defid['_name.object_id']
    defid['_name.category_id'] = catname
    newid = itemname   # stays the same for categories
    if defid.get('_definition.scope','Item') == 'Item':
        newid = self.create_catobj_name(catname,objid)
        defid['_definition.id']= newid
        self.rename(itemname,newid)
    self.set_parent(catname,newid)
    self.lock()
    return newid

def change_name(self,one_def,newobj):
    """Change the object_id of one_def to newobj. This is not used for categories,
    but can be used for dictionaries"""
    if '_dictionary.title' not in self[one_def]:   #not a dictionary block
        newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
        self.unlock()
        self.rename(one_def,newid)
        self[newid]['_definition.id']=newid
        self[newid]['_name.object_id']=newobj
    else:
        self.unlock()
        newid = newobj
        self.rename(one_def,newobj)
        self[newid]['_dictionary.title'] = newid
    self.lock()
    return newid

# Note that our semantic parent is given by catparent, but our syntactic parent is
# always just the root block
def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
    """Add a new category to the dictionary with name [[catname]]. If [[catparent]]
    is None, the category will be a child of the topmost 'Head' category or else the
    top data block. If [[is_loop]] is false, a Set category is created.
    If [[allow_dangler]] is true, the parent category does not have to exist."""
    if catname in self:
        raise CifError('Attempt to add existing category %s' % catname)
    self.unlock()
    syntactic_root = self.get_roots()[0][0]
    if catparent is None:
        semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
        if len(semantic_root)>0:
            semantic_root = semantic_root[0]
        else:
            semantic_root = syntactic_root
    else:
        semantic_root = catparent
    realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
    self.block_id_table[catname.lower()]=realname
    self[catname]['_name.object_id'] = catname
    if not allow_dangler or catparent is None:
        self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
    else:
        self[catname]['_name.category_id'] = catparent
    self[catname]['_definition.id'] = catname
    self[catname]['_definition.scope'] = 'Category'
    if is_loop:
        self[catname]['_definition.class'] = 'Loop'
    else:
        self[catname]['_definition.class'] = 'Set'
    self[catname]['_description.text'] = 'No definition provided'
    self.lock()
    return catname

def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
    """Add itemname to category [[catparent]]. If itemname contains periods, all text
    before the final period is ignored. If [[allow_dangler]] is True, no check for a
    parent category is made."""
    self.unlock()
    if '.' in itemname:
        objname = itemname.split('.')[-1]
    else:
        objname = itemname
    objname = objname.strip('_')
    if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
        raise CifError('No category %s in dictionary' % catparent)
    fullname = '_'+catparent.lower()+'.'+objname
    print('New name: %s' % fullname)
    syntactic_root = self.get_roots()[0][0]
    realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root)   #low-level change
    # update our dictionary structures
    self.block_id_table[fullname]=realname
    self[fullname]['_definition.id']=fullname
    self[fullname]['_name.object_id']=objname
    self[fullname]['_name.category_id']=catparent
    self[fullname]['_definition.class']='Datum'
    self[fullname]['_description.text']=def_text
    return fullname

def remove_definition(self,defname):
    """Remove a definition from the dictionary."""
    if defname not in self:
        return
    if self[defname].get('_definition.scope')=='Category':
        children = self.ddlm_immediate_children(defname)
        [self.remove_definition(a) for a in children]
        cat_id = self[defname]['_definition.id'].lower()
    del self[defname]

@ The DDLm architecture identifies a data definition by (category,object), which
identifies a unique textual dataname appearing in the data file. Because of category
joins when nested categories are looped, a single dataname may be referred to by
several different category identifiers. The [[get_name_by_cat_obj]] routine will
search all loop categories within the given category hierarchy until it finds the
appropriate one. If [[give_default]] is True, the default construction '_catid.objid'
is returned if nothing is found in the dictionary. This should only be used during
testing, as the lack of a corresponding definition in the dictionary means that it is
unlikely that anything sensible will result.
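For example, with a DDLm version of the core dictionary loaded we would expect

    cdic.get_name_by_cat_obj('atom_site', 'label')    # -> '_atom_site.label'
    cdic.get_name_by_cat_obj('atom_site', 'nonesuch',
                             give_default=True)       # -> '_atom_site.nonesuch'

where the second call simply manufactures the default name, as cautioned above.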
<>=
def get_cat_obj(self,name):
    """Return (cat,obj) tuple. [[name]] must contain only a single period"""
    cat,obj = name.split('.')
    return (cat.strip('_'),obj)

def get_name_by_cat_obj(self,category,object,give_default=False):
    """Return the dataname corresponding to the given category and object"""
    if category[0] == '_':    #accidentally left in
        true_cat = category[1:].lower()
    else:
        true_cat = category.lower()
    try:
        return self.cat_obj_lookup_table[(true_cat,object.lower())][0]
    except KeyError:
        if give_default:
            return '_'+true_cat+'.'+object
        raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object))

@ Dictionaries have the category-wide information in the category definition area. We
do not need to fill all of this in if we are not planning on running dREL.

<>=
def add_category_info(self,full=True):
    if self.diclang == "DDLm":
        <>
        if full:
            <>
        else:
            self.parent_lookup = {}
            self.key_equivs = {}

@ This method was added for DDLm support. We are passed a category and a value, and
must find a packet which has a matching key. We use the keyname as a way of finding
the loop.

<>=
def get_key_pack(self,category,value,data):
    keyname = self[category][self.unique_spec]
    onepack = data.GetPackKey(keyname,value)
    return onepack

@ For help in validation we create a lookup table which matches a category to its
ultimate parent. This allows us to quickly check whether or not a data item is allowed
to be co-looped with other data items. Note that we may have to draw in external
dictionaries to do this properly, but to avoid holding the whole lot in memory, we
simply stop searching up the parent tree if the parent block is missing.

<>=
catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
looped_cats = [a for a in catblocks if self[a].get('_definition.class','Set') == 'Loop']
self.parent_lookup = {}
for one_cat in looped_cats:
    parent_cat = one_cat
    parent_def = self[parent_cat]
    next_up = parent_def['_name.category_id'].lower()
    while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
        parent_def = self[next_up]
        parent_cat = next_up
        next_up = parent_def['_name.category_id'].lower()
    self.parent_lookup[one_cat] = parent_cat

@ The key hierarchy. This is in many ways reinventing the parent-child relationships
that are laid out in DDL2 definitions. In order to access a particular packet using
multiple datanames as compound keys, we need to be aware of which keys are related to
which other keys. Relationships are always made explicit via the '_name.linked_item_id'
attribute in DDLm, which always points to the parent. This is always present, even
though it may often be inferred using Loop category parent/child relationships, as
compound keys in categories might introduce ambiguity. This datastructure allows us to
provide a key, and obtain a list of equivalent keys, being all those above it in the
hierarchy, that is, which it can be replaced by. If we are not doing dREL, we can
afford to skip this.
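As a concrete illustration (the datanames are assumed for the example, not checked
against any particular dictionary): if [[_atom_site_aniso.label]] carries a
[[_name.linked_item_id]] of [[_atom_site.label]], then after the loop below has run
we would have

    cdic.key_equivs['_atom_site_aniso.label']    # -> ['_atom_site.label']

so a packet lookup keyed on the child dataname can fall back to the parent key.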
<>=
self.key_equivs = {}
for one_cat in looped_cats:   #follow them up
    lower_keys = listify(self[one_cat]['_category_key.name'])
    start_keys = lower_keys[:]
    while len(lower_keys)>0:
        this_cat = self[lower_keys[0]]['_name.category_id']
        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
        if len(parent)>1:
            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
        if len(parent)==0: break
        parent = parent[0]
        parent_keys = listify(self[parent]['_category_key.name'])
        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
        # sanity check
        if set(parent_keys) != set(linked_keys):
            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
        # now add in our information
        for parent,child in zip(linked_keys,start_keys):
            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
        lower_keys = linked_keys   #preserves order of start keys

@ \section{DDLm functionality}

DDLm is a far more complex dictionary standard than DDL2. We are able to import
definitions in two modes, "Full" and "Contents". "Contents" simply copies the
attributes found in the target definition, and is useful as a templating mechanism
for commonly-seen attributes. "Full" brings in the entire definition block and all
child definitions, and is useful for including entire dictionaries. As a special
case, if we import a 'Head' definition into a 'Head' definition, we actually make all
non-Head categories of the imported dictionary into child categories of the importing
dictionary 'Head' category, and the imported 'Head' category disappears.

``Contents'' and ``Full'' modes are implemented dynamically, that is, when the value
of an attribute is requested the dictionary resolves imports. The merging method of
the StarFile object is purely syntactic and so does not understand DDLm relationships.
We add all blocks as the children of the top-level dictionary block, and then in the
case of a new 'Head' block we simply reparent the immediate semantic children of the
old 'Head' block.
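For orientation, each element of a [[_import.get]] list presents itself to the code
below as a table; a minimal sketch of one reference (file and frame names invented):

    import_ref = {'file': 'templ_attr.cif',    # where to look
                  'save': 'units_code',        # frame to fetch
                  'mode': 'Contents',          # or 'Full'
                  'dupl': 'Exit',              # behaviour on duplicate frames
                  'miss': 'Exit'}              # behaviour on missing frames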
<>= <> <> <> <> <>= def obtain_imports(self,import_mode,heavy=False): """Collate import information""" self._import_dics = [] import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]]) print('Import mode %s applied to following frames' % import_mode) print(str([a[0] for a in import_frames])) if import_mode != 'All': for i in range(len(import_frames)): import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()]) print('Importing following frames in mode %s' % import_mode) print(str(import_frames)) #resolve all references for parent_block,import_list in import_frames: for import_ref in import_list: file_loc = import_ref["file"] full_uri = self.resolve_path(file_loc) if full_uri not in self.template_cache: dic_as_cif = CifFile(full_uri,grammar=self.grammar) self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False) #this will recurse internal imports print('Added %s to cached dictionaries' % full_uri) import_from = self.template_cache[full_uri] dupl = import_ref.get('dupl','Exit') miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,full_uri)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if target_key in self and mode=='full': #so blockname will be duplicated if dupl == 'Exit': raise CifError('Import frame %s already in dictionary' % target_key) elif dupl == 'Ignore': continue if heavy: self.ddlm_import(parent_block,import_from,import_target,target_key,mode) else: self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode) @ The original way of doing imports was to completely merge the information from the imported file. This is slightly more efficient if information about import statements is not required. 
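In terms of the public interface, the choice between the two behaviours is made at
construction time; a hedged sketch (filename invented):

    full_merge = CifDic('my_dic.dic', do_imports='Full', heavy=True)    # merge in place
    on_demand  = CifDic('my_dic.dic', do_imports='Full', heavy=False)   # resolve on access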
<>= def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'): """Import other dictionaries in place""" if mode == 'contents': #merge attributes only self[parent_block].merge(import_target) elif mode =="full": # Do the syntactic merge syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting from_cat_head = import_target['_name.object_id'] child_frames = import_from.ddlm_all_children(from_cat_head) # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False child_frames.remove(from_cat_head) # As we are in syntax land, we call the CifFile methods child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames]) child_blocks = super(CifDic,import_from).makebc(child_blocks) # Prune out any datablocks that have identical definitions from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()]) double_defs = list([b for b in from_defs.items() if self.has_key(b[1])]) print('Definitions for %s superseded' % repr(double_defs)) for b in double_defs: del child_blocks[b[0]] super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head) # print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames), mode,len(self))) # Now the semantic merge # First expand our definition <-> blockname tree self.create_def_block_table() merging_cat = self[parent_block]['_name.object_id'] #new parent if head_to_head: child_frames = self.ddlm_immediate_children(from_cat_head) #old children #the new parent is the importing category for all old children for f in child_frames: self[f].overwrite = True self[f]['_name.category_id'] = merging_cat self[f].overwrite = False # remove the old head del self[from_cat_head] print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat)) else: #imported category is only child from_frame = import_from[target_key]['_definition.id'] #so we can find it child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0] self[child_frame]['_name.category_id'] = merging_cat print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat)) # it will never happen again... del self[parent_block]["_import.get"] def resolve_path(self,file_loc): url_comps = urlparse(file_loc) if url_comps[0]: return file_loc #already full URI new_url = urljoin(self.my_uri,file_loc) #print("Transformed %s to %s for import " % (file_loc,new_url)) return new_url @ It is possible to not perform imports at reading time, but simply to register the links and resolve the imports if and when a definition is accessed. <>= def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'): """Register the imported dictionaries but do not alter any definitions. `parent_block` contains the id of the block that is importing. `import_target` is the block that should be imported. 
    `import_from` is the CifFile that contains the definitions."""
    if mode == 'contents':   #merge attributes only
        self[parent_block].add_dict_cache(file_loc,import_from)
    elif mode =="full":
        # Check for Head merging Head
        if self[parent_block].get('_definition.class','Datum')=='Head' and \
           import_target.get('_definition.class','Datum')=='Head':
            head_to_head = True
        else:
            head_to_head = False
        # Figure out the actual definition ID
        head_id = import_target["_definition.id"]
        # Adjust parent information
        merging_cat = self[parent_block]['_name.object_id']
        from_cat_head = import_target['_name.object_id']
        if not head_to_head:   # imported category is only child
            import_target["_name.category_id"]=merging_cat
        self._import_dics = [(import_from,head_id)]+self._import_dics   #prepend

@ Lightweight importation simply records the import information without performing
the import, and then when keys are accessed it checks through the imported
dictionaries. The semantics are such that the last dictionary imported should be the
first dictionary checked, as imports overwrite any definitions in preceding imports.

<>=
def lookup_imports(self,key):
    """Check the list of imported dictionaries for this definition"""
    for one_dic,head_def in self._import_dics:
        from_cat_head = one_dic[head_def]['_name.object_id']
        possible_keys = one_dic.ddlm_all_children(from_cat_head)
        if key in possible_keys:
            return one_dic[key]
    raise KeyError("%s not found in import dictionaries" % key)

@ Merging a whole dictionary. A dictionary is a collection of categories for the
purposes of merging (later we may want to keep some audit information).

<>=
def get_whole_dict(self,source_dict,on_dupl,on_miss):
    for source_cat in source_dict.get_categories():
        self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)

@ Merging a single category. If this category does not exist, we simply add the
category block and any members of the category. If it does exist, we use the
'on_dupl' flag to resolve our behaviour, either ignoring, replacing, or dying a
horrible death. If the specified block is missing in the external dictionary, we
either skip it or die a horrible death.

<>=
def get_one_cat(self,source_dict,source_cat,on_dupl,on_miss):
    ext_cat = source_dict.get(source_cat,"")
    this_cat = self.get(source_cat,"")
    print("Adding category %s" % source_cat)
    if not ext_cat:
        if on_miss == "Ignore":
            pass
        else:
            raise CifError("Missing category %s" % source_cat)
    else:
        all_ext_defns = source_dict.keys()
        cat_list = [a for a in all_ext_defns \
                    if source_dict[a].get("_name.category_id","").lower()==source_cat.lower()]
        print("Items: %s" % repr(cat_list))
        if this_cat:     # The category block itself is duplicated
            if on_dupl=="Ignore":
                pass
            elif on_dupl == "Exit":
                raise CifError("Duplicate category %s" % source_cat)
            else:
                self[source_cat] = ext_cat
        else:
            self[source_cat] = ext_cat
        # now do all member definitions
        for cat_defn in cat_list:
            self.add_one_defn(source_dict,cat_defn,on_dupl)

def add_one_defn(self,source_dict,cat_defn,on_dupl):
    if cat_defn in self:
        if on_dupl == "Ignore":
            pass
        elif on_dupl == "Exit":
            raise CifError("Duplicate definition %s" % cat_defn)
        else:
            self[cat_defn] = source_dict[cat_defn]
    else:
        self[cat_defn] = source_dict[cat_defn]
    print("   "+cat_defn)

@ This actually follows the children of the category down. We get a list of child
categories and add them one by one recursively.
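A sketch of a typical top-level call (the category name is invented):

    self.get_one_cat_with_children(source_dict, 'atom_site',
                                   on_dupl='Replace',   # anything but 'Ignore'/'Exit' replaces
                                   on_miss='Ignore')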
<>=
def get_one_cat_with_children(self,source_dict,source_cat,on_dupl,on_miss):
    self.get_one_cat(source_dict,source_cat,on_dupl,on_miss)
    child_cats = [a for a in source_dict.get_categories() \
                  if source_dict[a]["_category.parent_id"]==source_dict[source_cat]["_definition.id"]]
    for child_cat in child_cats:
        self.get_one_cat(source_dict,child_cat,on_dupl,on_miss)

@ Importing into definitions. We are adjusting only the attributes of a single
definition.

<>=
def import_attributes(self,mykey,source_dict,source_def,on_dupl,on_miss):
    # process missing
    if source_def not in source_dict:
        if on_miss == 'Exit':
            raise CifError('Missing definition for import %s' % source_def)
        else:
            return          #nothing else to do
    # now do the import
    print('Adding attributes from %s to %s' % (source_def,mykey))
    self[mykey].merge(source_dict[source_def],mode='replace',match_att= \
        ['_definition.id','_name.category_id','_name.object_id'])

def import_loop(self,mykey,source_dict,source_def,loop_name,on_miss):
    # process missing
    if source_def not in source_dict:
        if on_miss == 'Exit':
            raise CifError('Missing definition for import %s' % source_def)
        else:
            return          #nothing else to do
    print('Adding %s attributes from %s to %s' % (loop_name,source_def,mykey))
    state_loop = source_dict[source_def].GetLoop(loop_name)
    self[mykey].insert_loop(state_loop)

@ \section{Validation}

A DDL provides lots of information that can be used to check a datafile or dictionary
for consistency. Currently, the DDL-appropriate routines are installed at
initialisation time.

<>=
<>
<>
<>
<>
<>
<>
<>

@ Each dictionary has a set of validation functions associated with it based on the
information contained in the DDL. The following function is called on initialisation.

<>=
def install_validation_functions(self):
    """Install the DDL-appropriate validation checks"""
    if self.diclang != 'DDLm':
        # functions which check conformance
        self.item_validation_funs = [
            self.validate_item_type,
            self.validate_item_esd,
            self.validate_item_enum,
            self.validate_enum_range,
            self.validate_looping]
        # functions checking loop values
        self.loop_validation_funs = [
            self.validate_loop_membership,
            self.validate_loop_key,
            self.validate_loop_references]
        # where we need to look at other values
        self.global_validation_funs = [
            self.validate_exclusion,
            self.validate_parent,
            self.validate_child,
            self.validate_dependents,
            self.validate_uniqueness]
        # where only a full block will do
        self.block_validation_funs = [
            self.validate_mandatory_category]
        # removal is quicker with special checks
        self.global_remove_validation_funs = [
            self.validate_remove_parent_child]
    elif self.diclang == 'DDLm':
        self.item_validation_funs = [
            self.validate_item_enum,
            self.validate_item_esd_ddlm]
        self.loop_validation_funs = [
            self.validate_looping_ddlm,
            self.validate_loop_key_ddlm,
            self.validate_loop_membership]
        self.global_validation_funs = []
        self.block_validation_funs = [
            self.check_mandatory_items,
            self.check_prohibited_items]
        self.global_remove_validation_funs = []
    self.optimize = False        # default value
    self.done_parents = []
    self.done_children = []
    self.done_keys = []

@ Some things are independent of where an item occurs in the file; we check those
things here. All functions are expected to return a dictionary with at least one key,
"result", as well as optional keys depending on the type of error.

<>=
<>
<>
<>
<>
<>
<>

@ Validate the type of an item. We use the expressions for type that we have
available to check that the type of the item passed to us matches up. We may have a
list of items, so be aware of that.
We define a tiny matching function so that we do not have to do a double match to
catch the non-matching case, which returns None and thus an attribute error if we
immediately try to get a group. Note also that none of the extant dictionaries use
the 'none' or 'seq' values for type. The seq value in particular would complicate
matters.

<>=
def validate_item_type(self,item_name,item_value):
    def mymatch(m,a):
        res = m.match(a)
        if res != None:
            return res.group()
        else:
            return ""
    target_type = self[item_name].get(self.type_spec)
    if target_type == None:     # e.g. a category definition
        return {"result":True}  # not restricted in any way
    matchexpr = self.typedic[target_type]
    item_values = listify(item_value)
    #for item in item_values:
    #    print("Type match " + item_name + " " + item + ":",)
    #skip dots and question marks
    check_all = [a for a in item_values if a !="." and a != "?"]
    check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
    if len(check_all)>0:
        return {"result":False,"bad_values":check_all}
    else:
        return {"result":True}

@ DDLm types are far more nuanced, and we are not provided with prepacked regular
expressions in order to check them. We have identified the following checks: that the
type is in the correct container; that the contents are as described in
_type.contents; that 'State' purpose datanames have a list of enumerated states; that
'Link' purpose datanames have '_name.linked_item_id' in the same definition; and that
'SU' purpose datanames also have the above.

<>=
def decide(self,result_list):
    """Construct the return list"""
    if len(result_list)==0:
        return {"result":True}
    else:
        return {"result":False,"bad_values":result_list}

def validate_item_container(self, item_name,item_value):
    container_type = self[item_name]['_type.container']
    item_values = listify(item_value)
    if container_type == 'Single':
        okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
        return self.decide(okcheck)
    if container_type in ('Multiple','List'):
        okcheck = [a for a in item_values if not isinstance(a,StarList)]
        return self.decide(okcheck)
    if container_type == 'Array':    #A list with numerical values
        okcheck = [a for a in item_values if not isinstance(a,StarList)]
        first_check = self.decide(okcheck)
        if not first_check['result']:
            return first_check
        #num_check = [a for a in item_values if len([b for b in a if not isinstance

@ Esds. Numbers are sometimes not allowed to have esds appended. The default is that
esds are not OK, and we should also skip anything that has character type, as that is
automatically not a candidate for esds. Note that we make use of the primitive type
here; there are some cases where a string type looks like an esd, so unless we know
we have a number we ignore these cases. DDLm requires an esd if _type.purpose is
Measurand, and should not have an esd if _type.purpose is Number.

<>=
def validate_item_esd(self,item_name,item_value):
    if self[item_name].get(self.primitive_type) != 'numb':
        return {"result":None}
    can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
    if can_esd:
        return {"result":True}    #must be OK!
    item_values = listify(item_value)
    check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
    if len(check_all)>0:
        return {"result":False,"bad_values":check_all}
    return {"result":True}

def validate_item_esd_ddlm(self,item_name,item_value):
    if self[item_name].get(self.primitive_type) not in \
       ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
        return {"result":None}
    can_esd = True
    if self[item_name].get('_type.purpose') != 'Measurand':
        can_esd = False
    item_values = listify(item_value)
    check_all = [get_number_with_esd(a)[1] for a in item_values]
    check_all = [v for v in check_all if (can_esd and v == None) or \
                 (not can_esd and v != None)]
    if len(check_all)>0:
        return {"result":False,"bad_values":check_all}
    return {"result":True}

@ Enumeration ranges. Our dictionary has been prepared as for a DDL2 dictionary,
where loops are used to specify closed or open ranges: if an entry exists where
maximum and minimum values are equal, this means that this value is included in the
range; otherwise, ranges are open. Our value is already numerical.

<>=
def validate_enum_range(self,item_name,item_value):
    if "_item_range.minimum" not in self[item_name] and \
       "_item_range.maximum" not in self[item_name]:
        return {"result":None}
    minvals = self[item_name].get("_item_range.minimum",default = ["."])
    maxvals = self[item_name].get("_item_range.maximum",default = ["."])
    def makefloat(a):
        if a == ".":
            return a
        else:
            return float(a)
    maxvals = map(makefloat, maxvals)
    minvals = map(makefloat, minvals)
    rangelist = list(zip(minvals,maxvals))
    item_values = listify(item_value)
    def map_check(rangelist,item_value):
        if item_value == "?" or item_value == ".":
            return True
        iv,esd = get_number_with_esd(item_value)
        if iv==None:
            return None   #shouldn't happen as is numb type
        for lower,upper in rangelist:
            #check the minima
            if lower == ".": lower = iv - 1
            if upper == ".": upper = iv + 1
            if iv > lower and iv < upper: return True
            if upper == lower and iv == upper: return True
        # debug
        # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
        return False
    check_all = [a for a in item_values if map_check(rangelist,a) != True]
    if len(check_all)>0:
        return {"result":False,"bad_values":check_all}
    else:
        return {"result":True}

@ Note that we must make a copy of the enum list, otherwise when we add in our ? and
. they will modify the Cif in place, very sneakily, and next time we have a loop
length check, e.g. in writing out, we will probably have a mismatch.

<>=
def validate_item_enum(self,item_name,item_value):
    try:
        enum_list = self[item_name][self.enum_spec][:]
    except KeyError:
        return {"result":None}
    enum_list.append(".")   #default value
    enum_list.append("?")   #unknown
    item_values = listify(item_value)
    #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
    check_all = [a for a in item_values if a not in enum_list]
    if len(check_all)>0:
        return {"result":False,"bad_values":check_all}
    else:
        return {"result":True}

@ Check that something can be looped. For DDL1 we have yes, no and both. For DDL2
there is no explicit restriction on looping beyond membership in a category. Note
that the DDL1 language specifies a default value of 'no' for this item, so when not
explicitly allowed by the dictionary, listing is prohibited. In DDLm, only members of
'Loop' categories allow looping. As we transition the whole setup to DDLm-type data
structures, the two calls below will merge and move to the looping checks rather than
the single item checks.
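These checks are normally reached through the driver methods defined later
([[run_item_validation]] and friends) rather than called directly; a hedged usage
sketch:

    res = cdic.run_item_validation('_atom_site.occupancy', ['1.0', '0.5'])
    ok = all(r['result'] is not False
             for check_name, r in res['_atom_site.occupancy'])

Each entry pairs the name of a check function with its result dictionary, where a
result of None means the check was not applicable.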
<>=
def validate_looping(self,item_name,item_value):
    try:
        must_loop = self[item_name][self.must_loop_spec]
    except KeyError:
        return {"result":None}
    if must_loop == 'yes' and isinstance(item_value,(unicode,str)):  # not looped
        return {"result":False}      #this could be triggered
    if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
        return {"result":False}
    return {"result":True}

def validate_looping_ddlm(self,loop_names):
    """Check that all names are loopable"""
    truly_loopy = self.get_final_cats(loop_names)
    if len(truly_loopy) < len(loop_names):   #some do not belong to Loop categories
        not_loopy = [a for a in loop_names \
                     if self[a][self.cat_spec].lower() not in self.parent_lookup]
        return {"result":False,"bad_items":not_loopy}
    return {"result":True}

@ The loop validation functions are collected together here.

<>=
<>
<>
<>
<>
<>

@ Loop membership. The most common constraints on a loop are that all items are from
the same category, and that loops of a certain category must contain a certain key to
be valid. The latter test should be performed after the former test. DDLm allows
nested loop categories, so an item from a child category can appear in a parent
category loop if both are from 'Loop' categories.

<>=
def validate_loop_membership(self,loop_names):
    final_cat = self.get_final_cats(loop_names)
    bad_items = [a for a in final_cat if a != final_cat[0]]
    if len(bad_items)>0:
        return {"result":False,"bad_items":bad_items}
    else:
        return {"result":True}

def get_final_cats(self,loop_names):
    """Return a list of the uppermost parent categories for the loop_names. Names
    that are not from loopable categories are ignored."""
    try:
        categories = [self[a][self.cat_spec].lower() for a in loop_names]
    except KeyError:        #category_id is mandatory
        raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
    truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
    return [self.parent_lookup[a] for a in truly_looped]

@ The items specified by [[_list_mandatory]] (DDL1) must be present in a loop
containing items of a given category (and it follows that only one loop in a given
data block is available for any category containing such an item). This has been
explicitly described as a key in DDL2. In DDLm, any key from a parent looped category
is acceptable as well as the key of the given category itself.

<>=
def validate_loop_key(self,loop_names):
    category = self[loop_names[0]][self.cat_spec]
    # find any unique values which must be present
    key_spec = self[category].get(self.key_spec,[])
    for names_to_check in key_spec:
        if isinstance(names_to_check,unicode):   #only one
            names_to_check = [names_to_check]
        for loop_key in names_to_check:
            if loop_key not in loop_names:
                #is this one of those dang implicit items?
                if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                    continue          #it is virtually there...
                alternates = self.get_alternates(loop_key)
                if alternates == []:
                    return {"result":False,"bad_items":loop_key}
                for alt_names in alternates:
                    alt = [a for a in alt_names if a in loop_names]
                    if len(alt) == 0:
                        return {"result":False,"bad_items":loop_key}   # no alternates
    return {"result":True}

@ Validating keys in DDLm. We move everything to the uppermost parent category, and
then look up what keys can be used. If any of these are present, we are happy. This
might miss some subtleties in mixed or unmixed loops?
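Loop checks are driven in the same style as the item checks; a sketch using core
dictionary datanames:

    res = cdic.run_loop_validation(['_atom_site.label', '_atom_site.occupancy'])
    # res['_atom_site.label'] pairs each loop check with its result dictionary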
<>=
def validate_loop_key_ddlm(self,loop_names):
    """Make sure at least one of the necessary keys are available"""
    final_cats = self.get_final_cats(loop_names)
    if len(final_cats)>0:
        poss_keys = self.cat_key_table[final_cats[0]][0]
        found_keys = [a for a in poss_keys if a in loop_names]
        if len(found_keys)>0:
            return {"result":True}
        else:
            return {"result":False,"bad_items":poss_keys}
    else:
        return {"result":True}

@ The [[_list_reference]] value specifies data names which must co-occur with the
defined data name. We check that this is indeed the case for all items in the loop.
We trace through alternate values as well. In DDL1 dictionaries, a name terminating
with an underscore indicates that any(?) corresponding name is suitable.

<>=
def validate_loop_references(self,loop_names):
    must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
    must_haves = [a for a in must_haves if a != None]
    # build a flat list. For efficiency we don't remove duplicates, as
    # we expect no more than the order of 10 or 20 looped names.
    def flat_func(a,b):
        if isinstance(b,unicode):
            a.append(b)     #single name
        else:
            a.extend(b)     #list of names
        return a
    flat_mh = []
    [flat_func(flat_mh,a) for a in must_haves]
    group_mh = filter(lambda a:a[-1]=="_",flat_mh)
    single_mh = filter(lambda a:a[-1]!="_",flat_mh)
    res = [a for a in single_mh if a not in loop_names]
    def check_gr(s_item, name_list):
        nl = map(lambda a:a[:len(s_item)],name_list)
        if s_item in nl: return True
        return False
    res_g = [a for a in group_mh if not check_gr(a,loop_names)]   #missing groups
    if len(res) == 0 and len(res_g) == 0:
        return {"result":True}
    # construct alternate list
    alternates = list([(a,self.get_alternates(a)) for a in res])
    # next line purely for error reporting
    missing_alts = [a[0] for a in alternates if a[1] == []]
    alternates = [a for a in alternates if a[1] != []]
    if len(alternates) != len(res):
        return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                           #doesn't have an altern
    #loop over alternates
    for orig_name,alt_names in alternates:
        alt = [a for a in alt_names if a in loop_names]
        if len(alt) == 0:
            return {"result":False,"bad_items":orig_name}   # no alternates
    return {"result":True}        #found alternates

@ A utility function to return a list of alternate names given a main name. In DDL2
we have to deal with aliases. Each aliased item appears in our normalised dictionary
independently, so there is no need to resolve aliases when looking up a data name.
However, the original definition using DDL2-type names is simply copied to this
aliased name during normalisation, so all references to other item names
(e.g. _item_dependent) have to be resolved using the present function. These aliases
are returned in any case, so if we had a data file which mixed DDL1 and DDL2 style
names, it may turn out to be valid, and what's more, we wouldn't necessarily detect
an error if a data name and its alias were present - need to ponder this.

The exclusive_only option will only return items which must not co-exist with the
item name in the same datablock. This includes aliases, and allows us to do a check
that items and their aliases are not present at the same time in a data file.
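By way of a hedged example (the exact lists depend on the dictionary loaded): a
DDL2-normalised definition of [[_atom_site.label]] carrying an
[[_item_aliases.alias_name]] of '_atom_site_label' would give

    cdic.get_alternates('_atom_site.label')                       # -> ['_atom_site_label']
    cdic.get_alternates('_atom_site.label', exclusive_only=True)  # aliases are included here too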
<>= def get_alternates(self,main_name,exclusive_only=False): alternates = self[main_name].get(self.related_func,None) alt_names = [] if alternates != None: alt_names = self[main_name].get(self.related_item,None) if isinstance(alt_names,unicode): alt_names = [alt_names] alternates = [alternates] together = zip(alt_names,alternates) if exclusive_only: alt_names = [a for a in together if a[1]=="alternate_exclusive" \ or a[1]=="replace"] else: alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"] alt_names = list([a[0] for a in alt_names]) # now do the alias thing alias_names = listify(self[main_name].get("_item_aliases.alias_name",[])) alt_names.extend(alias_names) # print("Alternates for {}: {!r}".format(main_name, alt_names)) return alt_names @ Some checks require access to the entire data block. These functions take both a provisional dictionary and a global dictionary; the provisional dictionary includes items which will go into the dictionary together with the current item, and the global dictionary includes items which apply to all data blocks (this is for validation of DDL1/2 dictionaries). <>= <> <> <> <> @ DDL2 dictionaries introduce the "alternate exclusive" category for related items. We also unilaterally include items listed in aliases as acting in this way. <>= def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}): alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)] item_name_list = [a.lower() for a in whole_block.keys()] item_name_list.extend([a.lower() for a in provisional_items.keys()]) bad = [a for a in alternates if a in item_name_list] if len(bad)>0: print("Bad: %s, alternates %s" % (repr(bad),repr(alternates))) return {"result":False,"bad_items":bad} else: return {"result":True} @ When validating parent/child relations, we check the parent link to the children, and separately check that parents exist for any children present. Switching on optimisation will remove the redundancy in this procedure, but only if no changes are made to the relevant data items between the two checks. It appears that DDL2 dictionaries allow parents to be absent if children take only unspecified values (i.e. dot or question mark). We catch this case. The provisional items dictionary includes items that are going to be included with the present item (in a single loop structure) so the philosophy of inclusion must be all or nothing. When validating DDL2 dictionaries themselves, we are allowed access to other definition blocks in order to resolve parent-child pointers. We will be able to find these save frames inside the globals dictionary (they will in this case be collected inside a CifBlock object). When removing, we look at the item to make sure that no child items require it to be present. 
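The workhorse comparison used throughout is [[check_parent_child]], defined below;
its behaviour is easy to pin down with a small example:

    # child values must all appear among the parent values; '.' and '?' never count as missing
    cdic.check_parent_child(['C1', 'C2'], ['C1', '?', 'C3'])    # -> ['C3']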
<>= # validate that parent exists and contains matching values def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}): parent_item = self[item_name].get(self.parent_spec) if not parent_item: return {"result":None} #no parent specified if isinstance(parent_item,list): parent_item = parent_item[0] if self.optimize: if parent_item in self.done_parents: return {"result":None} else: self.done_parents.append(parent_item) print("Done parents %s" % repr(self.done_parents)) # initialise parent/child values if isinstance(item_value,unicode): child_values = [item_value] else: child_values = item_value[:] #copy for safety # track down the parent # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block)) # if globals contains the parent values, we are doing a DDL2 dictionary, and so # we have collected all parent values into the global block - so no need to search # for them elsewhere. # print("Looking for {!r}".format(parent_item)) parent_values = globals.get(parent_item) if not parent_values: parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # go for alternates namespace = whole_block.keys() namespace.extend(provisional_items.keys()) namespace.extend(globals.keys()) alt_names = filter_present(self.get_alternates(parent_item),namespace) if len(alt_names) == 0: if len([a for a in child_values if a != "." and a != "?"])>0: return {"result":False,"parent":parent_item}#no parent available -> error else: return {"result":None} #maybe True is more appropriate?? parent_item = alt_names[0] #should never be more than one?? parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # check global block parent_values = globals.get(parent_item) if isinstance(parent_values,unicode): parent_values = [parent_values] #print("Checking parent %s against %s, values %r/%r" % (parent_item, # item_name, parent_values, child_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing) > 0: return {"result":False,"bad_values":missing,"parent":parent_item} return {"result":True} def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}): try: child_items = self[item_name][self.child_spec][:] #copy except KeyError: return {"result":None} #not relevant # special case for dictionaries -> we check parents of children only if item_name in globals: #dictionary so skip return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] if isinstance(item_value,unicode): # single value parent_values = [item_value] else: parent_values = item_value[:] # expand child list with list of alternates for child_item in child_items[:]: child_items.extend(self.get_alternates(child_item)) # now loop over the children for child_item in child_items: if self.optimize: if child_item in self.done_children: return {"result":None} else: self.done_children.append(child_item) print("Done children %s" % repr(self.done_children)) if child_item in provisional_items: child_values = provisional_items[child_item][:] elif child_item in whole_block: child_values = whole_block[child_item][:] else: continue if isinstance(child_values,unicode): child_values = [child_values] # print("Checking child %s against %s, values %r/%r" % (child_item, # item_name, child_values, parent_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing)>0: return 
{"result":False,"bad_values":missing,"child":child_item} return {"result":True} #could mean that no child items present #a generic checker: all child vals should appear in parent_vals def check_parent_child(self,parent_vals,child_vals): # shield ourselves from dots and question marks pv = parent_vals[:] pv.extend([".","?"]) res = [a for a in child_vals if a not in pv] #print("Missing: %s" % res) return res def validate_remove_parent_child(self,item_name,whole_block): try: child_items = self[item_name][self.child_spec] except KeyError: return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] for child_item in child_items: if child_item in whole_block: return {"result":False,"child":child_item} return {"result":True} @ The DDL2 [[_item_dependent]] attribute at first glance appears to be the same as [[_list_reference]], however the dependent item does not have to appear in a loop at all, and neither does the other item name. Perhaps this behaviour was intended to be implied by having looped [[_names]] in DDL1 dictionaries, but we can't be sure and so don't implement this yet. <>= def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}): try: dep_items = self[item_name][self.dep_spec][:] except KeyError: return {"result":None} #not relevant if isinstance(dep_items,unicode): dep_items = [dep_items] actual_names = whole_block.keys() actual_names.extend(prov.keys()) actual_names.extend(globals.keys()) missing = [a for a in dep_items if a not in actual_names] if len(missing) > 0: alternates = map(lambda a:[self.get_alternates(a),a],missing) # compact way to get a list of alternative items which are # present have_check = [(filter_present(b[0],actual_names), b[1]) for b in alternates] have_check = list([a for a in have_check if len(a[0])==0]) if len(have_check) > 0: have_check = [a[1] for a in have_check] return {"result":False,"bad_items":have_check} return {"result":True} @ The [[_list_uniqueness]] attribute permits specification of a single or multiple items which must have a unique combined value. Currently it is only used in the powder dictionary to indicate that peaks must have a unique index and in the core dictionary to indicate the a publication section name with its label must be unique; however it would appear to implicitly apply to any index-type value in any dictionary. This is used precisely once in the cif_core dictionary in a non-intuitive manner, but we code for this here. The value of the [[_list_uniqueness]] attribute can actually refer to another data name, which together with the defined name must be unique. DDL2 dictionaries do away with separate [[_list_mandatory]] and [[_list_uniqueness]] attributes, instead using a [[_category_key]]. If multiple keys are specified, they must be unique in combination, in accordance with standard relational database behaviour. <>= def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={}, globals={}): category = self[item_name].get(self.cat_spec) if category == None: print("No category found for %s" % item_name) return {"result":None} # print("Category {!r} for item {}".format(category, item_name)) # we make a copy in the following as we will be removing stuff later! 
unique_i = self[category].get("_category_key.name",[])[:] if isinstance(unique_i,unicode): unique_i = [unique_i] if item_name not in unique_i: #no need to verify return {"result":None} if isinstance(item_value,unicode): #not looped return {"result":None} # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i)) # check that we can't optimize by not doing this check if self.optimize: if unique_i in self.done_keys: return {"result":None} else: self.done_keys.append(unique_i) val_list = [] # get the matching data from any other data items unique_i.remove(item_name) other_data = [] if len(unique_i) > 0: # i.e. do have others to think about for other_name in unique_i: # we look for the value first in the provisional dict, then the main block # the logic being that anything in the provisional dict overrides the # main block if other_name in provisional_items: other_data.append(provisional_items[other_name]) elif other_name in whole_block: other_data.append(whole_block[other_name]) elif self[other_name].get(self.must_exist_spec)=="implicit": other_data.append([item_name]*len(item_value)) #placeholder else: return {"result":False,"bad_items":other_name}#missing data name # ok, so we go through all of our values # this works by comparing lists of strings to one other, and # so could be fooled if you think that '1.' and '1' are # identical for i in range(len(item_value)): #print("Value no. %d" % i, end=" ") this_entry = item_value[i] for j in range(len(other_data)): this_entry = " ".join([this_entry,other_data[j][i]]) #print("Looking for {!r} in {!r}: ".format(this_entry, val_list)) if this_entry in val_list: return {"result":False,"bad_values":this_entry} val_list.append(this_entry) return {"result":True} <>= <> <> <> @ DDL2 introduces a new idea, that of a mandatory category, items of which must be present. We check only this particular fact, and leave the checks for mandatory items within the category, keys etc. to the relevant routines. This would appear to be applicable to dictionaries only. Also, although the natural meaning for a DDL2 dictionary would be that items from these categories must appear in every definition block, this is not what happens in practice, as category definitions do not have anything from the (mandatory) _item_description category. We therefore adopt the supremely useless meaning that mandatory categories in a dictionary context mean only that somewhere, maybe in only one save frame, an item from this category exists. This interpretation is forced by using the "fake_mand" argument, which then assumes that the alternative routine will be used to set the error information on a dictionary-wide basis. <>= def validate_mandatory_category(self,whole_block): mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"] if len(mand_cats) == 0: return {"result":True} # print("Mandatory categories - {!r}".format(mand_cats) # find which categories each of our datanames belongs to all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()] missing = set(mand_cats) - set(all_cats) if len(missing) > 0: return {"result":False,"bad_items":repr(missing)} return {"result":True} @ Processing DDLm mandatory categories/items DDLm manages mandatory items by providing a table in the DDLm dictionary which classifies datanames into mandatory/recommended/prohibited for dictionary, category or item scopes. 
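To make the scope table concrete, a minimal sketch with invented contents follows; the real [[scopes_mandatory]] table is assembled from the DDLm attribute dictionary, so the entries below are illustrative only:

scopes_mandatory = {"Dictionary": ["_dictionary.title","_dictionary.version"],
                    "Category": ["_definition.id","_definition.scope"],
                    "Item": ["_definition.id","_name.category_id"]}   #invented entries
whole_block = {"_definition.id": "_cell.volume"}    #hypothetical definition block
missing = [a for a in scopes_mandatory["Item"] if a not in whole_block]
# missing -> ["_name.category_id"]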
Note that the following check might fail for categories and dictionaries if '_definition.scope' or '_dictionary.title' is missing. <>= def check_mandatory_items(self,whole_block,default_scope='Item'): """Return an error if any mandatory items are missing""" if len(self.scopes_mandatory)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block]) if len(missing)==0: return {"result":True} else: return {"result":False,"bad_items":missing} <>= def check_prohibited_items(self,whole_block,default_scope='Item'): """Return an error if any prohibited items are present""" if len(self.scopes_naughty)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' present = list([a for a in self.scopes_naughty[scope] if a in whole_block]) if len(present)==0: return {"result":True} else: return {"result":False,"bad_items":present} @ These validation checks are intended to be called externally. They return a dictionary keyed by item name with value being a list of the results of the check functions. The individual functions return a dictionary which contains at least the key "result", and in case of error relevant keys relating to the error. <>= def run_item_validation(self,item_name,item_value): return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])} def run_loop_validation(self,loop_names): return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])} def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}): results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]) return {item_name:results} def run_block_validation(self,whole_block,block_scope='Item'): results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs]) # fix up the return values return {"whole_block":results} @ Optimization: the dictionary validation routines normally retain no history of what has been checked, as they are executed on a per-item basis. This leads to duplication of the uniqueness check, when there is more than one key, and duplication of the parent-child check, once for the parent and once for the child. By switching on optimisation, a record is kept and these checks will not be repeated. This is safe only if none of the relevant items is altered while optimisation is on, and optimisation should be switched off as soon as all the checks are finished. <>= def optimize_on(self): self.optimize = True self.done_keys = [] self.done_children = [] self.done_parents = [] def optimize_off(self): self.optimize = False self.done_keys = [] self.done_children = [] self.done_parents = [] @ Preparing our type expressions In DDL2 dictionaries our type expressions are given in the main block as POSIX regexps, so we can pass them on to the re package. For DDL1 dictionaries we could get them from the DDL1 language definition, but for now we just hard code them. Essentially only the number definition is important, as the syntax check during reading/writing will catch any char violations. 
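For concreteness, a standalone sketch of the kind of hard-coded number expression meant here (the exact pattern used may differ slightly; note the [[$]] appended to force a full match, as discussed below):

import re
# accept integers, decimals and exponent forms, as for DDL1 'numb' values
numb = re.compile(r"-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?$")
assert numb.match("1.5e-3") and numb.match("42") and not numb.match("abc")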
Note that the Python re engine is not POSIX compliant in that it will not return the longest leftmost match, but rather the first leftmost match. John Bollinger suggested an obvious fix: we append a [[$]] to force a full match. In other regexp editing, the [[\{]] sequence inside the character sets of some of the regexps is actually interpreted as an escaped bracket, so the backslash vanishes. We add it back in by doing a very hackish and ugly substitution which substitutes these two characters anywhere that they occur inside square brackets. A final change is to insert a [[\r]] wherever we find a [[\n]] - it seems that this has been left out. After these changes, and appending default expressions as well, we can now work with DDL2 expressions directly. We keep the primitive code for the single reason that we need to know when we are dealing with a number that has an esd appended, and this is flagged by the primitive code being of type 'numb'. <>= def add_type_info(self): if "_item_type_list.construct" in self.master_block: types = self.master_block["_item_type_list.code"] prim_types = self.master_block["_item_type_list.primitive_code"] constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]]) # add in \r wherever we see \n, and change \{ to \\{ def regex_fiddle(mm_regex): brack_match = r"((.*\[.+)(\\{)(.*\].*))" ret_match = r"((.*\[.+)(\\n)(.*\].*))" fixed_regexp = mm_regex[:] #copy # fix the brackets bm = re.match(brack_match,mm_regex) if bm != None: fixed_regexp = bm.expand(r"\2\\\\{\4") # fix missing \r rm = re.match(ret_match,fixed_regexp) if rm != None: fixed_regexp = rm.expand(r"\2\3\\r\4") #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp)) return fixed_regexp constructs = map(regex_fiddle,constructs) for typecode,construct in zip(types,constructs): self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL) # now make a primitive <-> type construct mapping for typecode,primtype in zip(types,prim_types): self.primdic[typecode] = primtype @ \section{Linkage to dREL} The drel_ast_yacc package will generate an Abstract Syntax Tree, which we then convert to a Python function using [[py_from_ast.make_function]]. We use it during initialisation to transform all methods to Python expressions, and then the [[derive_item]] method will use this to try to derive the expression. Note that newline is the only recognised statement separator in dREL, so we make sure all lines are separated in this way. We also allow multiple 'Evaluation' methods, which is an enhancement of the current standard. The [[make_function]] function requires dictionary information to be supplied regarding looped categories and keys. If we were really serious about dictionary-driven software, the attribute lookups that follow would not use get(), but square brackets and allow default values to be returned. However, that would require assigning a dictionary to the dictionary and consequent automated searches which I cannot be bothered to do at this stage. Just be aware that the default value in the get() statement is the _enumeration.default specified in ddl.dic. <>= <> <> <> <> <> <> <> <> <> @ Full initialisation. This can take some time so we optionally skip it, but can call this function separately at a later stage if needed.
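For illustration, a hypothetical usage sketch of this deferred set-up; the [[do_dREL]] constructor flag is assumed here, so check the [[CifDic]] signature for the exact argument name:

from CifFile import CifDic
cdic = CifDic("cif_core.dic",do_dREL=False)   #assumed flag: skip dREL parsing at load time
# ... later, when derivations are actually required:
cdic.initialise_drel()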
<>= def initialise_drel(self): """Parse drel functions and prepare data structures in dictionary""" self.ddlm_parse_valid() #extract validity information from data block self.transform_drel() #parse the drel functions self.add_drel_funcs() #put the drel functions into the namespace <>= def transform_drel(self): from .drel import drel_ast_yacc from .drel import py_from_ast import traceback parser = drel_ast_yacc.parser lexer = drel_ast_yacc.lexer my_namespace = self.keys() my_namespace = dict(zip(my_namespace,my_namespace)) # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")] derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \ and self[a].get("_name.category_id","")!= "function"] for derivable in derivable_list: target_id = derivable # reset the list of visible names for parser special_ids = [dict(zip(self.keys(),self.keys()))] print("Target id: %s" % derivable) drel_exprs = self[derivable]["_method.expression"] drel_purposes = self[derivable]["_method.purpose"] all_methods = [] if not isinstance(drel_exprs,list): drel_exprs = [drel_exprs] drel_purposes = [drel_purposes] for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs): if drel_purpose != 'Evaluation': continue drel_expr = "\n".join(drel_expr.splitlines()) # print("Transforming %s" % drel_expr) # List categories are treated differently... try: meth_ast = parser.parse(drel_expr+"\n",lexer=lexer) except: print('Syntax error in method for %s; leaving as is' % derivable) a,b = sys.exc_info()[:2] print((repr(a),repr(b))) print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout)) # reset the lexer lexer.begin('INITIAL') continue # Construct the python method cat_meth = False if self[derivable].get('_definition.scope','Item') == 'Category': cat_meth = True pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id, loopable=loop_info, cif_dic = self,cat_meth=cat_meth) all_methods.append(pyth_meth) if len(all_methods)>0: save_overwrite = self[derivable].overwrite self[derivable].overwrite = True self[derivable]["_method.py_expression"] = all_methods self[derivable].overwrite = save_overwrite #print("Final result:\n " + repr(self[derivable]["_method.py_expression"])) @ Drel functions are all stored in category 'functions' in our final dictionary. We want to convert them to executable python code and store them in an appropriate namespace which we can then pass to our individual item methods. As dREL accepts only linefeed as a terminator, we convert the input text as required. <>= def add_drel_funcs(self): from .drel import drel_ast_yacc from .drel import py_from_ast funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function'] funcnames = [(self[a]["_name.object_id"], getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist] # create executable python code... 
parser = drel_ast_yacc.parser # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) for funcname,funcbody in funcnames: newline_body = "\n".join(funcbody.splitlines()) parser.target_id = funcname res_ast = parser.parse(newline_body) py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self) #print('dREL library function ->\n' + py_function) global_table = globals() exec(py_function, global_table) #add to namespace #print('Globals after dREL functions added:' + repr(globals())) self.ddlm_functions = globals() #for outside access @ When a dictionary is available during CIF file access, we can resolve a missing dataname in three ways: (1) check if it is defined under an alias; (2) use a dREL method to calculate the value; (3) use default values if defined. We resolve in this order of priority. Note that we also convert to the appropriate type. A subsection of (2) is that, if the entire category is missing, we can either use DDLm category construction information or a category method to find our values; we only do this if no items in the category are present. We raise a StarDerivationError if we cannot derive the item, and internally we set result to None as we go through the various ways of deriving the item. The store_value flag asks us to update the CifFile object with the new value. We remove any numpy dependencies before doing this, which means that we must recreate the numpy type when returning it. The [[allow_defaults]] flag allows default values to be derived. In a situation where multiple methods are available for deriving an item, a calculation that accepts default values may return incorrect values where an alternative calculation method would have given correct ones. For example, if the default value of axis.vector[n] is 0, but I can use an alternative derivation for axis.vector from a different setting, then a calculation that creates axis.vector from the components will give the wrong answer as it will fill in default values when the components are missing. The track_recursion decorator code handles this by propagating the initial value of allow_defaults to nested calls.
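A hypothetical usage sketch of this entry point (file and block names are invented, and [[_cell.volume]] is assumed to carry a dREL method in the loaded dictionary):

from CifFile import CifFile,CifDic
cdic = CifDic("cif_core.dic")             #dictionary providing dREL methods
cf = CifFile("mydata.cif")["mydata"]      #hypothetical data block
# aliases are tried first, then dREL methods, then (optionally) defaults
volume = cdic.derive_item("_cell.volume",cf,store_value=True,allow_defaults=False)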
<>= @track_recursion def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True): key = start_key #starting value result = None #success is a non-None value default_result = False #we have not used a default value <> the_category = self[key]["_name.category_id"] cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] # store any default value in case we have a problem def_val = self[key].get("_enumeration.default","") def_index_val = self[key].get("_enumeration.def_index_id","") if len(has_cat_names)==0: # try category method <> # Recalculate in case it actually worked has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] the_funcs = self[key].get('_method.py_expression',"") if the_funcs: #attempt to calculate it <> if result is None and allow_defaults: # try defaults <> # read it in if result is None: #can't do anything else print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults))) raise StarFile.StarDerivationError(start_key) <> # now try to insert the new information into the right place # find if items of this category already appear... # Never cache empty values if not (isinstance(result,list) and len(result)==0) and\ store_value: if self[key].get("_definition.scope","Item")=='Item': if is_looped: result = self.store_new_looped_value(key,cifdata,result,default_result) else: result = self.store_new_unlooped_value(key,cifdata,result) else: self.store_new_cat_values(cifdata,result,the_category) return result @ Adjusting our calculated value. If we have used a default value or we have None, we need to make the dimension match the currently-existing length of the category. <>= is_looped = False if self[the_category].get('_definition.class','Set')=='Loop': is_looped = True if len(has_cat_names)>0: #this category already exists if result is None or default_result: #need to create a list of values loop_len = len(cifdata[has_cat_names[0]]) out_result = [result]*loop_len result = out_result else: #nothing exists in this category, we can't store this at all print('Resetting result %s for %s to null list as category is empty' % (key,result)) result = [] @ Storing a dREL-derived value back into our CifFile. The dREL value (or potentially a simple default value) may correspond to an entire column, or even an entire loop for category methods. We have to distinguish between list values that are StarLists, that is, a single CIF value, and list values that correspond to a column of a loop. Additionally, testing has revealed that we cannot judge the type of elements in a list by the first element (e.g. could be a plain list, then a numpy array). The [[conv_from_numpy]] mini-functions are designed to handle arbitrary numpy arrays quickly. <>= def store_new_looped_value(self,key,cifdata,result,default_result): """Store a looped value from the dREL system into a CifFile""" # try to change any matrices etc. 
to lists the_category = self[key]["_name.category_id"] out_result = result if result is not None and not default_result: # find any numpy arrays def conv_from_numpy(one_elem): if not hasattr(one_elem,'dtype'): if isinstance(one_elem,(list,tuple)): return StarFile.StarList([conv_from_numpy(a) for a in one_elem]) return one_elem if one_elem.size > 1: #so is not a float return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()]) else: try: return one_elem.item(0) except: return one_elem out_result = [conv_from_numpy(a) for a in result] # so out_result now contains a value suitable for storage cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if a in cifdata] print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names)) if len(has_cat_names)>0: #this category already exists cifdata[key] = out_result #lengths must match or else!! cifdata.AddLoopName(has_cat_names[0],key) else: cifdata[key] = out_result cifdata.CreateLoop([key]) print('Loop info:' + repr(cifdata.loops)) return out_result def store_new_unlooped_value(self,key,cifdata,result): """Store a single value from the dREL system""" if result is not None and hasattr(result,'dtype'): if result.size > 1: out_result = StarFile.StarList(result.tolist()) cifdata[key] = out_result else: cifdata[key] = result.item(0) else: cifdata[key] = result return result @ Storing category results. dREL allows 'category methods', which initialise an entire category. The dREL system that we have written returns a dictionary of lists, with the dictionary keys being item names. It is sufficient for us to extract each of these names and pass them to our normal storage routine. If some of the values in the category key are duplicated, we bail, as we may overwrite previous values. We also bail if we do not have exactly the same datanames available, as we are too lazy to insert 'unknown' in the non-matching positions. <>= def store_new_cat_values(self,cifdata,result,the_category): """Store the values in [[result]] into [[cifdata]]""" the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key'] double_names = [a for a in result.keys() if a in cifdata] if len(double_names)>0: already_present = [a for a in self.names_in_cat(the_category) if a in cifdata] if set(already_present) != set(result.keys()): print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys())))) return #check key values; the_key is a list of key datanames (normally exactly one) if len(the_key)>0: old_keys = set(cifdata[the_key[0]]) common_keys = old_keys & set(result[the_key[0]]) if len(common_keys)>0: print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys))) return #extend result values with old values for one_name,one_value in result.items(): result[one_name].extend(cifdata[one_name]) for one_name, one_value in result.items(): try: self.store_new_looped_value(one_name,cifdata,one_value,False) except StarFile.StarError: print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value))) #put the key as the first item print('Fixing item order for {}'.format(repr(the_key))) for one_key in the_key: #should only be one cifdata.ChangeItemOrder(one_key,0) @ Executing a dREL method. The execution defines a function, 'pyfunc', which is then itself executed in global scope. This has caused us some grief in order to get the bindings right (e.g. having StarList in scope).
Essentially, anything that the method might refer to should be in scope at this point, otherwise the way Python works it will be too late to have things in scope within the enclosing routine that calls this function. Importing the necessary modules at the beginning of the module file (as done here) seems to be a reliable way to go. <>= #global_table = globals() #global_table.update(self.ddlm_functions) for one_func in the_funcs: print('Executing function for %s:' % key) #print(one_func) exec(one_func, globals()) #will access dREL functions, puts "pyfunc" in scope # print('in following global environment: ' + repr(global_table)) stored_setting = cifdata.provide_value cifdata.provide_value = True try: result = pyfunc(cifdata) except CifRecursionError as s: print(s) result = None except StarFile.StarDerivationError as s: print(s) result = None finally: cifdata.provide_value = stored_setting if result is not None: break #print("Function returned {!r}".format(result)) @ \subsection{Creating categories} A category can be created from scratch (i.e. the identifiers produced) if the appropriate DDLm attributes are defined - currently, experimental attributes 'category_construct_local' are included in the test dictionaries for this purpose. They define two types of 'pullback' (see any category theory textbook), which we can use to create a category. If these attributes are absent, we can instead execute a category method. We only add any new category items calculated in this way if the category does not exist or (i) the category IDs are not already present and (ii) the set of attributes calculated is an exact match for the set of datanames already present. <>= cat_result = {} pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]] pulled_from_cats = [(k,[ self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']] ) for k in pulled_from_cats] pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]] if '_category_construct_local.type' in self[the_category]: print("**Now constructing category %s using DDLm attributes**" % the_category) try: cat_result = self.construct_category(the_category,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s (error)' % the_category) # Trying a pull-back when the category is partially populated # will not work, hence we test that cat_result has no keys if len(pulled_to_cats)>0 and len(cat_result)==0: print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats))) try: cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s from pullback information (error)' % the_category) if '_method.py_expression' in self[the_category] and key not in cat_result: print("**Now applying category method for %s in search of %s**" % (the_category,key)) cat_result = self.derive_item(the_category,cifdata,store_value=True) print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result)) # do we now have our value? if key in cat_result: return cat_result[key] @ Constructing categories using DDLm attributes. We have defined local attributes that describe category construction using mathematical 'pullbacks'. We can use these to fill a category, but also to populate a category if the pullback category is available. 
We use [[list]] to coerce all values to a list in case we are passed a numpy array, which does not have an 'index' method. <>= def construct_category(self,category,cifdata,store_value=True): """Construct a category using DDLm attributes""" con_type = self[category].get('_category_construct_local.type',None) if con_type == None: return {} if con_type == 'Pullback' or con_type == 'Filter': morphisms = self[category]['_category_construct_local.components'] morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat cats = [self[a]['_name.category_id'] for a in morphisms] cat_keys = [self[a]['_category.key_id'] for a in cats] cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat if con_type == 'Filter': int_filter = self[category].get('_category_construct_local.integer_filter',None) text_filter = self[category].get('_category_construct_local.text_filter',None) if int_filter is not None: morph_values.append([int(a) for a in int_filter]) if text_filter is not None: morph_values.append(text_filter) cat_values.append(range(len(morph_values[-1]))) # create the mathematical product filtered by equality of dataname values pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \ if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]] # now prepare for return if len(pullback_ids)==0: return {} newids = self[category]['_category_construct_local.new_ids'] fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids] if con_type == 'Pullback': final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids)) elif con_type == 'Filter': #simple filter final_results = {fullnewids[0]:[x[0] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) if store_value: self.store_new_cat_values(cifdata,final_results,category) return final_results @ Going the other way. If we have the pulled-back category, we can populate the pulled-from categories with their identifier items using projections from the pulled-back category. In the special case that we have a pullback that uses a filter function with a single element, we can automatically populate the whole commutative square. We also by default populate identically-named datanames. The projection datanames are given in _category_construct_local.new_ids, and they always map to the key of the projected-to category. 
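Before the projection code, a minimal plain-Python illustration of the filtered-product ('pullback') construction implemented in [[construct_category]] above, using invented key/value data:

cat1_keys = [1,2,3]; cat1_vals = ["a","b","a"]   #map from cat1 key to dataname value
cat2_keys = [10,20]; cat2_vals = ["a","b"]       #map from cat2 key to dataname value
# the pullback pairs up keys whose mapped values agree
pullback = [(x,y) for x in cat1_keys for y in cat2_keys
            if cat1_vals[cat1_keys.index(x)]==cat2_vals[cat2_keys.index(y)]]
# pullback -> [(1, 10), (2, 20), (3, 10)]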
<>= def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True): """Each of the categories in source_categories are pullbacks that include the target_category""" target_key = self[target_category]['_category.key_id'] result = {target_key:[]} first_time = True # for each source category, determine which element goes to the target for sc in source_categories: components = self[sc]['_category_construct_local.components'] comp_cats = [self[c]['_name.category_id'] for c in components] new_ids = self[sc]['_category_construct_local.new_ids'] source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids] if len(components) == 2: # not a filter element_pos = comp_cats.index(target_category) old_id = source_ids[element_pos] print('Using %s to populate %s' % (old_id,target_key)) result[target_key].extend(cifdata[old_id]) # project through all identical names extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key]) # we only include keys that are common to all categories if first_time: result.update(extra_result) else: for k in extra_result.keys(): if k in result: print('Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids) if len(extra_result)>0 or source_ids[0] in cifdata: #something is present result[target_key].extend(cifdata[source_ids[0]]) for k in extra_result.keys(): if k in result: print('Reverse filter: Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: result[k]=extra_result[k] # Bonus derivation if there is a singleton filter if self[sc]['_category_construct_local.type'] == 'Filter': int_filter = self[sc].get('_category_construct_local.integer_filter',None) text_filter = self[sc].get('_category_construct_local.text_filter',None) if int_filter is not None: filter_values = int_filter else: filter_values = text_filter if len(filter_values)==1: #a singleton extra_dataname = self[sc]['_category_construct_local.components'][0] if int_filter is not None: new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]]) else: new_value = filter_values * len(cifdata[source_ids[0]]) if extra_dataname not in result: result[extra_dataname] = new_value else: result[extra_dataname].extend(new_value) else: raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type']) first_time = False # check for sanity - all dataname lengths must be identical datalen = len(set([len(a) for a in result.values()])) if datalen != 1: raise AssertionError('Failed to construct equal-length category items,'+ repr(result)) if store_value: print('Now storing ' + repr(result)) self.store_new_cat_values(cifdata,result,target_category) return result def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]): """Copy across datanames for which the from_category key equals [[key_vals]]""" result = {} s_names_in_cat = set(self.names_in_cat(from_category,names_only=True)) t_names_in_cat = set(self.names_in_cat(to_category,names_only=True)) can_project = s_names_in_cat & t_names_in_cat can_project -= set(skip_names) #already dealt with source_key = self[from_category]['_category.key_id'] print('Source dataname set: ' + repr(s_names_in_cat)) print('Target dataname set: ' + repr(t_names_in_cat)) print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project)) for project_name in can_project: 
full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0] full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0] if key_vals is None: try: result[full_to_name] = cifdata[full_from_name] except StarFile.StarDerivationError: pass else: all_key_vals = cifdata[source_key] filter_pos = [all_key_vals.index(a) for a in key_vals] try: all_data_vals = cifdata[full_from_name] except StarFile.StarDerivationError: pass else: #only store if the source values could be obtained result[full_to_name] = [all_data_vals[i] for i in filter_pos] return result @ Aliases. If we have this item under a different name, find it and return it immediately after putting it into the correct type. We could be passed either the dictionary-defined dataname, or any of its previous names. We have stored our aliases as a table indexed by dictionary-defined dataname in order to potentially translate from old to new datanames. Once we find a dataname that is present in the datafile, we return it. Note that we have two types of check: in one we are given an old-style dataname, and have to find the new or other old version (in which case we have to check the key of the table); in the other we are given the latest version of the dataname and have to check for older names in the datafile. This latter is the dREL situation, so we have optimised for it by checking it first and making the modern datanames the table keys. Note that this section of code occurs first in the 'derive_item' routine and will change the value of 'key' to the dictionary value even if nothing is available in the datafile, thereby enabling the other derivation routes. <>= # check for aliases # check for an older form of a new value found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata] if len(found_it)>0: corrected_type = self.change_type(key,cifdata[found_it[0]]) return corrected_type # now do the reverse check - any alternative form alias_name = [a for a in self.alias_table.items() if key in a[1]] print('Aliases for %s: %s' % (key,repr(alias_name))) if len(alias_name)==1: key = alias_name[0][0] #actual definition name if key in cifdata: return self.change_type(key,cifdata[key]) found_it = [k for k in alias_name[0][1] if k in cifdata] if len(found_it)>0: return self.change_type(key,cifdata[found_it[0]]) elif len(alias_name)>1: raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name)) @ Using the defaults system. We also check out any default values which we could return in case of error. Note that DDLm adds '_enumeration.def_index_id' as an alternative way to derive a value from a table. During development, we deliberately allow errors arising from the method to be propagated so that we can see anything that might be wrong. If we are using default values, we need to fill in the whole column of a looped category. This is taken care of at the end of the derivation function, so we simply set a flag to say that this is necessary.
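A standalone sketch of the keyed-default lookup described above, with invented values standing in for the [[_enumeration_default.index]] and [[_enumeration_default.value]] columns:

index_vals = ["c","h","o"]                    #_enumeration_default.index (lowercased for Code types)
default_vals = ["12.011","1.008","15.999"]    #_enumeration_default.value (invented)
val_to_index = "H"       #value of the item named by _enumeration.def_index_id
keypos = index_vals.index(val_to_index.lower())
# default_vals[keypos] -> "1.008"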
<>= if def_val: result = self.change_type(key,def_val) default_result = True elif def_index_val: #derive a default value index_vals = self[key]["_enumeration_default.index"] val_to_index = cifdata[def_index_val] #what we are keying on lcase_comp = False #initialise so it is always bound below if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']: lcase_comp = True index_vals = [a.lower() for a in index_vals] # Handle loops if isinstance(val_to_index,list): if lcase_comp: val_to_index = [a.lower() for a in val_to_index] keypos = [index_vals.index(a) for a in val_to_index] result = [self[key]["_enumeration_default.value"][a] for a in keypos] else: if lcase_comp: val_to_index = val_to_index.lower() keypos = index_vals.index(val_to_index) #value error if no such value available result = self[key]["_enumeration_default.value"][keypos] default_result = True #flag that it must be extended result = self.change_type(key,result) print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index))) @ If a key is missing, we may sometimes fill in default values for it; for example, a missing atom type may be assumed to have a 'number in cell' of 0. <>= def generate_default_packet(self,catname,catkey,keyvalue): """Return a StarPacket with items from ``catname`` and a key value of ``keyvalue``""" newpack = StarPacket() for na in self.names_in_cat(catname): def_val = self[na].get("_enumeration.default","") if def_val: final_val = self.change_type(na,def_val) newpack.extend(final_val) setattr(newpack,na,final_val) if len(newpack)>0: newpack.extend(keyvalue) setattr(newpack,catkey,keyvalue) return newpack @ In the single case of executing dREL methods, we wish to return numpy arrays from our __getitem__ so that the mathematical operations proceed as expected for matrix etc. objects. This needs to be reimplemented: currently numpy must be installed for 'numerification' to work. <>= def switch_numpy(self,to_val): pass @ This function converts the string-valued items returned from the parser into types that correspond to the dictionary specifications. For DDLm it must also deal with potentially complex structures containing both strings and numbers. We have tried to avoid introducing a dependence on Numpy in general for PyCIFRW, but once we get into the realm of DDLm we require Numpy arrays in order to handle the various processing tasks. This routine is the one that will create the arrays from the StarList types, so needs access to numpy. However, this routine is only called if a DDLm dictionary has been provided, so we should still have no Numpy dependence for non-DDLm cases. For safety, we check that our object is really string-valued. In practice, this means that it is either a string, a list of strings, or a list of StarLists, as these are the only datastructures that an as-parsed file will contain. <>= def change_type(self,itemname,inval): if inval == "?": return inval change_function = convert_type(self[itemname]) if isinstance(inval,list) and not isinstance(inval,StarFile.StarList): #from a loop newval = list([change_function(a) for a in inval]) else: newval = change_function(inval) return newval @ We may be passed float values which have esds appended. We catch this case by searching for an opening round bracket. <>= def float_with_esd(inval): if isinstance(inval,unicode): j = inval.find("(") if j>=0: return float(inval[:j]) return float(inval) @ This function analyses a DDL1-type range expression, returning a maximum and minimum value. If the number format were ever to change, we need to change this right here, right now.
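For reference, the intended behaviour of [[getmaxmin]] (defined below) on typical DDL1 range expressions, where the returned tuple is (maximum,minimum) and "." stands in for an open bound (here [[dic]] is any initialised CifDic instance):

# dic.getmaxmin("0.0:360.0")  ->  (360.0, 0.0)
# dic.getmaxmin("0.0:")       ->  (".", 0.0)    open-ended maximum
# dic.getmaxmin(":")          ->  (".", ".")    completely unbounded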
<>= def getmaxmin(self,rangeexp): regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*' regexp = regexp + ":" + regexp rangematch = re.match(regexp,rangeexp) try: minimum = rangematch.group(1) maximum = rangematch.group(7) except AttributeError: print("Can't match %s" % rangeexp) minimum = None #treat an unparseable range as unbounded maximum = None if minimum == None: minimum = "." else: minimum = float(minimum) if maximum == None: maximum = "." else: maximum = float(maximum) return maximum,minimum @ \subsection {Outputting dictionaries} We would like dictionary blocks to be output in a readable order, that is, parent categories before their child definitions. The base BlockCollection output routines have no knowledge of save frame interrelations, so we have to override the output block order returned by the get_child_list routine. <>= def WriteOut(self,**kwargs): myblockorder = self.get_full_child_list() self.set_grammar(self.grammar) self.standard = 'Dic' return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs) def get_full_child_list(self): """Return a list of definition blocks in order parent-child-child-child-parent-child...""" top_block = self.get_roots()[0][0] root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head'] if len(root_cat) == 1: all_names = [top_block] + self.recurse_child_list(root_cat[0]) unrooted = self.ddlm_danglers() double_names = set(unrooted).intersection(set(all_names)) if len(double_names)>0: raise CifError('Names are children of internal and external categories:%s' % repr(double_names)) remaining = unrooted[:] for no_root in unrooted: if self[no_root].get('_definition.scope','Item')=='Category': all_names += [no_root] remaining.remove(no_root) these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()] all_names += these_children [remaining.remove(n) for n in these_children] # now sort by category ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining]) for e in ext_cats: cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e] [remaining.remove(n) for n in cat_items] all_names += cat_items if len(remaining)>0: print('WARNING: following items do not seem to belong to a category??') print(repr(remaining)) all_names += remaining print('Final block order: ' + repr(all_names)) return all_names raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead') def cat_from_name(self,one_name): """Guess the category from the name. This should be used only when this is not important semantic information, for example, when printing out""" (cat,obj) = one_name.split(".") if cat[0] == "_": cat = cat[1:] return cat def recurse_child_list(self,parentname): """Recursively expand the logical child list of [[parentname]]""" final_list = [parentname] child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()] child_blocks.sort() #we love alphabetical order child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item'] final_list += child_items child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category'] for child_cat in child_cats: final_list += self.recurse_child_list(child_cat) return final_list @ \section {Valid CIFs} A whole new can of worms is opened up when we require that a CIF is not only syntactically correct, but valid according to the specified dictionary.
A valid CIF is essentially a collection of valid CIF blocks. It may be the case in the future that inter-block relationships need to be checked, so we define a separate [[ValidCifFile]] class. <>= class ValidCifBlock(CifBlock): """A `CifBlock` that is valid with respect to a given CIF dictionary. Methods of `CifBlock` are overridden where necessary to disallow addition of invalid items to the `CifBlock`. ## Initialisation * `dic` is a `CifDic` object to be used for validation. """ <> <> <> <> <> @ The [[dic]] argument contains a previously initialised dictionary. We can alternatively provide a list of filenames/CifFiles which are merged according to mergemode. Both cannot be provided. <>= def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords): CifBlock.__init__(self,*args,**kwords) if dic and diclist: print("Warning: diclist argument ignored when initialising ValidCifBlock") if dic is not None: #do not reject diclist-only initialisation if isinstance(dic,CifDic): self.fulldic = dic else: raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument") if len(diclist)==0 and dic is None: raise ValidCifError( "At least one dictionary must be specified") if diclist and not dic: self.fulldic = merge_dic(diclist,mergemode) if not self.run_data_checks()[0]: raise ValidCifError( self.report()) @ Run all of these data checks. The dictionary validation methods return a list of tuples (validation function name, result) for each item. When checking a full data block, we can make use of the optimisation facilities provided in the [[CifDic]] object. <>= def run_data_checks(self,verbose=False): self.v_result = {} self.fulldic.optimize_on() for dataname in self.keys(): update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname])) update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self)) for loop_names in self.loops.values(): update_value(self.v_result,self.fulldic.run_loop_validation(loop_names)) # now run block-level checks update_value(self.v_result,self.fulldic.run_block_validation(self)) # return false and list of baddies if anything didn't match self.fulldic.optimize_off() all_keys = list(self.v_result.keys()) #dictionary will change for test_key in all_keys: #print("%s: %r" % (test_key, self.v_result[test_key])) self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False] if len(self.v_result[test_key]) == 0: del self.v_result[test_key] isvalid = len(self.v_result)==0 #if not isvalid: # print("Baddies: {!r}".format(self.v_result)) return isvalid,self.v_result @ Report back. We summarise the contents of v_result, naming each failed check for each offending item. <>= def report(self): outstr = StringIO() outstr.write( "Validation results\n") outstr.write( "------------------\n") outstr.write("%d invalid items found\n" % len(self.v_result)) for item_name,val_func_list in self.v_result.items(): outstr.write("%s fails following tests:\n" % item_name) for val_func in val_func_list: outstr.write("\t%s\n" % val_func[0]) return outstr.getvalue() @ It is not a mistake for a data name to be absent from any of the specified dictionaries, so we have to check that we have a match before running any data checks, rather than simply raising an error immediately.
<>= def single_item_check(self,item_name,item_value): #self.match_single_item(item_name) if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_item_validation(item_name,item_value) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def loop_item_check(self,loop_names): in_dic_names = list([a for a in loop_names if a in self.fulldic]) if len(in_dic_names)==0: result = {loop_names[0]:[]} else: result = self.fulldic.run_loop_validation(in_dic_names) baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies)) return isvalid,baddies def global_item_check(self,item_name,item_value,provisional_items={}): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_global_validation(item_name, item_value,self,provisional_items = provisional_items) baddies = list([a for a in result[item_name] if a[1]["result"] is False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def remove_global_item_check(self,item_name): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_remove_global_validation(item_name,self,False) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies @ We need to override the base class methods here to prevent addition of an item that would render an object invalid. 
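A hypothetical usage sketch of the effect of these overrides (dictionary file name and values invented, and assuming the names used are importable from the CifFile package; the exact check that fails depends on the definition):

from CifFile import CifDic,ValidCifBlock,ValidCifError
cdic = CifDic("cif_core.dic")
vb = ValidCifBlock(dic=cdic)
vb.AddItem("_cell_length_a","5.959(1)")          #conforms, so it is stored
try:
    vb.AddItem("_cell_length_a","not_a_number")  #should fail the type check
except ValidCifError as err:
    print(err)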
<>= <> <> <>= def AddCifItem(self,data): if isinstance(data[0],(unicode,str)): # single item valid,problems = self.single_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) valid,problems = self.global_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) elif isinstance(data[0],tuple) or isinstance(data[0],list): paired_data = list(zip(data[0],data[1])) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) valid,problems = self.loop_item_check(data[0]) self.report_if_invalid(valid,problems,data[0]) prov_dict = {} # for storing temporary items for name,value in paired_data: prov_dict[name]=value for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) else: raise ValueError("Programming error: AddCifItem passed non-tuple, non-string item") super(ValidCifBlock,self).AddCifItem(data) def AddItem(self,key,value,**kwargs): """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" valid,problems = self.single_item_check(key,value) self.report_if_invalid(valid,problems,key) valid,problems = self.global_item_check(key,value) self.report_if_invalid(valid,problems,key) super(ValidCifBlock,self).AddItem(key,value,**kwargs) # utility function def report_if_invalid(self,valid,bad_list,data_name): if not valid: bad_tests = [a[0] for a in bad_list] error_string = ",".join(bad_tests) error_string = repr(data_name) + " fails following validity checks: " + error_string raise ValidCifError( error_string) def __delitem__(self,key): # we don't need to run single item checks; we do need to run loop and # global checks. if key in self: try: loop_items = self.GetLoop(key) except TypeError: loop_items = [] if loop_items: #need to check loop conformance loop_names = [a[0] for a in loop_items if a[0] != key] valid,problems = self.loop_item_check(loop_names) self.report_if_invalid(valid,problems,key) #pass the dataname for error reporting valid,problems = self.remove_global_item_check(key) self.report_if_invalid(valid,problems,key) self.RemoveCifItem(key) @ Adding to a loop. We find the loop containing the dataname that we have been passed, and then append all of the (key,values) pairs that we are passed in [[data]], which is a dictionary. We expect that the data have been sorted out for us, unlike when data are passed in [[AddCifItem]], when there can be both unlooped and looped data in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance.
<>= def AddToLoop(self,dataname,loopdata): # single item checks paired_data = list(loopdata.items()) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) # loop item checks; merge with current loop for aloop in self.block["loops"]: if dataname in aloop: loopnames = list(aloop.keys()) for new_name in loopdata.keys(): if new_name not in loopnames: loopnames.append(new_name) valid,problems = self.loop_item_check(loopnames) #was misspelt 'looped_item_check' self.report_if_invalid(valid,problems,dataname) prov_dict = loopdata.copy() for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) CifBlock.AddToLoop(self,dataname,loopdata) @ Note that a dictionary must be specified in order to create a valid CIF file. This dictionary is then passed to any blocks. If they were already [[ValidCifBlocks]], they will be reinitialised. Note that, as reading a dictionary takes time, we do it immediately to save doing it later. As a convenience, we handle lists of filenames/CifFiles which are supposed to be dictionaries, and pass them directly to the [[ValidCifBlock]] object which will merge as necessary. Note that we have to set bigdic before calling __init__. The various calls down through the inheritance hierarchy end up calling ValidCifBlock with self.bigdic as one of the arguments. Also, this __init__ procedure could be called from within StarFile.__init__ if given a filename to read from, so we allow that bigdic might already have been set - and check for its existence before setting it again! <>= class ValidCifFile(CifFile): """A CIF file for which all datablocks are valid. Argument `dic` to initialisation specifies a `CifDic` object to use for validation.""" <> <> <>= def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs): print("WARNING: ValidCifFile will be removed in the next release.") if not diclist and not dic and not hasattr(self,'bigdic'): raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object") if not dic and diclist: #merge here for speed self.bigdic = merge_dic(diclist,mergemode) elif dic and not diclist: self.bigdic = dic CifFile.__init__(self,*args,**kwargs) for blockname in self.keys(): self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic) @ Whenever a new block is added, we have to additionally update our match array and perform a validation run. This definition shadows the definition in the parent class. <>= def NewBlock(self,blockname,blockcontents,**kwargs): CifFile.NewBlock(self,blockname,blockcontents,**kwargs) # dictionary[blockname] is now a CifBlock object. We # turn it into a ValidCifBlock object self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic, data=self.dictionary[blockname]) @ We provide some functions for straight validation. These serve as an example of the use of the CifDic class with the CifFile class. <>= <> <> <> @ A convenient wrapper class for dealing with the structure returned by validation. Perhaps a more elegant approach would be to return one of these objects from validation rather than wrap the validation routines inside. <>= class ValidationResult: """Represents validation result.
It is initialised with the return value of the `validate` function. """ def __init__(self,results): """results is return value of validate function""" self.valid_result, self.no_matches = results def report(self,use_html): """Return string with human-readable description of validation result""" return validate_report((self.valid_result, self.no_matches),use_html) def is_valid(self,block_name=None): """Return True for valid CIF file, otherwise False""" if block_name is not None: block_names = [block_name] else: block_names = self.valid_result.keys() for block_name in block_names: if not self.valid_result[block_name] == (True,{}): valid = False break else: valid = True return valid def has_no_match_items(self,block_name=None): """Return True if some items are not found in dictionary""" if block_name is not None: block_names = [block_name] else: block_names = self.no_matches.keys() for block_name in block_names: if self.no_matches[block_name]: has_no_match_items = True break else: has_no_match_items = False return has_no_match_items @ We provide a function to do straight validation, using the built-in methods of the dictionary type. We need to create a single dictionary from the multiple dictionaries we are passed, before doing our check. Also, we allow validation of dictionaries themselves, by passing a special flag [[isdic]]. This should only be used for DDL2/DDLm dictionaries, and simply makes save frames visible as ordinary blocks. DDL1 dictionaries validate OK if (any) global block is deleted. <>= def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False): """Validate that `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing, to the results of merging the `CifDic` objects in `diclist` according to `mergemode`. Flag `isdic` indicates that `ciffile` is a CIF dictionary meaning that save frames should be accessed for validation and that mandatory_category should be interpreted differently for DDL2.""" if not isinstance(ciffile,CifFile): check_file = CifFile(ciffile) else: check_file = ciffile if not dic: fulldic = merge_dic(diclist,mergemode) else: fulldic = dic no_matches = {} valid_result = {} if isdic: #assume one block only check_file.scoping = 'instance' #only data blocks visible top_level = list(check_file.keys())[0] check_file.scoping = 'dictionary' #all blocks visible # collect a list of parents for speed if fulldic.diclang == 'DDL2': poss_parents = fulldic.get_all("_item_linked.parent_name") for parent in poss_parents: curr_parent = listify(check_file.get(parent,[])) new_vals = check_file.get_all(parent) new_vals.extend(curr_parent) if len(new_vals)>0: check_file[parent] = new_vals print("Added %s (len %d)" % (parent,len(check_file[parent]))) # now run the validations for block in check_file.keys(): if isdic and block == top_level: block_scope = 'Dictionary' elif isdic: block_scope = 'Item' else: block_scope = 'Datablock' no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic] # remove non-matching items print("Not matched: " + repr(no_matches[block])) for nogood in no_matches[block]: del check_file[block][nogood] print("Validating block %s, scope %s" % (block,block_scope)) valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope) return valid_result,no_matches def validate_report(val_result,use_html=False): valid_result,no_matches = val_result outstr = StringIO() if use_html: outstr.write("<h2>Validation results</h2>

else: outstr.write( "Validation results\n") outstr.write( "------------------\n") if len(valid_result) > 10: suppress_valid = True #don't clutter with valid messages if use_html: outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>
") else: suppress_valid = False for block in valid_result.keys(): block_result = valid_result[block] if block_result[0]: out_line = "Block '%s' is VALID" % block else: out_line = "Block '%s' is INVALID" % block if use_html: if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]: outstr.write( "

<h3>%s</h3>

" % out_line) else: outstr.write( "\n %s\n" % out_line) if len(no_matches[block])!= 0: if use_html: outstr.write( "

<p>The following items were not found in the dictionary") outstr.write(" (note that this does not invalidate the data block):</p>") outstr.write("<p><table>\n") [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]] outstr.write("</table>
\n") else: outstr.write( "\n The following items were not found in the dictionary:\n") outstr.write("Note that this does not invalidate the data block\n") [outstr.write("%s\n" % it) for it in no_matches[block]] # now organise our results by type of error, not data item... error_type_dic = {} for error_item, error_list in block_result[1].items(): for func_name,bad_result in error_list: bad_result.update({"item_name":error_item}) try: error_type_dic[func_name].append(bad_result) except KeyError: error_type_dic[func_name] = [bad_result] # make a table of test name, test message info_table = {\ 'validate_item_type':\ "The following data items had badly formed values", 'validate_item_esd':\ "The following data items should not have esds appended", 'validate_enum_range':\ "The following data items have values outside permitted range", 'validate_item_enum':\ "The following data items have values outside permitted set", 'validate_looping':\ "The following data items violate looping constraints", 'validate_loop_membership':\ "The following looped data names are of different categories to the first looped data name", 'validate_loop_key':\ "A required dataname for this category is missing from the loop\n containing the dataname", 'validate_loop_key_ddlm':\ "A loop key is missing for the category containing the dataname", 'validate_loop_references':\ "A dataname required by the item is missing from the loop", 'validate_parent':\ "A parent dataname is missing or contains different values", 'validate_child':\ "A child dataname contains different values to the parent", 'validate_uniqueness':\ "One or more data items do not take unique values", 'validate_dependents':\ "A dataname required by the item is missing from the data block", 'validate_exclusion': \ "Both dataname and exclusive alternates or aliases are present in data block", 'validate_mandatory_category':\ "A required category is missing from this block", 'check_mandatory_items':\ "A required data attribute is missing from this block", 'check_prohibited_items':\ "A prohibited data attribute is present in this block"} for test_name,test_results in error_type_dic.items(): if use_html: outstr.write(html_error_report(test_name,info_table[test_name],test_results)) else: outstr.write(error_report(test_name,info_table[test_name],test_results)) outstr.write("\n\n") return outstr.getvalue() # A function to lay out a single error report. We are passed # the name of the error (one of our validation functions), the # explanation to print out, and a dictionary with the error # information. 
We print no more than 50 characters of the item def error_report(error_name,error_explanation,error_dics): retstring = "\n\n " + error_explanation + ":\n\n" headstring = "%-32s" % "Item name" bodystring = "" if "bad_values" in error_dics[0]: headstring += "%-20s" % "Bad value(s)" if "bad_items" in error_dics[0]: headstring += "%-20s" % "Bad dataname(s)" if "child" in error_dics[0]: headstring += "%-20s" % "Child" if "parent" in error_dics[0]: headstring += "%-20s" % "Parent" headstring +="\n" for error in error_dics: bodystring += "\n%-32s" % error["item_name"] if "bad_values" in error: out_vals = [repr(a)[:50] for a in error["bad_values"]] bodystring += "%-20s" % out_vals if "bad_items" in error: bodystring += "%-20s" % repr(error["bad_items"]) if "child" in error: bodystring += "%-20s" % repr(error["child"]) if "parent" in error: bodystring += "%-20s" % repr(error["parent"]) return retstring + headstring + bodystring # This lays out an HTML error report def html_error_report(error_name,error_explanation,error_dics,annotate=[]): retstring = "

" + error_explanation + ":

" retstring = retstring + "" headstring = "" bodystring = "" if "bad_values" in error_dics[0]: headstring += "" if "bad_items" in error_dics[0]: headstring += "" if "child" in error_dics[0]: headstring += "" if "parent" in error_dics[0]: headstring += "" headstring +="\n" for error in error_dics: bodystring += "" % error["item_name"] if "bad_values" in error: bodystring += "" % error["bad_values"] if "bad_items" in error: bodystring += "" % error["bad_items"] if "child" in error: bodystring += "" % error["child"] if "parent" in error: bodystring += "" % error["parent"] bodystring += "\n" return retstring + headstring + bodystring + "
Item nameBad value(s)Bad dataname(s)ChildParent
%s%s%s%s%s
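@ As a quick orientation, the validation pieces above combine as follows. This is an illustrative sketch only, not part of the module; it assumes a standard PyCIFRW installation that exposes [[CifDic]], [[Validate]] and [[ValidationResult]] at package level, and that the files 'cif_core.dic' and 'mydata.cif' exist locally:

    from CifFile import CifDic, Validate, ValidationResult
    core = CifDic("cif_core.dic", do_dREL=False)   # compile the dictionary once
    outcome = Validate("mydata.cif", dic=core)     # (valid_result, no_matches) tuple
    result = ValidationResult(outcome)             # convenience wrapper defined above
    print(result.report(use_html=False))           # human-readable summary
    print(result.is_valid())                       # True only if every block passed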
\n" @ This function executes validation checks provided in the CifDic. The validation calls create a dictionary containing the test results for each item name. Each item has a list of (test name,result) tuples. After running the tests, we contract these lists to contain only false results, and then remove all items containing no false results. <>= def run_data_checks(check_block,fulldic,block_scope='Item'): v_result = {} for key in check_block.keys(): update_value(v_result, fulldic.run_item_validation(key,check_block[key])) update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block)) for loopnames in check_block.loops.values(): update_value(v_result, fulldic.run_loop_validation(loopnames)) update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope)) # return false and list of baddies if anything didn't match all_keys = list(v_result.keys()) for test_key in all_keys: v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False] if len(v_result[test_key]) == 0: del v_result[test_key] # if even one false one is found, this should trigger # print("Baddies: {!r}".format(v_result)) isvalid = len(v_result)==0 return isvalid,v_result <>= <> <> <> <> <> <> <> @ This support function uses re capturing to work out the number's value. The re contains 7 groups: group 0 is the entire expression; group 1 is the overall match in the part prior to esd brackets; group 2 is the match with a decimal point, group 3 is the digits after the decimal point, group 4 is the match without a decimal point. Group 5 is the esd bracket contents, and group 6 is the exponent. The esd should be returned as an independent number. We count the number of digits after the decimal point, create the esd in terms of this, and then, if necessary, apply the exponent. <>= def get_number_with_esd(numstring): numb_re = '((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)' our_match = re.match(numb_re,numstring) if our_match: a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups() # print("Debug: {} -> {!r}".format(numstring, our_match.groups())) else: return None,None if dot or q: return None,None #a dot or question mark if exp: #has exponent exp = exp.replace("d","e") # mop up old fashioned numbers exp = exp.replace("D","e") base_num = base_num + exp # print("Debug: have %s for base_num from %s" % (base_num,numstring)) base_num = float(base_num) # work out esd, if present. if esd: esd = float(esd[1:-1]) # no brackets if dad: # decimal point + digits esd = esd * (10 ** (-1* len(dad))) if exp: esd = esd * (10 ** (float(exp[1:]))) return base_num,esd @ For dREl operations we require that all numerical types actually appear as numerical types rather than strings. This function takes a datablock and a dictionary and converts all the datablock contents to numerical values according to the dictionary specifications. Note that as written we are happy to interpret a floating point string as an integer. We are therefore assuming that the value has been validated. <>= <> <> <> <> <> @ Instead of returning a value, we return a function that can be used to convert the values. This saves time reconstructing the conversion function for every value in a loop. 
<>= def convert_type(definition): """Convert value to have the type given by definition""" #extract the actual required type information container = definition['_type.container'] dimension = definition.get('_type.dimension',StarFile.StarList([])) structure = interpret_structure(definition['_type.contents']) if container == 'Single': #a single value to convert return convert_single_value(structure) elif container == 'List': #lots of the same value return convert_list_values(structure,dimension) elif container == 'Multiple': #no idea return None elif container in ('Array','Matrix'): #numpy array return convert_matrix_values(structure) return lambda a:a #unable to convert <>= def convert_single_value(type_spec): """Convert a single item according to type_spec""" if type_spec == 'Real': return float_with_esd if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'): return int if type_spec == 'Complex': return complex if type_spec == 'Imag': return lambda a:complex(0,a) if type_spec in ('Code','Name','Tag'): #case-insensitive -> lowercase return lambda a:a.lower() return lambda a:a #can't do anything numeric @ Convert a whole DDLm list. A 'List' type implies a repetition of the types given in the 'type.contents' entry. We build a function to decode each entry in our input list. This function is then mapped over the List, and in the case of looped List values, it can be mapped over the dataname value as well. However, in the case of a single repetition, files are allowed to drop one level of enclosing brackets. We account for that here by detecting a one-element list and *not* mapping the conversion function. TODO: Note that we do not yet handle the case that we are supposed to convert to a Matrix, rather than a list. TODO: handle arbitrary dimension lists, rather than special-casing the character sequence '[1]'. <>= class convert_simple_list(object): """\ Callable object that converts values in a simple list according to the specified element structure. """ def __init__(self, structure): self.converters = [convert_single_value(tp) for tp in structure] return def __call__(self, element): if len(element) != len(self.converters): emsg = "Expected iterable of %i values, got %i." % (len(self.converters), len(element)) raise ValueError(emsg) rv = [f(e) for f, e in zip(self.converters, element)] return rv # End of class convert_simple_list def convert_list_values(structure, dimension): """Convert the values according to the element structure given in [[structure]]""" # simple repetition if isinstance(structure, (unicode, str)): fcnv = convert_single_value(structure) # assume structure is a list of types else: fcnv = convert_simple_list(structure) rv = fcnv # setup nested conversion function when dimension differs from 1. if len(dimension) > 0 and int(dimension[0]) != 1: rv = lambda args : [fcnv(a) for a in args] return rv @ When storing a matrix/array value as a result of a calculation, we remove the numpy information and instead store it as a StarList. The [[convert_matrix_values]] routine in the next chunk works transparently for either string- or number-valued Star Lists, so we do not have to worry.
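@ Before that routine, a short illustration of the list converters just defined. This is a sketch only, not part of the module; it assumes that [[float_with_esd]] (defined elsewhere in this module) converts a plain numeric string to a float:

    to_real = convert_single_value('Real')                # returns float_with_esd
    print(to_real('1.5'))                                 # -> 1.5
    vec_cnv = convert_list_values('Real',['3'])           # a List of three Reals
    print(vec_cnv(['0.25','0.5','0.75']))                 # -> [0.25, 0.5, 0.75]
    row_cnv = convert_list_values(['Code','Real'],['1'])  # one structured row
    print(row_cnv(['Cu','1.54']))                         # -> ['cu', 1.54]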
<>= def convert_matrix_values(valtype): """Convert a dREL String or Float valued List structure to a numpy matrix structure""" # first convert to numpy array, then let numpy do the work try: import numpy except ImportError: return lambda a:a #cannot do it if valtype == 'Real': dtype = float elif valtype == 'Integer': dtype = int elif valtype == 'Complex': dtype = complex else: raise ValueError('Unknown matrix value type') fcnv = lambda a : numpy.asarray(a, dtype=dtype) return fcnv @ DDLm specifies List element composition using a notation of form 'cont(el,el,el...)' where 'cont' refers to a container constructor (list or matrix so far) and 'el' is a simple element type. If 'cont' is missing, the sequence of elements is a sequence of elements in a simple list. We have written a simple parser to interpret this. <>= def interpret_structure(struc_spec): """Interpret a DDLm structure specification""" from . import TypeContentsParser as t p = t.TypeParser(t.TypeParserScanner(struc_spec)) return getattr(p,"input")() <>= # A utility function to append to item values rather than replace them def update_value(base_dict,new_items): for new_key in new_items.keys(): if new_key in base_dict: base_dict[new_key].extend(new_items[new_key]) else: base_dict[new_key] = new_items[new_key] <>= #Transpose the list of lists passed to us def transpose(base_list): new_lofl = [] full_length = len(base_list) opt_range = range(full_length) for i in range(len(base_list[0])): new_packet = [] for j in opt_range: new_packet.append(base_list[j][i]) new_lofl.append(new_packet) return new_lofl # listify strings - used surprisingly often def listify(item): if isinstance(item,(unicode,str)): return [item] else: return item # given a list of search items, return a list of items # actually contained in the given data block def filter_present(namelist,datablocknames): return [a for a in namelist if a in datablocknames] # Make an item immutable, used if we want a list to be a key def make_immutable(values): """Turn list of StarList values into a list of immutable items""" if not isinstance(values[0],StarList): return values else: return [tuple(a) for a in values] @ Decorators. The following decorator keeps track of calls in order to detect recursion. We raise a special recursion error to allow the [[derive_item]] method to act accordingly. We also propagate the first-seen value of 'allow_defaults' recursively, so that the original call can control whether or not to use default values. Typically methods can be tried without, and then with, default values, to ensure that all possibilities for deriving the function are attempted first. 
<>= def track_recursion(in_this_func): """Keep an eye on a function call to make sure that the key argument hasn't been seen before""" def wrapper(*args,**kwargs): key_arg = args[1] if key_arg in wrapper.called_list: print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg))) raise CifRecursionError( key_arg,wrapper.called_list[:]) #failure if len(wrapper.called_list) == 0: #first time wrapper.stored_use_defaults = kwargs.get("allow_defaults",False) print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults)) else: kwargs["allow_defaults"] = wrapper.stored_use_defaults wrapper.called_list.append(key_arg) print('Recursion watch: call stack: ' + repr(wrapper.called_list)) try: result = in_this_func(*args,**kwargs) except StarFile.StarDerivationError as s: if len(wrapper.called_list) == 1: #no more raise StarFile.StarDerivationFailure(wrapper.called_list[0]) else: raise finally: wrapper.called_list.pop() if len(wrapper.called_list) == 0: wrapper.stored_used_defaults = 'error' return result wrapper.called_list = [] return wrapper @ This uses the [[CifFile]] merge method to merge a list of filenames, with an initial check to determine DDL1/DDL2 merge style. In one case we merge save frames in a single block, in another case we merge data blocks. These are different levels. Note that the data block name is passed to specify the parts of each object to be merged, rather than the objects themselves (not doing this was a bug that was caught a while ago). <>= # merge ddl dictionaries. We should be passed filenames or CifFile # objects def merge_dic(diclist,mergemode="replace",ddlspec=None): dic_as_cif_list = [] for dic in diclist: if not isinstance(dic,CifFile) and \ not isinstance(dic,(unicode,str)): raise TypeError("Require list of CifFile names/objects for dictionary merging") if not isinstance(dic,CifFile): dic_as_cif_list.append(CifFile(dic)) else: dic_as_cif_list.append(dic) # we now merge left to right basedic = dic_as_cif_list[0] if "on_this_dictionary" in basedic: #DDL1 style only for dic in dic_as_cif_list[1:]: basedic.merge(dic,mode=mergemode,match_att=["_name"]) elif len(basedic.keys()) == 1: #One block: DDL2/m style old_block = basedic[basedic.keys()[0]] for dic in dic_as_cif_list[1:]: new_block = dic[dic.keys()[0]] basedic.merge(dic,mode=mergemode, single_block=[basedic.keys()[0],dic.keys()[0]], match_att=["_item.name"],match_function=find_parent) return CifDic(basedic) @ Find the main item from a parent-child list. We are asked to find the topmost parent in a ddl2 definition block containing multiple item.names. We use the insight that the parent item will be that item which is not in the list of children as well. If there are no item names, that means that we are dealing with something like a category -can they be merged?? 
<>= def find_parent(ddl2_def): if "_item.name" not in ddl2_def: return None if isinstance(ddl2_def["_item.name"],unicode): return ddl2_def["_item.name"] if "_item_linked.child_name" not in ddl2_def: raise CifError("Asked to find parent in block with no child_names") if "_item_linked.parent_name" not in ddl2_def: raise CifError("Asked to find parent in block with no parent_names") result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]]) if len(result)>1 or len(result)==0: raise CifError("Unable to find single unique parent data item") return result[0] @ \section{Cif Loop block class} With the removal (by PyCIFRW) of nested loops, this class is now unnecessary. It is now simply a pointer to StarFile.LoopBlock. <>= class CifLoopBlock(StarFile.LoopBlock): def __init__(self,data=(),**kwargs): super(CifLoopBlock,self).__init__(data,**kwargs) <>= #No documentation flags pycifrw-4.4/src/CifFile_module.py000066400000000000000000004516261345362224200170740ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import try: from cStringIO import StringIO except ImportError: from io import StringIO # Python 2,3 compatibility try: from urllib import urlopen # for arbitrary opening from urlparse import urlparse, urljoin except: from urllib.request import urlopen from urllib.parse import urlparse, urljoin # The unicode type does not exist in Python3 as the str type # encompasses unicode. PyCIFRW tests for 'unicode' would fail # Suggestions for a better approach welcome. if isinstance(u"abc",str): #Python3 unicode = str __copyright = """ PYCIFRW License Agreement (Python License, Version 2) ----------------------------------------------------- 1. This LICENSE AGREEMENT is between the Australian Nuclear Science and Technology Organisation ("ANSTO"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("PyCIFRW") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. 
ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. """ import re,sys from . import StarFile from .StarFile import StarList #put in global scope for exec statement try: import numpy #put in global scope for exec statement from .drel import drel_runtime #put in global scope for exec statement except ImportError: pass #will fail when using dictionaries for calcs from copy import copy #must be in global scope for exec statement def track_recursion(in_this_func): """Keep an eye on a function call to make sure that the key argument hasn't been seen before""" def wrapper(*args,**kwargs): key_arg = args[1] if key_arg in wrapper.called_list: print('Recursion watch: %s already called %d times' % (key_arg,wrapper.called_list.count(key_arg))) raise CifRecursionError( key_arg,wrapper.called_list[:]) #failure if len(wrapper.called_list) == 0: #first time wrapper.stored_use_defaults = kwargs.get("allow_defaults",False) print('All recursive calls will set allow_defaults to ' + repr(wrapper.stored_use_defaults)) else: kwargs["allow_defaults"] = wrapper.stored_use_defaults wrapper.called_list.append(key_arg) print('Recursion watch: call stack: ' + repr(wrapper.called_list)) try: result = in_this_func(*args,**kwargs) except StarFile.StarDerivationError as s: if len(wrapper.called_list) == 1: #no more raise StarFile.StarDerivationFailure(wrapper.called_list[0]) else: raise finally: wrapper.called_list.pop() if len(wrapper.called_list) == 0: wrapper.stored_used_defaults = 'error' return result wrapper.called_list = [] return wrapper class CifBlock(StarFile.StarBlock): """ A class to hold a single block of a CIF file. A `CifBlock` object can be treated as a Python dictionary, in particular, individual items can be accessed using square brackets e.g. `b['_a_dataname']`. All other Python dictionary methods are also available (e.g. `keys()`, `values()`). Looped datanames will return a list of values. ## Initialisation When provided, `data` should be another `CifBlock` whose contents will be copied to this block. * if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ def __init__(self,data = (), strict = 1, compat_mode=False, **kwargs): """When provided, `data` should be another CifBlock whose contents will be copied to this block. 
* if `strict` is set, maximum name lengths will be enforced * `maxoutlength` is the maximum length for output lines * `wraplength` is the ideal length to make output lines * When set, `overwrite` allows the values of datanames to be changed (otherwise an error is raised). * `compat_mode` will allow deprecated behaviour of creating single-dataname loops using the syntax `a[_dataname] = [1,2,3,4]`. This should now be done by calling `CreateLoop` after setting the dataitem value. """ if strict: maxnamelength=75 else: maxnamelength=-1 super(CifBlock,self).__init__(data=data,maxnamelength=maxnamelength,**kwargs) self.dictionary = None #DDL dictionary referring to this block self.compat_mode = compat_mode #old-style behaviour of setitem def RemoveCifItem(self,itemname): """Remove `itemname` from the CifBlock""" self.RemoveItem(itemname) def __setitem__(self,key,value): self.AddItem(key,value) # for backwards compatibility make a single-element loop if self.compat_mode: if isinstance(value,(tuple,list)) and not isinstance(value,StarFile.StarList): # single element loop self.CreateLoop([key]) def copy(self): newblock = super(CifBlock,self).copy() return type(self)(newblock) #catch inheritance (bound-method im_class is Python 2 only) def AddCifItem(self,data): """ *DEPRECATED*. Use `AddItem` instead.""" # we accept only tuples, strings and lists!! if not (isinstance(data[0],(unicode,tuple,list,str))): raise TypeError('Cif datanames are either a string, tuple or list') # we catch single item loops as well... if isinstance(data[0],(unicode,str)): self.AddSingleCifItem(data[0],list(data[1])) if isinstance(data[1],(tuple,list)) and not isinstance(data[1],StarFile.StarList): # a single element loop self.CreateLoop([data[0]]) return # otherwise, we loop over the datanames keyvals = zip(data[0][0],[list(a) for a in data[1][0]]) [self.AddSingleCifItem(a,b) for a,b in keyvals] # and create the loop self.CreateLoop(data[0][0]) def AddSingleCifItem(self,key,value): """*Deprecated*. Use `AddItem` instead. Add a single data item; if it is part of a loop, a separate call should be made.""" self.AddItem(key,value) def loopnames(self): return [self.loops[a] for a in self.loops] class CifFile(StarFile.StarFile): def __init__(self,datasource=None,strict=1,standard='CIF',**kwargs): super(CifFile,self).__init__(datasource=datasource,standard=standard, **kwargs) self.strict = strict self.header_comment = \ """ ########################################################################## # Crystallographic Information Format file # Produced by PyCifRW module # # This is a CIF file. CIF has been adopted by the International # Union of Crystallography as the standard for data archiving and # transmission.
# # For information on this file format, follow the CIF links at # http://www.iucr.org ########################################################################## """ class CifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Format error: '+ self.value class ValidCifError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nCif Validity error: ' + self.value class CifRecursionError(Exception): def __init__(self,key_value,call_stack): self.key_value = key_value self.call_stack = call_stack def __str__(self): return "Derivation has recursed, %s seen twice (call stack %s)" % (self.key_value,repr(self.call_stack)) class DicBlock(StarFile.StarBlock): """A definition block within a dictionary, which allows imports to be transparently followed""" def __init__(self,*args,**kwargs): super(DicBlock,self).__init__(*args,**kwargs) self._import_cache = {} def __getitem__(self,dataname): value = None if super(DicBlock,self).has_key("_import.get") and self._import_cache: value = self.follow_import(super(DicBlock,self).__getitem__("_import.get"),dataname) try: final_value = super(DicBlock,self).__getitem__(dataname) except KeyError: #not there final_value = value if final_value is None: raise KeyError("%s not found" % dataname) return final_value def has_key(self,key): try: self[key] except KeyError: return False return True def add_dict_cache(self,name,cached): """Add a loaded dictionary to this block's cache""" self._import_cache[name]=cached def follow_import(self,import_info,dataname): """Find the dataname values from the imported dictionary. `import_info` is a list of import locations""" latest_value = None for import_ref in import_info: file_loc = import_ref["file"] if file_loc not in self._import_cache: raise ValueError("Dictionary for import %s not found" % file_loc) import_from = self._import_cache[file_loc] miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,file_loc)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if mode == "contents": #only this is used at this level latest_value = import_target.get(dataname,latest_value) return latest_value class CifDic(StarFile.StarFile): """Create a Cif Dictionary object from the provided source, which can be a filename/URL or a CifFile. Optional arguments (relevant to DDLm only): * do_minimum (Boolean): Do not set up the dREL system for auto-calculation or perform imports. This implies do_imports=False and do_dREL=False * do_imports = No/Full/Contents/All: If not 'No', intepret _import.get statements for Full mode/Contents mode/Both respectively. See also option 'heavy' * do_dREL = True/False: Parse and convert all dREL methods to Python. Implies do_imports=All * heavy = True/False: (Experimental). If True, importation overwrites definitions. If False, attributes are resolved dynamically. 
""" def __init__(self,dic,do_minimum=False,do_imports='All', do_dREL=True, grammar='auto',heavy=True,**kwargs): self.do_minimum = do_minimum if do_minimum: do_imports = 'No' do_dREL = False if do_dREL: do_imports = 'All' if heavy == 'Light' and do_imports not in ('contents','No'): raise(ValueError,"Light imports only available for mode 'contents'") self.template_cache = {} #for DDLm imports self.ddlm_functions = {} #for DDLm functions self.switch_numpy(False) #no Numpy arrays returned super(CifDic,self).__init__(datasource=dic,grammar=grammar,blocktype=DicBlock,**kwargs) self.standard = 'Dic' #for correct output order self.scoping = 'dictionary' (self.dicname,self.diclang) = self.dic_determine() print('%s is a %s dictionary' % (self.dicname,self.diclang)) self.scopes_mandatory = {} self.scopes_naughty = {} # rename and expand out definitions using "_name" in DDL dictionaries if self.diclang == "DDL1": self.DDL1_normalise() #this removes any non-definition entries self.create_def_block_table() #From now on, [] uses definition_id if self.diclang == "DDL1": self.ddl1_cat_load() elif self.diclang == "DDL2": self.DDL2_normalise() #iron out some DDL2 tricky bits elif self.diclang == "DDLm": self.scoping = 'dictionary' #expose all save frames if do_imports is not 'No': self.obtain_imports(import_mode=do_imports,heavy=heavy)#recursively calls this routine self.create_alias_table() self.create_cat_obj_table() self.create_cat_key_table() if do_dREL: print('Doing full dictionary initialisation') self.initialise_drel() self.add_category_info(full=do_dREL) # initialise type information self.typedic={} self.primdic = {} #typecode<->primitive type translation self.add_type_info() self.install_validation_functions() def dic_determine(self): if "on_this_dictionary" in self: self.master_block = super(CifDic,self).__getitem__("on_this_dictionary") self.def_id_spec = "_name" self.cat_id_spec = "_category.id" #we add this ourselves self.type_spec = "_type" self.enum_spec = "_enumeration" self.cat_spec = "_category" self.esd_spec = "_type_conditions" self.must_loop_spec = "_list" self.must_exist_spec = "_list_mandatory" self.list_ref_spec = "_list_reference" self.key_spec = "_list_mandatory" self.unique_spec = "_list_uniqueness" self.child_spec = "_list_link_child" self.parent_spec = "_list_link_parent" self.related_func = "_related_function" self.related_item = "_related_item" self.primitive_type = "_type" self.dep_spec = "xxx" self.cat_list = [] #to save searching all the time name = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_name"] version = super(CifDic,self).__getitem__("on_this_dictionary")["_dictionary_version"] return (name+version,"DDL1") elif len(self.get_roots()) == 1: # DDL2/DDLm self.master_block = super(CifDic,self).__getitem__(self.get_roots()[0][0]) # now change to dictionary scoping self.scoping = 'dictionary' name = self.master_block["_dictionary.title"] version = self.master_block["_dictionary.version"] if self.master_block.has_key("_dictionary.class"): #DDLm self.enum_spec = '_enumeration_set.state' self.key_spec = '_category.key_id' self.must_exist_spec = None self.cat_spec = '_name.category_id' self.primitive_type = '_type.contents' self.cat_id_spec = "_definition.id" self.def_id_spec = "_definition.id" return(name+version,"DDLm") else: #DDL2 self.cat_id_spec = "_category.id" self.def_id_spec = "_item.name" self.key_spec = "_category_mandatory.name" self.type_spec = "_item_type.code" self.enum_spec = "_item_enumeration.value" self.esd_spec = 
"_item_type_conditions.code" self.cat_spec = "_item.category_id" self.loop_spec = "there_is_no_loop_spec!" self.must_loop_spec = "xxx" self.must_exist_spec = "_item.mandatory_code" self.child_spec = "_item_linked.child_name" self.parent_spec = "_item_linked.parent_name" self.related_func = "_item_related.function_code" self.related_item = "_item_related.related_name" self.unique_spec = "_category_key.name" self.list_ref_spec = "xxx" self.primitive_type = "_type" self.dep_spec = "_item_dependent.dependent_name" return (name+version,"DDL2") else: raise CifError("Unable to determine dictionary DDL version") def DDL1_normalise(self): # switch off block name collision checks self.standard = None # add default type information in DDL2 style # initial types and constructs base_types = ["char","numb","null"] prim_types = base_types[:] base_constructs = [".*", '(-?(([0-9]*[.][0-9]+)|([0-9]+)[.]?)([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|\?|\.', "\"\" "] for key,value in self.items(): newnames = [key] #keep by default if "_name" in value: real_name = value["_name"] if isinstance(real_name,list): #looped values for looped_name in real_name: new_value = value.copy() new_value["_name"] = looped_name #only looped name self[looped_name] = new_value newnames = real_name else: self[real_name] = value newnames = [real_name] # delete the old one if key not in newnames: del self[key] # loop again to normalise the contents of each definition for key,value in self.items(): #unlock the block save_overwrite = value.overwrite value.overwrite = True # deal with a missing _list, _type_conditions if "_list" not in value: value["_list"] = 'no' if "_type_conditions" not in value: value["_type_conditions"] = 'none' # deal with enumeration ranges if "_enumeration_range" in value: max,min = self.getmaxmin(value["_enumeration_range"]) if min == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max),(max,min)))) elif max == ".": self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,min),(min,min)))) else: self[key].AddLoopItem((("_item_range.maximum","_item_range.minimum"),((max,max,min),(max,min,min)))) #add any type construct information if "_type_construct" in value: base_types.append(value["_name"]+"_type") #ie dataname_type base_constructs.append(value["_type_construct"]+"$") prim_types.append(value["_type"]) #keep a record value["_type"] = base_types[-1] #the new type name #make categories conform with ddl2 #note that we must remove everything from the last underscore if value.get("_category",None) == "category_overview": last_under = value["_name"].rindex("_") catid = value["_name"][1:last_under] value["_category.id"] = catid #remove square bracks if catid not in self.cat_list: self.cat_list.append(catid) value.overwrite = save_overwrite # we now add any missing categories before filling in the rest of the # information for key,value in self.items(): #print('processing ddl1 definition %s' % key) if "_category" in self[key]: if self[key]["_category"] not in self.cat_list: # rogue category, add it in newcat = self[key]["_category"] fake_name = "_" + newcat + "_[]" newcatdata = CifBlock() newcatdata["_category"] = "category_overview" newcatdata["_category.id"] = newcat newcatdata["_type"] = "null" self[fake_name] = newcatdata self.cat_list.append(newcat) # write out the type information in DDL2 style self.master_block.AddLoopItem(( ("_item_type_list.code","_item_type_list.construct", "_item_type_list.primitive_code"), (base_types,base_constructs,prim_types) )) def 
ddl1_cat_load(self): deflist = self.keys() #slight optimization cat_mand_dic = {} cat_unique_dic = {} # a function to extract any necessary information from each definition def get_cat_info(single_def): if self[single_def].get(self.must_exist_spec)=='yes': thiscat = self[single_def]["_category"] curval = cat_mand_dic.get(thiscat,[]) curval.append(single_def) cat_mand_dic[thiscat] = curval # now the unique items... # cif_core.dic throws us a curly one: the value of list_uniqueness is # not the same as the defined item for publ_body_label, so we have # to collect both together. We assume a non-listed entry, which # is true for all current (May 2005) ddl1 dictionaries. if self[single_def].get(self.unique_spec,None)!=None: thiscat = self[single_def]["_category"] new_unique = self[single_def][self.unique_spec] uis = cat_unique_dic.get(thiscat,[]) if single_def not in uis: uis.append(single_def) if new_unique not in uis: uis.append(new_unique) cat_unique_dic[thiscat] = uis [get_cat_info(a) for a in deflist] # apply the above function for cat in cat_mand_dic.keys(): self[cat]["_category_mandatory.name"] = cat_mand_dic[cat] for cat in cat_unique_dic.keys(): self[cat]["_category_key.name"] = cat_unique_dic[cat] def create_pcloop(self,definition): old_children = self[definition].get('_item_linked.child_name',[]) old_parents = self[definition].get('_item_linked.parent_name',[]) if isinstance(old_children,unicode): old_children = [old_children] if isinstance(old_parents,unicode): old_parents = [old_parents] if (len(old_children)==0 and len(old_parents)==0) or \ (len(old_children) > 1 and len(old_parents)>1): return if len(old_children)==0: old_children = [definition]*len(old_parents) if len(old_parents)==0: old_parents = [definition]*len(old_children) newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) try: del self[definition]['_item_linked.parent_name'] del self[definition]['_item_linked.child_name'] except KeyError: pass self[definition].insert_loop(newloop) def DDL2_normalise(self): listed_defs = filter(lambda a:isinstance(self[a].get('_item.name'),list),self.keys()) # now filter out all the single element lists! 
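        # (a definition listing only its own name needs no reorganisation;
        # the remaining 'dodgy' definitions bundle a parent item together
        # with its children, and the loop below unpacks each child into its
        # own definition block, preserving its category and mandatory code)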
dodgy_defs = filter(lambda a:len(self[a]['_item.name']) > 1, listed_defs) for item_def in dodgy_defs: # print("DDL2 norm: processing %s" % item_def) thisdef = self[item_def] packet_no = thisdef['_item.name'].index(item_def) realcat = thisdef['_item.category_id'][packet_no] realmand = thisdef['_item.mandatory_code'][packet_no] # first add in all the missing categories # we don't replace the entry in the list corresponding to the # current item, as that would wipe out the information we want for child_no in range(len(thisdef['_item.name'])): if child_no == packet_no: continue child_name = thisdef['_item.name'][child_no] child_cat = thisdef['_item.category_id'][child_no] child_mand = thisdef['_item.mandatory_code'][child_no] if child_name not in self: self[child_name] = CifBlock() self[child_name]['_item.name'] = child_name self[child_name]['_item.category_id'] = child_cat self[child_name]['_item.mandatory_code'] = child_mand self[item_def]['_item.name'] = item_def self[item_def]['_item.category_id'] = realcat self[item_def]['_item.mandatory_code'] = realmand target_defs = [a for a in self.keys() if '_item_linked.child_name' in self[a] or \ '_item_linked.parent_name' in self[a]] # now dodgy_defs contains all definition blocks with more than one child/parent link for item_def in dodgy_defs: self.create_pcloop(item_def) #regularise appearance for item_def in dodgy_defs: print('Processing %s' % item_def) thisdef = self[item_def] child_list = thisdef['_item_linked.child_name'] parents = thisdef['_item_linked.parent_name'] # for each parent, find the list of children. family = list(zip(parents,child_list)) notmychildren = family #We aim to remove non-children # Loop over the parents, relocating as necessary while len(notmychildren): # get all children of first entry mychildren = [a for a in family if a[0]==notmychildren[0][0]] print("Parent %s: %d children" % (notmychildren[0][0],len(mychildren))) for parent,child in mychildren: #parent is the same for all # Make sure that we simply add in the new entry for the child, not replace it, # otherwise we might spoil the child entry loop structure try: childloop = self[child].GetLoop('_item_linked.parent_name') except KeyError: print('Creating new parent entry %s for definition %s' % (parent,child)) self[child]['_item_linked.parent_name'] = [parent] childloop = self[child].GetLoop('_item_linked.parent_name') childloop.AddLoopItem(('_item_linked.child_name',[child])) continue else: # A parent loop already exists and so will a child loop due to the # call to create_pcloop above pars = [a for a in childloop if getattr(a,'_item_linked.child_name','')==child] goodpars = [a for a in pars if getattr(a,'_item_linked.parent_name','')==parent] if len(goodpars)>0: #no need to add it print('Skipping duplicated parent - child entry in %s: %s - %s' % (child,parent,child)) continue print('Adding %s to %s entry' % (parent,child)) newpacket = childloop.GetPacket(0) #essentially a copy, I hope setattr(newpacket,'_item_linked.child_name',child) setattr(newpacket,'_item_linked.parent_name',parent) childloop.AddPacket(newpacket) # # Make sure the parent also points to the children. 
We get # the current entry, then add our # new values if they are not there already # parent_name = mychildren[0][0] old_children = self[parent_name].get('_item_linked.child_name',[]) old_parents = self[parent_name].get('_item_linked.parent_name',[]) oldfamily = zip(old_parents,old_children) newfamily = [] print('Old parents -> %s' % repr(old_parents)) for jj, childname in mychildren: alreadythere = [a for a in oldfamily if a[0]==parent_name and a[1] ==childname] if len(alreadythere)>0: continue 'Adding new child %s to parent definition at %s' % (childname,parent_name) old_children.append(childname) old_parents.append(parent_name) # Now output the loop, blowing away previous definitions. If there is something # else in this category, we are destroying it. newloop = CifLoopBlock(dimension=1) newloop.AddLoopItem(('_item_linked.parent_name',old_parents)) newloop.AddLoopItem(('_item_linked.child_name',old_children)) del self[parent_name]['_item_linked.parent_name'] del self[parent_name]['_item_linked.child_name'] self[parent_name].insert_loop(newloop) print('New parents -> %s' % repr(self[parent_name]['_item_linked.parent_name'])) # now make a new,smaller list notmychildren = [a for a in notmychildren if a[0]!=mychildren[0][0]] # now flatten any single element lists single_defs = filter(lambda a:len(self[a]['_item.name'])==1,listed_defs) for flat_def in single_defs: flat_keys = self[flat_def].GetLoop('_item.name').keys() for flat_key in flat_keys: self[flat_def][flat_key] = self[flat_def][flat_key][0] # now deal with the multiple lists # next we do aliases all_aliases = [a for a in self.keys() if self[a].has_key('_item_aliases.alias_name')] for aliased in all_aliases: my_aliases = listify(self[aliased]['_item_aliases.alias_name']) for alias in my_aliases: self[alias] = self[aliased].copy() #we are going to delete stuff... 
del self[alias]["_item_aliases.alias_name"] def ddlm_parse_valid(self): if "_dictionary_valid.application" not in self.master_block: return for scope_pack in self.master_block.GetLoop("_dictionary_valid.application"): scope = getattr(scope_pack,"_dictionary_valid.application") valid_info = getattr(scope_pack,"_dictionary_valid.attributes") if scope[1] == "Mandatory": self.scopes_mandatory[scope[0]] = self.expand_category_opt(valid_info) elif scope[1] == "Prohibited": self.scopes_naughty[scope[0]] = self.expand_category_opt(valid_info) def obtain_imports(self,import_mode,heavy=False): """Collate import information""" self._import_dics = [] import_frames = list([(a,self[a]['_import.get']) for a in self.keys() if '_import.get' in self[a]]) print('Import mode %s applied to following frames' % import_mode) print(str([a[0] for a in import_frames])) if import_mode != 'All': for i in range(len(import_frames)): import_frames[i] = (import_frames[i][0],[a for a in import_frames[i][1] if a.get('mode','Contents').lower() == import_mode.lower()]) print('Importing following frames in mode %s' % import_mode) print(str(import_frames)) #resolve all references for parent_block,import_list in import_frames: for import_ref in import_list: file_loc = import_ref["file"] full_uri = self.resolve_path(file_loc) if full_uri not in self.template_cache: dic_as_cif = CifFile(full_uri,grammar=self.grammar) self.template_cache[full_uri] = CifDic(dic_as_cif,do_imports=import_mode,heavy=heavy,do_dREL=False) #this will recurse internal imports print('Added %s to cached dictionaries' % full_uri) import_from = self.template_cache[full_uri] dupl = import_ref.get('dupl','Exit') miss = import_ref.get('miss','Exit') target_key = import_ref["save"] try: import_target = import_from[target_key] except KeyError: if miss == 'Exit': raise CifError('Import frame %s not found in %s' % (target_key,full_uri)) else: continue # now import appropriately mode = import_ref.get("mode",'Contents').lower() if target_key in self and mode=='full': #so blockname will be duplicated if dupl == 'Exit': raise CifError('Import frame %s already in dictionary' % target_key) elif dupl == 'Ignore': continue if heavy: self.ddlm_import(parent_block,import_from,import_target,target_key,mode) else: self.ddlm_import_light(parent_block,import_from,import_target,target_key,file_loc,mode) def ddlm_import(self,parent_block,import_from,import_target,target_key,mode='All'): """Import other dictionaries in place""" if mode == 'contents': #merge attributes only self[parent_block].merge(import_target) elif mode =="full": # Do the syntactic merge syntactic_head = self[self.get_parent(parent_block)] #root frame if no nesting from_cat_head = import_target['_name.object_id'] child_frames = import_from.ddlm_all_children(from_cat_head) # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False child_frames.remove(from_cat_head) # As we are in syntax land, we call the CifFile methods child_blocks = list([import_from.block_id_table[a.lower()] for a in child_frames]) child_blocks = super(CifDic,import_from).makebc(child_blocks) # Prune out any datablocks that have identical definitions from_defs = dict([(a,child_blocks[a].get('_definition.id','').lower()) for a in child_blocks.keys()]) double_defs = list([b for b in from_defs.items() if self.has_key(b[1])]) print('Definitions for %s superseded' % repr(double_defs)) for b in double_defs: del 
child_blocks[b[0]] super(CifDic,self).merge_fast(child_blocks,parent=syntactic_head) # print('Syntactic merge of %s (%d defs) in %s mode, now have %d defs' % (target_key,len(child_frames), mode,len(self))) # Now the semantic merge # First expand our definition <-> blockname tree self.create_def_block_table() merging_cat = self[parent_block]['_name.object_id'] #new parent if head_to_head: child_frames = self.ddlm_immediate_children(from_cat_head) #old children #the new parent is the importing category for all old children for f in child_frames: self[f].overwrite = True self[f]['_name.category_id'] = merging_cat self[f].overwrite = False # remove the old head del self[from_cat_head] print('Semantic merge: %d defs reparented from %s to %s' % (len(child_frames),from_cat_head,merging_cat)) else: #imported category is only child from_frame = import_from[target_key]['_definition.id'] #so we can find it child_frame = [d for d in self.keys() if self[d]['_definition.id']==from_frame][0] self[child_frame]['_name.category_id'] = merging_cat print('Semantic merge: category for %s : now %s' % (from_frame,merging_cat)) # it will never happen again... del self[parent_block]["_import.get"] def resolve_path(self,file_loc): url_comps = urlparse(file_loc) if url_comps[0]: return file_loc #already full URI new_url = urljoin(self.my_uri,file_loc) #print("Transformed %s to %s for import " % (file_loc,new_url)) return new_url def ddlm_import_light(self,parent_block,import_from,import_target,target_key,file_loc,mode='All'): """Register the imported dictionaries but do not alter any definitions. `parent_block` contains the id of the block that is importing. `import_target` is the block that should be imported. `import_from` is the CifFile that contains the definitions.""" if mode == 'contents': #merge attributes only self[parent_block].add_dict_cache(file_loc,import_from) elif mode =="full": # Check for Head merging Head if self[parent_block].get('_definition.class','Datum')=='Head' and \ import_target.get('_definition.class','Datum')=='Head': head_to_head = True else: head_to_head = False # Figure out the actual definition ID head_id = import_target["_definition.id"] # Adjust parent information merging_cat = self[parent_block]['_name.object_id'] from_cat_head = import_target['_name.object_id'] if not head_to_head: # imported category is only child import_target["_name.category_id"]=merging_cat self._import_dics = [(import_from,head_id)]+self._import_dics #prepend def lookup_imports(self,key): """Check the list of imported dictionaries for this definition""" for one_dic,head_def in self._import_dics: from_cat_head = one_dic[head_def]['_name.object_id'] possible_keys = one_dic.ddlm_all_children(from_cat_head) if key in possible_keys: return one_dic[key] raise KeyError("%s not found in import dictionaries" % key) def create_def_block_table(self): """ Create an internal table matching definition to block id """ proto_table = [(super(CifDic,self).__getitem__(a),a) for a in super(CifDic,self).keys()] # now get the actual ids instead of blocks proto_table = list([(a[0].get(self.cat_id_spec,a[0].get(self.def_id_spec,a[1])),a[1]) for a in proto_table]) # remove non-definitions if self.diclang != "DDL1": top_blocks = list([a[0].lower() for a in self.get_roots()]) else: top_blocks = ["on_this_dictionary"] # catch dodgy duplicates uniques = set([a[0] for a in proto_table]) if len(uniques) < len(proto_table): dodgy = [a for a in proto_table if proto_table.count(a)>1] raise CifError('Duplicate definitions in dictionary:' + repr(dodgy)) self.block_id_table = dict([(a[0].lower(),a[1].lower()) for a in
proto_table if a[1].lower() not in top_blocks]) def __getitem__(self,key): """Access a datablock by definition id, after the lookup has been created""" try: return super(CifDic,self).__getitem__(self.block_id_table[key.lower()]) except AttributeError: #block_id_table not present yet return super(CifDic,self).__getitem__(key) except KeyError: # key is missing try: # print('Definition for %s not found, reverting to CifFile' % key) return super(CifDic,self).__getitem__(key) except KeyError: # try imports return self.lookup_imports(key) def __setitem__(self,key,value): """Add a new definition block""" super(CifDic,self).__setitem__(key,value) try: self.block_id_table[value['_definition.id']]=key except AttributeError: #does not exist yet pass def NewBlock(self,*args,**kwargs): newname = super(CifDic,self).NewBlock(*args,**kwargs) try: self.block_id_table[self[newname]['_definition.id']]=newname except AttributeError: #no block_id table pass def __delitem__(self,key): """Remove a definition""" try: super(CifDic,self).__delitem__(self.block_id_table[key.lower()]) del self.block_id_table[key.lower()] except (AttributeError,KeyError): #block_id_table not present yet super(CifDic,self).__delitem__(key) return # fix other datastructures # cat_obj table def keys(self): """Return all definitions""" try: return self.block_id_table.keys() except AttributeError: return super(CifDic,self).keys() def has_key(self,key): return key in self def __contains__(self,key): try: return key.lower() in self.block_id_table except AttributeError: return super(CifDic,self).__contains__(key) def items(self): """Return (key,value) pairs""" return list([(a,self[a]) for a in self.keys()]) def unlock(self): """Allow overwriting of all definitions in this collection""" for a in self.keys(): self[a].overwrite=True def lock(self): """Disallow changes in definitions""" for a in self.keys(): self[a].overwrite=False def rename(self,oldname,newname,blockname_as_well=True): """Change a _definition.id from oldname to newname, and if `blockname_as_well` is True, change the underlying blockname too.""" if blockname_as_well: super(CifDic,self).rename(self.block_id_table[oldname.lower()],newname) self.block_id_table[newname.lower()]=newname if oldname.lower() in self.block_id_table: #not removed del self.block_id_table[oldname.lower()] else: self.block_id_table[newname.lower()]=self.block_id_table[oldname.lower()] del self.block_id_table[oldname.lower()] return def get_root_category(self): """Get the single 'Head' category of this dictionary""" root_cats = [r for r in self.keys() if self[r].get('_definition.class','Datum')=='Head'] if len(root_cats)>1 or len(root_cats)==0: raise CifError("Cannot determine a unique Head category, got %s" % repr(root_cats)) return root_cats[0] def ddlm_immediate_children(self,catname): """Return a list of datanames for the immediate children of catname. These are semantic children (i.e.
based on _name.category_id), not structural children as in the case of StarFile.get_immediate_children""" straight_children = [a for a in self.keys() if self[a].get('_name.category_id','').lower() == catname.lower()] return list(straight_children) def ddlm_all_children(self,catname): """Return a list of all children, including the `catname`""" all_children = self.ddlm_immediate_children(catname) cat_children = [a for a in all_children if self[a].get('_definition.scope','Item') == 'Category'] for c in cat_children: all_children.remove(c) all_children += self.ddlm_all_children(c) return all_children + [catname] def is_semantic_child(self,parent,maybe_child): """Return true if `maybe_child` is a child of `parent`""" all_children = self.ddlm_all_children(parent) return maybe_child in all_children def ddlm_danglers(self): """Return a list of definitions that do not have a category defined for them, or are children of an unattached category""" top_block = self.get_root_category() connected = set(self.ddlm_all_children(top_block)) all_keys = set(self.keys()) unconnected = all_keys - connected return list(unconnected) def get_ddlm_parent(self,itemname): """Get the parent category of itemname""" parent = self[itemname].get('_name.category_id','') if parent == '': # use the top block by default raise CifError("%s has no parent" % itemname) return parent def expand_category_opt(self,name_list): """Return a list of all non-category items in a category or return the name if the name is not a category""" new_list = [] for name in name_list: if self.get(name,{}).get('_definition.scope','Item') == 'Category': new_list += self.expand_category_opt([a for a in self.keys() if \ self[a].get('_name.category_id','').lower() == name.lower()]) else: new_list.append(name) return new_list def get_categories(self): """Return a list of category names""" return list([c for c in self.keys() if self[c].get("_definition.scope")=='Category']) def names_in_cat(self,cat,names_only=False): names = [a for a in self.keys() if self[a].get('_name.category_id','').lower()==cat.lower()] if not names_only: return list([a for a in names if self[a].get('_definition.scope','Item')=='Item']) else: return list([self[a]["_name.object_id"] for a in names]) def create_alias_table(self): """Populate an alias table that we can look up when searching for a dataname""" all_aliases = [a for a in self.keys() if '_alias.definition_id' in self[a]] self.alias_table = dict([[a,self[a]['_alias.definition_id']] for a in all_aliases]) def create_cat_obj_table(self): """Populate a table indexed by (cat,obj) and returning the correct dataname""" base_table = dict([((self[a].get('_name.category_id','').lower(),self[a].get('_name.object_id','').lower()),[self[a].get('_definition.id','')]) \ for a in self.keys() if self[a].get('_definition.scope','Item')=='Item']) loopable = self.get_loopable_cats() loopers = [self.ddlm_immediate_children(a) for a in loopable] print('Loopable cats:' + repr(loopable)) loop_children = [[b for b in a if b.lower() in loopable ] for a in loopers] expand_list = dict([(a,b) for a,b in zip(loopable,loop_children) if len(b)>0]) print("Expansion list:" + repr(expand_list)) extra_table = {} #for debugging we keep it separate from base_table until the end def expand_base_table(parent_cat,child_cats): extra_names = [] # first deal with all the child categories for child_cat in child_cats: nn = [] if child_cat in expand_list: # a nested category: grab its names nn = expand_base_table(child_cat,expand_list[child_cat]) # store child names 
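            # (because child loop categories are recursed first, their datanames
            # are also registered under this parent category's (cat,obj) key;
            # repeated objects are appended rather than overwritten below)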
                    extra_names += nn
                # add all child names to the table; repeated (cat,obj) pairs get
                # their extra names appended instead
                child_names = [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                               for n in self.names_in_cat(child_cat) if self[n].get('_type.purpose','') != 'Key']
                child_names += extra_names
                for obj,name in child_names:
                    if (parent_cat,obj) not in extra_table:
                        extra_table[(parent_cat,obj)] = [name]
                    elif name not in extra_table[(parent_cat,obj)]:
                        extra_table[(parent_cat,obj)] += [name]
            # and finally, add our own names to the return list
            child_names += [(self[n]['_name.object_id'].lower(),self[n]['_definition.id']) \
                            for n in self.names_in_cat(parent_cat) if self[n].get('_type.purpose','')!='Key']
            return child_names
        [expand_base_table(parent,child) for parent,child in expand_list.items()]
        print('Expansion cat/obj values: ' + repr(extra_table))
        # append repeated ones
        non_repeats = dict([a for a in extra_table.items() if a[0] not in base_table])
        repeats = [a for a in extra_table.keys() if a in base_table]
        base_table.update(non_repeats)
        for k in repeats:
            base_table[k] += extra_table[k]
        self.cat_obj_lookup_table = base_table
        self.loop_expand_list = expand_list

    def get_loopable_cats(self):
        """A short utility function which returns a list of looped categories.
        This is preferred to a fixed attribute as that fixed attribute would need
        to be updated after any edits"""
        return [a.lower() for a in self.keys() if self[a].get('_definition.class','')=='Loop']

    def create_cat_key_table(self):
        """Create a utility table with a list of keys applicable to each category.
        A key is a compound key, that is, it is a list"""
        self.cat_key_table = dict([(c,[listify(self[c].get("_category_key.name",
            [self[c].get("_category.key_id")]))]) for c in self.get_loopable_cats()])
        def collect_keys(parent_cat,child_cats):
            kk = []
            for child_cat in child_cats:
                if child_cat in self.loop_expand_list:
                    kk += collect_keys(child_cat,self.loop_expand_list[child_cat])
                # add these keys to our list
                kk += [listify(self[child_cat].get('_category_key.name',[self[child_cat].get('_category.key_id')]))]
            self.cat_key_table[parent_cat] = self.cat_key_table[parent_cat] + kk
            return kk
        for k,v in self.loop_expand_list.items():
            collect_keys(k,v)
        print('Keys for categories: ' + repr(self.cat_key_table))

    def add_type_info(self):
        if "_item_type_list.construct" in self.master_block:
            types = self.master_block["_item_type_list.code"]
            prim_types = self.master_block["_item_type_list.primitive_code"]
            constructs = list([a + "$" for a in self.master_block["_item_type_list.construct"]])
            # add in \r wherever we see \n, and change \{ to \\{
            def regex_fiddle(mm_regex):
                brack_match = r"((.*\[.+)(\\{)(.*\].*))"
                ret_match = r"((.*\[.+)(\\n)(.*\].*))"
                fixed_regexp = mm_regex[:]   #copy
                # fix the brackets
                bm = re.match(brack_match,mm_regex)
                if bm != None: fixed_regexp = bm.expand(r"\2\\\\{\4")
                # fix missing \r
                rm = re.match(ret_match,fixed_regexp)
                if rm != None: fixed_regexp = rm.expand(r"\2\3\\r\4")
                #print("Regexp %s becomes %s" % (mm_regex,fixed_regexp))
                return fixed_regexp
            constructs = map(regex_fiddle,constructs)
            for typecode,construct in zip(types,constructs):
                self.typedic[typecode] = re.compile(construct,re.MULTILINE|re.DOTALL)
            # now make a primitive <-> type construct mapping
            for typecode,primtype in zip(types,prim_types):
                self.primdic[typecode] = primtype

    def add_category_info(self,full=True):
        if self.diclang == "DDLm":
            catblocks = [c for c in self.keys() if self[c].get('_definition.scope')=='Category']
            looped_cats = [a for a in catblocks if
                           self[a].get('_definition.class','Set') == 'Loop']
            self.parent_lookup = {}
            for one_cat in looped_cats:
                parent_cat = one_cat
                parent_def = self[parent_cat]
                next_up = parent_def['_name.category_id'].lower()
                while next_up in self and self[next_up].get('_definition.class','Set') == 'Loop':
                    parent_def = self[next_up]
                    parent_cat = next_up
                    next_up = parent_def['_name.category_id'].lower()
                self.parent_lookup[one_cat] = parent_cat

            if full:
                self.key_equivs = {}
                for one_cat in looped_cats:   #follow them up
                    lower_keys = listify(self[one_cat]['_category_key.name'])
                    start_keys = lower_keys[:]
                    while len(lower_keys)>0:
                        this_cat = self[lower_keys[0]]['_name.category_id']
                        parent = [a for a in looped_cats if self[this_cat]['_name.category_id'].lower()==a]
                        #print("Processing %s, keys %s, parent %s" % (this_cat,repr(lower_keys),repr(parent)))
                        if len(parent)>1:
                            raise CifError("Category %s has more than one parent: %s" % (one_cat,repr(parent)))
                        if len(parent)==0: break
                        parent = parent[0]
                        parent_keys = listify(self[parent]['_category_key.name'])
                        linked_keys = [self[l]["_name.linked_item_id"] for l in lower_keys]
                        # sanity check
                        if set(parent_keys) != set(linked_keys):
                            raise CifError("Parent keys and linked keys are different! %s/%s" % (parent_keys,linked_keys))
                        # now add in our information
                        for parent,child in zip(linked_keys,start_keys):
                            self.key_equivs[child] = self.key_equivs.get(child,[])+[parent]
                        lower_keys = linked_keys   #preserves order of start keys
        else:
            self.parent_lookup = {}
            self.key_equivs = {}

    def change_category_name(self,oldname,newname):
        """Change the category name from [[oldname]] to [[newname]]"""
        self.unlock()
        if oldname not in self:
            raise KeyError('Cannot rename non-existent category %s to %s' % (oldname,newname))
        if newname in self:
            raise KeyError('Cannot rename %s to %s as %s already exists' % (oldname,newname,newname))
        child_defs = self.ddlm_immediate_children(oldname)
        self.rename(oldname,newname)   #NB no name integrity checks
        self[newname]['_name.object_id']=newname
        self[newname]['_definition.id']=newname
        for child_def in child_defs:
            self[child_def]['_name.category_id'] = newname
            if self[child_def].get('_definition.scope','Item')=='Item':
                newid = self.create_catobj_name(newname,self[child_def]['_name.object_id'])
                self[child_def]['_definition.id']=newid
                self.rename(child_def,newid[1:])   #no underscore at the beginning
        self.lock()

    def create_catobj_name(self,cat,obj):
        """Combine category and object in approved fashion to create id"""
        return ('_'+cat+'.'+obj)

    def change_category(self,itemname,catname):
        """Move itemname into catname, return new handle"""
        defid = self[itemname]
        if defid['_name.category_id'].lower()==catname.lower():
            print('Already in category, no change')
            return itemname
        if catname not in self:   #don't have it
            print('No such category %s' % catname)
            return itemname
        self.unlock()
        objid = defid['_name.object_id']
        defid['_name.category_id'] = catname
        newid = itemname   # stays the same for categories
        if defid.get('_definition.scope','Item') == 'Item':
            newid = self.create_catobj_name(catname,objid)
            defid['_definition.id']= newid
            self.rename(itemname,newid)
        self.set_parent(catname,newid)
        self.lock()
        return newid

    def change_name(self,one_def,newobj):
        """Change the object_id of one_def to newobj.
        This is not used for categories, but can be used for dictionaries"""
        if '_dictionary.title' not in self[one_def]:   #not a dictionary block
            newid = self.create_catobj_name(self[one_def]['_name.category_id'],newobj)
            self.unlock()
            self.rename(one_def,newid)
            self[newid]['_definition.id']=newid
            self[newid]['_name.object_id']=newobj
        else:
            self.unlock()
            newid = newobj
            self.rename(one_def,newobj)
            self[newid]['_dictionary.title'] = newid
        self.lock()
        return newid

    # Note that our semantic parent is given by catparent, but our syntactic parent is
    # always just the root block
    def add_category(self,catname,catparent=None,is_loop=True,allow_dangler=False):
        """Add a new category to the dictionary with name [[catname]].
           If [[catparent]] is None, the category will be a child of
           the topmost 'Head' category or else the top data block. If
           [[is_loop]] is false, a Set category is created. If
           [[allow_dangler]] is true, the parent category does not
           have to exist."""
        if catname in self:
            raise CifError('Attempt to add existing category %s' % catname)
        self.unlock()
        syntactic_root = self.get_roots()[0][0]
        if catparent is None:
            semantic_root = [a for a in self.keys() if self[a].get('_definition.class',None)=='Head']
            if len(semantic_root)>0:
                semantic_root = semantic_root[0]
            else:
                semantic_root = syntactic_root
        else:
            semantic_root = catparent
        realname = super(CifDic,self).NewBlock(catname,parent=syntactic_root)
        self.block_id_table[catname.lower()]=realname
        self[catname]['_name.object_id'] = catname
        if not allow_dangler or catparent is None:
            self[catname]['_name.category_id'] = self[semantic_root]['_name.object_id']
        else:
            self[catname]['_name.category_id'] = catparent
        self[catname]['_definition.id'] = catname
        self[catname]['_definition.scope'] = 'Category'
        if is_loop:
            self[catname]['_definition.class'] = 'Loop'
        else:
            self[catname]['_definition.class'] = 'Set'
        self[catname]['_description.text'] = 'No definition provided'
        self.lock()
        return catname

    def add_definition(self,itemname,catparent,def_text='PLEASE DEFINE ME',allow_dangler=False):
        """Add itemname to category [[catparent]]. If itemname contains periods,
        all text before the final period is ignored. If [[allow_dangler]] is True,
        no check for a parent category is made."""
        self.unlock()
        if '.' in itemname:
            objname = itemname.split('.')[-1]
        else:
            objname = itemname
        objname = objname.strip('_')
        if not allow_dangler and (catparent not in self or self[catparent]['_definition.scope']!='Category'):
            raise CifError('No category %s in dictionary' % catparent)
        fullname = '_'+catparent.lower()+'.'+objname
        print('New name: %s' % fullname)
        syntactic_root = self.get_roots()[0][0]
        realname = super(CifDic,self).NewBlock(fullname, fix=False, parent=syntactic_root)   #low-level change
        # update our dictionary structures
        self.block_id_table[fullname]=realname
        self[fullname]['_definition.id']=fullname
        self[fullname]['_name.object_id']=objname
        self[fullname]['_name.category_id']=catparent
        self[fullname]['_definition.class']='Datum'
        self[fullname]['_description.text']=def_text
        return fullname

    def remove_definition(self,defname):
        """Remove a definition from the dictionary."""
        if defname not in self:
            return
        if self[defname].get('_definition.scope')=='Category':
            children = self.ddlm_immediate_children(defname)
            [self.remove_definition(a) for a in children]
            cat_id = self[defname]['_definition.id'].lower()
        del self[defname]

    def get_cat_obj(self,name):
        """Return (cat,obj) tuple.
[[name]] must contain only a single period""" cat,obj = name.split('.') return (cat.strip('_'),obj) def get_name_by_cat_obj(self,category,object,give_default=False): """Return the dataname corresponding to the given category and object""" if category[0] == '_': #accidentally left in true_cat = category[1:].lower() else: true_cat = category.lower() try: return self.cat_obj_lookup_table[(true_cat,object.lower())][0] except KeyError: if give_default: return '_'+true_cat+'.'+object raise KeyError('No such category,object in the dictionary: %s %s' % (true_cat,object)) def WriteOut(self,**kwargs): myblockorder = self.get_full_child_list() self.set_grammar(self.grammar) self.standard = 'Dic' return super(CifDic,self).WriteOut(blockorder = myblockorder,**kwargs) def get_full_child_list(self): """Return a list of definition blocks in order parent-child-child-child-parent-child...""" top_block = self.get_roots()[0][0] root_cat = [a for a in self.keys() if self[a].get('_definition.class','Datum')=='Head'] if len(root_cat) == 1: all_names = [top_block] + self.recurse_child_list(root_cat[0]) unrooted = self.ddlm_danglers() double_names = set(unrooted).intersection(set(all_names)) if len(double_names)>0: raise CifError('Names are children of internal and external categories:%s' % repr(double_names)) remaining = unrooted[:] for no_root in unrooted: if self[no_root].get('_definition.scope','Item')=='Category': all_names += [no_root] remaining.remove(no_root) these_children = [n for n in unrooted if self[n]['_name.category_id'].lower()==no_root.lower()] all_names += these_children [remaining.remove(n) for n in these_children] # now sort by category ext_cats = set([self[r].get('_name.category_id',self.cat_from_name(r)).lower() for r in remaining]) for e in ext_cats: cat_items = [r for r in remaining if self[r].get('_name.category_id',self.cat_from_name(r)).lower() == e] [remaining.remove(n) for n in cat_items] all_names += cat_items if len(remaining)>0: print('WARNING: following items do not seem to belong to a category??') print(repr(remaining)) all_names += remaining print('Final block order: ' + repr(all_names)) return all_names raise ValueError('Dictionary contains no/multiple Head categories, please print as plain CIF instead') def cat_from_name(self,one_name): """Guess the category from the name. 
        This should be used only when this is not important semantic information,
        for example, when printing out"""
        (cat,obj) = one_name.split(".")
        if cat[0] == "_": cat = cat[1:]
        return cat

    def recurse_child_list(self,parentname):
        """Recursively expand the logical child list of [[parentname]]"""
        final_list = [parentname]
        child_blocks = [a for a in self.child_table.keys() if self[a].get('_name.category_id','').lower() == parentname.lower()]
        child_blocks.sort()   #we love alphabetical order
        child_items = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Item']
        final_list += child_items
        child_cats = [a for a in child_blocks if self[a].get('_definition.scope','Item') == 'Category']
        for child_cat in child_cats:
            final_list += self.recurse_child_list(child_cat)
        return final_list

    def get_key_pack(self,category,value,data):
        keyname = self[category][self.unique_spec]
        onepack = data.GetPackKey(keyname,value)
        return onepack

    def get_number_with_esd(numstring):
        numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
        our_match = re.match(numb_re,numstring)
        if our_match:
            a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
            # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
        else:
            return None,None
        if dot or q: return None,None   #a dot or question mark
        if exp:   #has exponent
            exp = exp.replace("d","e")   # mop up old fashioned numbers
            exp = exp.replace("D","e")
            base_num = base_num + exp
        # print("Debug: have %s for base_num from %s" % (base_num,numstring))
        base_num = float(base_num)
        # work out esd, if present.
        if esd:
            esd = float(esd[1:-1])   # no brackets
            if dad:   # decimal point + digits
                esd = esd * (10 ** (-1* len(dad)))
            if exp: esd = esd * (10 ** (float(exp[1:])))
        return base_num,esd

    def getmaxmin(self,rangeexp):
        regexp = '(-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?)([eEdD][+-]?[0-9]+)?)*'
        regexp = regexp + ":" + regexp
        matched = re.match(regexp,rangeexp)
        try:
            minimum = matched.group(1)
            maximum = matched.group(7)
        except AttributeError:
            print("Can't match %s" % rangeexp)
            return None,None
        if minimum == None: minimum = "."
        else: minimum = float(minimum)
        if maximum == None: maximum = "."
else: maximum = float(maximum) return maximum,minimum def initialise_drel(self): """Parse drel functions and prepare data structures in dictionary""" self.ddlm_parse_valid() #extract validity information from data block self.transform_drel() #parse the drel functions self.add_drel_funcs() #put the drel functions into the namespace def transform_drel(self): from .drel import drel_ast_yacc from .drel import py_from_ast import traceback parser = drel_ast_yacc.parser lexer = drel_ast_yacc.lexer my_namespace = self.keys() my_namespace = dict(zip(my_namespace,my_namespace)) # we provide a table of loopable categories {cat_name:((key1,key2..),[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) # parser.listable_items = [a for a in self.keys() if "*" in self[a].get("_type.dimension","")] derivable_list = [a for a in self.keys() if "_method.expression" in self[a] \ and self[a].get("_name.category_id","")!= "function"] for derivable in derivable_list: target_id = derivable # reset the list of visible names for parser special_ids = [dict(zip(self.keys(),self.keys()))] print("Target id: %s" % derivable) drel_exprs = self[derivable]["_method.expression"] drel_purposes = self[derivable]["_method.purpose"] all_methods = [] if not isinstance(drel_exprs,list): drel_exprs = [drel_exprs] drel_purposes = [drel_purposes] for drel_purpose,drel_expr in zip(drel_purposes,drel_exprs): if drel_purpose != 'Evaluation': continue drel_expr = "\n".join(drel_expr.splitlines()) # print("Transforming %s" % drel_expr) # List categories are treated differently... try: meth_ast = parser.parse(drel_expr+"\n",lexer=lexer) except: print('Syntax error in method for %s; leaving as is' % derivable) a,b = sys.exc_info()[:2] print((repr(a),repr(b))) print(traceback.print_tb(sys.exc_info()[-1],None,sys.stdout)) # reset the lexer lexer.begin('INITIAL') continue # Construct the python method cat_meth = False if self[derivable].get('_definition.scope','Item') == 'Category': cat_meth = True pyth_meth = py_from_ast.make_python_function(meth_ast,"pyfunc",target_id, loopable=loop_info, cif_dic = self,cat_meth=cat_meth) all_methods.append(pyth_meth) if len(all_methods)>0: save_overwrite = self[derivable].overwrite self[derivable].overwrite = True self[derivable]["_method.py_expression"] = all_methods self[derivable].overwrite = save_overwrite #print("Final result:\n " + repr(self[derivable]["_method.py_expression"])) def add_drel_funcs(self): from .drel import drel_ast_yacc from .drel import py_from_ast funclist = [a for a in self.keys() if self[a].get("_name.category_id","")=='function'] funcnames = [(self[a]["_name.object_id"], getattr(self[a].GetKeyedPacket("_method.purpose","Evaluation"),"_method.expression")) for a in funclist] # create executable python code... 
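        # Illustrative shape of the loop_info table assembled below
        # (hypothetical category, key and object names):
        #   {'atom_site': (['label'], ['label','fract_x','fract_y','fract_z'])}
        # i.e. loopable category -> (key object_ids, all object_ids in that category).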
parser = drel_ast_yacc.parser # we provide a table of loopable categories {cat_name:(key,[item_name,...]),...}) loopable_cats = self.get_loopable_cats() loop_keys = [listify(self[a]["_category_key.name"]) for a in loopable_cats] loop_keys = [[self[a]['_name.object_id'] for a in b] for b in loop_keys] cat_names = [self.names_in_cat(a,names_only=True) for a in loopable_cats] loop_info = dict(zip(loopable_cats,zip(loop_keys,cat_names))) for funcname,funcbody in funcnames: newline_body = "\n".join(funcbody.splitlines()) parser.target_id = funcname res_ast = parser.parse(newline_body) py_function = py_from_ast.make_python_function(res_ast,None,targetname=funcname,func_def=True,loopable=loop_info,cif_dic = self) #print('dREL library function ->\n' + py_function) global_table = globals() exec(py_function, global_table) #add to namespace #print('Globals after dREL functions added:' + repr(globals())) self.ddlm_functions = globals() #for outside access @track_recursion def derive_item(self,start_key,cifdata,store_value = False,allow_defaults=True): key = start_key #starting value result = None #success is a non-None value default_result = False #we have not used a default value # check for aliases # check for an older form of a new value found_it = [k for k in self.alias_table.get(key,[]) if k in cifdata] if len(found_it)>0: corrected_type = self.change_type(key,cifdata[found_it[0]]) return corrected_type # now do the reverse check - any alternative form alias_name = [a for a in self.alias_table.items() if key in a[1]] print('Aliases for %s: %s' % (key,repr(alias_name))) if len(alias_name)==1: key = alias_name[0][0] #actual definition name if key in cifdata: return self.change_type(key,cifdata[key]) found_it = [k for k in alias_name[0][1] if k in cifdata] if len(found_it)>0: return self.change_type(key,cifdata[found_it[0]]) elif len(alias_name)>1: raise CifError('Dictionary error: dataname alias appears in different definitions: ' + repr(alias_name)) the_category = self[key]["_name.category_id"] cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)] # store any default value in case we have a problem def_val = self[key].get("_enumeration.default","") def_index_val = self[key].get("_enumeration.def_index_id","") if len(has_cat_names)==0: # try category method cat_result = {} pulled_from_cats = [k for k in self.keys() if '_category_construct_local.components' in self[k]] pulled_from_cats = [(k,[ self[n]['_name.category_id'] for n in self[k]['_category_construct_local.components']] ) for k in pulled_from_cats] pulled_to_cats = [k[0] for k in pulled_from_cats if the_category in k[1]] if '_category_construct_local.type' in self[the_category]: print("**Now constructing category %s using DDLm attributes**" % the_category) try: cat_result = self.construct_category(the_category,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s (error)' % the_category) # Trying a pull-back when the category is partially populated # will not work, hence we test that cat_result has no keys if len(pulled_to_cats)>0 and len(cat_result)==0: print("**Now populating category %s from pulled-back category %s" % (the_category,repr(pulled_to_cats))) try: cat_result = self.push_from_pullback(the_category,pulled_to_cats,cifdata,store_value=True) except (CifRecursionError,StarFile.StarDerivationError): print('** Failed to construct category %s from pullback 
information (error)' % the_category)
            if '_method.py_expression' in self[the_category] and key not in cat_result:
                print("**Now applying category method for %s in search of %s**" % (the_category,key))
                cat_result = self.derive_item(the_category,cifdata,store_value=True)
            print("**Tried pullbacks, obtained for %s " % the_category + repr(cat_result))
            # do we now have our value?
            if key in cat_result:
                return cat_result[key]
        # Recalculate in case it actually worked
        has_cat_names = [a for a in cat_names if cifdata.has_key_or_alias(a)]
        the_funcs = self[key].get('_method.py_expression',"")
        if the_funcs:   #attempt to calculate it
            #global_table = globals()
            #global_table.update(self.ddlm_functions)
            for one_func in the_funcs:
                print('Executing function for %s:' % key)
                #print(one_func)
                exec(one_func, globals())   #will access dREL functions, puts "pyfunc" in scope
                # print('in following global environment: ' + repr(global_table))
                stored_setting = cifdata.provide_value
                cifdata.provide_value = True
                try:
                    result = pyfunc(cifdata)
                except CifRecursionError as s:
                    print(s)
                    result = None
                except StarFile.StarDerivationError as s:
                    print(s)
                    result = None
                finally:
                    cifdata.provide_value = stored_setting
                if result is not None:
                    break
                #print("Function returned {!r}".format(result))
        if result is None and allow_defaults:   # try defaults
            if def_val:
                result = self.change_type(key,def_val)
                default_result = True
            elif def_index_val:   #derive a default value
                index_vals = self[key]["_enumeration_default.index"]
                val_to_index = cifdata[def_index_val]   #what we are keying on
                lcase_comp = False   #initialise so it is bound even for non-text types
                if self[def_index_val]['_type.contents'] in ['Code','Name','Tag']:
                    lcase_comp = True
                    index_vals = [a.lower() for a in index_vals]
                # Handle loops
                if isinstance(val_to_index,list):
                    if lcase_comp:
                        val_to_index = [a.lower() for a in val_to_index]
                    keypos = [index_vals.index(a) for a in val_to_index]
                    result = [self[key]["_enumeration_default.value"][a] for a in keypos]
                else:
                    if lcase_comp:
                        val_to_index = val_to_index.lower()
                    keypos = index_vals.index(val_to_index)   #value error if no such value available
                    result = self[key]["_enumeration_default.value"][keypos]
                default_result = True   #flag that it must be extended
                result = self.change_type(key,result)
                print("Indexed on %s to get %s for %s" % (def_index_val,repr(result),repr(val_to_index)))
        # read it in
        if result is None:   #can't do anything else
            print('Warning: no way of deriving item %s, allow_defaults is %s' % (key,repr(allow_defaults)))
            raise StarFile.StarDerivationError(start_key)
        is_looped = False
        if self[the_category].get('_definition.class','Set')=='Loop':
            is_looped = True
            if len(has_cat_names)>0:   #this category already exists
                if result is None or default_result:   #need to create a list of values
                    loop_len = len(cifdata[has_cat_names[0]])
                    out_result = [result]*loop_len
                    result = out_result
            else:   #nothing exists in this category, we can't store this at all
                print('Resetting result %s for %s to null list as category is empty' % (repr(result),key))
                result = []
        # now try to insert the new information into the right place
        # find if items of this category already appear...
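        # Illustrative call (hypothetical dataname, assuming the loaded dictionary
        # defines a dREL Evaluation method for it):
        #   volume = cif_dic.derive_item('_cell.volume', cif_block, store_value=True)
        # StarFile.StarDerivationError is raised when no method, default or
        # pull-back route yields a value.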
# Never cache empty values if not (isinstance(result,list) and len(result)==0) and\ store_value: if self[key].get("_definition.scope","Item")=='Item': if is_looped: result = self.store_new_looped_value(key,cifdata,result,default_result) else: result = self.store_new_unlooped_value(key,cifdata,result) else: self.store_new_cat_values(cifdata,result,the_category) return result def store_new_looped_value(self,key,cifdata,result,default_result): """Store a looped value from the dREL system into a CifFile""" # try to change any matrices etc. to lists the_category = self[key]["_name.category_id"] out_result = result if result is not None and not default_result: # find any numpy arrays def conv_from_numpy(one_elem): if not hasattr(one_elem,'dtype'): if isinstance(one_elem,(list,tuple)): return StarFile.StarList([conv_from_numpy(a) for a in one_elem]) return one_elem if one_elem.size > 1: #so is not a float return StarFile.StarList([conv_from_numpy(a) for a in one_elem.tolist()]) else: try: return one_elem.item(0) except: return one_elem out_result = [conv_from_numpy(a) for a in result] # so out_result now contains a value suitable for storage cat_names = [a for a in self.keys() if self[a].get("_name.category_id",None)==the_category] has_cat_names = [a for a in cat_names if a in cifdata] print('Adding {}, found pre-existing names: '.format(key) + repr(has_cat_names)) if len(has_cat_names)>0: #this category already exists cifdata[key] = out_result #lengths must match or else!! cifdata.AddLoopName(has_cat_names[0],key) else: cifdata[key] = out_result cifdata.CreateLoop([key]) print('Loop info:' + repr(cifdata.loops)) return out_result def store_new_unlooped_value(self,key,cifdata,result): """Store a single value from the dREL system""" if result is not None and hasattr(result,'dtype'): if result.size > 1: out_result = StarFile.StarList(result.tolist()) cifdata[key] = out_result else: cifdata[key] = result.item(0) else: cifdata[key] = result return result def construct_category(self,category,cifdata,store_value=True): """Construct a category using DDLm attributes""" con_type = self[category].get('_category_construct_local.type',None) if con_type == None: return {} if con_type == 'Pullback' or con_type == 'Filter': morphisms = self[category]['_category_construct_local.components'] morph_values = [cifdata[a] for a in morphisms] # the mapped values for each cat cats = [self[a]['_name.category_id'] for a in morphisms] cat_keys = [self[a]['_category.key_id'] for a in cats] cat_values = [list(cifdata[a]) for a in cat_keys] #the category key values for each cat if con_type == 'Filter': int_filter = self[category].get('_category_construct_local.integer_filter',None) text_filter = self[category].get('_category_construct_local.text_filter',None) if int_filter is not None: morph_values.append([int(a) for a in int_filter]) if text_filter is not None: morph_values.append(text_filter) cat_values.append(range(len(morph_values[-1]))) # create the mathematical product filtered by equality of dataname values pullback_ids = [(x,y) for x in cat_values[0] for y in cat_values[1] \ if morph_values[0][cat_values[0].index(x)]==morph_values[1][cat_values[1].index(y)]] # now prepare for return if len(pullback_ids)==0: return {} newids = self[category]['_category_construct_local.new_ids'] fullnewids = [self.cat_obj_lookup_table[(category,n)][0] for n in newids] if con_type == 'Pullback': final_results = {fullnewids[0]:[x[0] for x in pullback_ids],fullnewids[1]:[x[1] for x in pullback_ids]} 
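                # Illustrative pullback (hypothetical values): if the first category
                # has keys [1,2] mapping through its morphism to ['x','y'], and the
                # second has keys [10,20] mapping to ['y','x'], then pullback_ids
                # above is [(1,20),(2,10)] - the key pairs whose mapped values agree.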
final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) final_results.update(self.duplicate_datanames(cifdata,cats[1],category,key_vals = final_results[fullnewids[1]],skip_names=newids)) elif con_type == 'Filter': #simple filter final_results = {fullnewids[0]:[x[0] for x in pullback_ids]} final_results.update(self.duplicate_datanames(cifdata,cats[0],category,key_vals = final_results[fullnewids[0]],skip_names=newids)) if store_value: self.store_new_cat_values(cifdata,final_results,category) return final_results def push_from_pullback(self,target_category,source_categories,cifdata,store_value=True): """Each of the categories in source_categories are pullbacks that include the target_category""" target_key = self[target_category]['_category.key_id'] result = {target_key:[]} first_time = True # for each source category, determine which element goes to the target for sc in source_categories: components = self[sc]['_category_construct_local.components'] comp_cats = [self[c]['_name.category_id'] for c in components] new_ids = self[sc]['_category_construct_local.new_ids'] source_ids = [self.cat_obj_lookup_table[(sc,n)][0] for n in new_ids] if len(components) == 2: # not a filter element_pos = comp_cats.index(target_category) old_id = source_ids[element_pos] print('Using %s to populate %s' % (old_id,target_key)) result[target_key].extend(cifdata[old_id]) # project through all identical names extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids+[target_key]) # we only include keys that are common to all categories if first_time: result.update(extra_result) else: for k in extra_result.keys(): if k in result: print('Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: extra_result = self.duplicate_datanames(cifdata,sc,target_category,skip_names=new_ids) if len(extra_result)>0 or source_ids[0] in cifdata: #something is present result[target_key].extend(cifdata[source_ids[0]]) for k in extra_result.keys(): if k in result: print('Reverse filter: Updating %s: was %s' % (k,repr(result[k]))) result[k].extend(extra_result[k]) else: result[k]=extra_result[k] # Bonus derivation if there is a singleton filter if self[sc]['_category_construct_local.type'] == 'Filter': int_filter = self[sc].get('_category_construct_local.integer_filter',None) text_filter = self[sc].get('_category_construct_local.text_filter',None) if int_filter is not None: filter_values = int_filter else: filter_values = text_filter if len(filter_values)==1: #a singleton extra_dataname = self[sc]['_category_construct_local.components'][0] if int_filter is not None: new_value = [int(filter_values[0])] * len(cifdata[source_ids[0]]) else: new_value = filter_values * len(cifdata[source_ids[0]]) if extra_dataname not in result: result[extra_dataname] = new_value else: result[extra_dataname].extend(new_value) else: raise ValueError('Unexpected category construct type' + self[sc]['_category_construct_local.type']) first_time = False # check for sanity - all dataname lengths must be identical datalen = len(set([len(a) for a in result.values()])) if datalen != 1: raise AssertionError('Failed to construct equal-length category items,'+ repr(result)) if store_value: print('Now storing ' + repr(result)) self.store_new_cat_values(cifdata,result,target_category) return result def duplicate_datanames(self,cifdata,from_category,to_category,key_vals=None,skip_names=[]): """Copy across datanames for which the from_category key 
        equals [[key_vals]]"""
        result = {}
        s_names_in_cat = set(self.names_in_cat(from_category,names_only=True))
        t_names_in_cat = set(self.names_in_cat(to_category,names_only=True))
        can_project = s_names_in_cat & t_names_in_cat
        can_project -= set(skip_names)   #already dealt with
        source_key = self[from_category]['_category.key_id']
        print('Source dataname set: ' + repr(s_names_in_cat))
        print('Target dataname set: ' + repr(t_names_in_cat))
        print('Projecting through following datanames from %s to %s' % (from_category,to_category) + repr(can_project))
        for project_name in can_project:
            full_from_name = self.cat_obj_lookup_table[(from_category.lower(),project_name.lower())][0]
            full_to_name = self.cat_obj_lookup_table[(to_category.lower(),project_name.lower())][0]
            if key_vals is None:
                try:
                    result[full_to_name] = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    pass
            else:
                all_key_vals = cifdata[source_key]
                filter_pos = [all_key_vals.index(a) for a in key_vals]
                try:
                    all_data_vals = cifdata[full_from_name]
                except StarFile.StarDerivationError:
                    continue   #underlying data missing, move to the next name
                result[full_to_name] = [all_data_vals[i] for i in filter_pos]
        return result

    def store_new_cat_values(self,cifdata,result,the_category):
        """Store the values in [[result]] into [[cifdata]]"""
        the_key = [a for a in result.keys() if self[a].get('_type.purpose','')=='Key']
        double_names = [a for a in result.keys() if a in cifdata]
        if len(double_names)>0:
            already_present = [a for a in self.names_in_cat(the_category) if a in cifdata]
            if set(already_present) != set(result.keys()):
                print("Category %s not updated, mismatched datanames: %s" % (the_category, repr(set(already_present)^set(result.keys()))))
                return
            #check key values
            if len(the_key)>0:
                old_keys = set(cifdata[the_key[0]])
                common_keys = old_keys & set(result[the_key[0]])
                if len(common_keys)>0:
                    print("Category %s not updated, key values in common: %s" % (the_category,repr(common_keys)))
                    return
            #extend result values with old values
            for one_name,one_value in result.items():
                result[one_name].extend(cifdata[one_name])
        for one_name, one_value in result.items():
            try:
                self.store_new_looped_value(one_name,cifdata,one_value,False)
            except StarFile.StarError:
                print('%s: Not replacing %s with calculated %s' % (one_name,repr(cifdata[one_name]),repr(one_value)))
        #put the key as the first item
        print('Fixing item order for {}'.format(repr(the_key)))
        for one_key in the_key:   #should only be one
            cifdata.ChangeItemOrder(one_key,0)

    def generate_default_packet(self,catname,catkey,keyvalue):
        """Return a StarPacket with items from ``catname`` and a key value
        of ``keyvalue``"""
        newpack = StarPacket()
        for na in self.names_in_cat(catname):
            def_val = self[na].get("_enumeration.default","")
            if def_val:
                final_val = self.change_type(na,def_val)
                newpack.extend(final_val)
                setattr(newpack,na,final_val)
        if len(newpack)>0:
            newpack.extend(keyvalue)
            setattr(newpack,catkey,keyvalue)
        return newpack

    def switch_numpy(self,to_val):
        pass

    def change_type(self,itemname,inval):
        if inval == "?": return inval
        change_function = convert_type(self[itemname])
        if isinstance(inval,list) and not isinstance(inval,StarFile.StarList):   #from a loop
            newval = list([change_function(a) for a in inval])
        else:
            newval = change_function(inval)
        return newval

    def install_validation_functions(self):
        """Install the DDL-appropriate validation checks"""
        if self.diclang != 'DDLm':
            # functions which check conformance
            self.item_validation_funs = [
                self.validate_item_type,
                self.validate_item_esd,
                self.validate_item_enum,
                self.validate_enum_range,
                self.validate_looping ]
            # functions checking loop values
            self.loop_validation_funs = \
                [ self.validate_loop_membership,
                  self.validate_loop_key,
                  self.validate_loop_references ]
            # where we need to look at other values
            self.global_validation_funs = [
                self.validate_exclusion,
                self.validate_parent,
                self.validate_child,
                self.validate_dependents,
                self.validate_uniqueness ]
            # where only a full block will do
            self.block_validation_funs = [
                self.validate_mandatory_category ]
            # removal is quicker with special checks
            self.global_remove_validation_funs = [
                self.validate_remove_parent_child ]
        elif self.diclang == 'DDLm':
            self.item_validation_funs = [
                self.validate_item_enum,
                self.validate_item_esd_ddlm,
                ]
            self.loop_validation_funs = [
                self.validate_looping_ddlm,
                self.validate_loop_key_ddlm,
                self.validate_loop_membership ]
            self.global_validation_funs = []
            self.block_validation_funs = [
                self.check_mandatory_items,
                self.check_prohibited_items ]
            self.global_remove_validation_funs = []
        self.optimize = False   # default value
        self.done_parents = []
        self.done_children = []
        self.done_keys = []

    def validate_item_type(self,item_name,item_value):
        def mymatch(m,a):
            res = m.match(a)
            if res != None: return res.group()
            else: return ""
        target_type = self[item_name].get(self.type_spec)
        if target_type == None:   # e.g. a category definition
            return {"result":True}   # not restricted in any way
        matchexpr = self.typedic[target_type]
        item_values = listify(item_value)
        #for item in item_values:
            #print("Type match " + item_name + " " + item + ":",)
        #skip dots and question marks
        check_all = [a for a in item_values if a !="." and a != "?"]
        check_all = [a for a in check_all if mymatch(matchexpr,a) != a]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def decide(self,result_list):
        """Construct the return list"""
        if len(result_list)==0:
            return {"result":True}
        else:
            return {"result":False,"bad_values":result_list}

    def validate_item_container(self, item_name,item_value):
        container_type = self[item_name]['_type.container']
        item_values = listify(item_value)
        if container_type == 'Single':
            okcheck = [a for a in item_values if not isinstance(a,(int,float,long,unicode))]
            return self.decide(okcheck)
        if container_type in ('Multiple','List'):
            okcheck = [a for a in item_values if not isinstance(a,StarList)]
            return self.decide(okcheck)
        if container_type == 'Array':   #A list with numerical values
            okcheck = [a for a in item_values if not isinstance(a,StarList)]
            first_check = self.decide(okcheck)
            if not first_check['result']: return first_check
            #num_check = [a for a in item_values if len([b for b in a if not isinstance

    def validate_item_esd(self,item_name,item_value):
        if self[item_name].get(self.primitive_type) != 'numb':
            return {"result":None}
        can_esd = self[item_name].get(self.esd_spec,"none") == "esd"
        if can_esd: return {"result":True}   #must be OK!
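        # Illustrative behaviour of get_number_with_esd, on which the checks
        # below rely (the values follow from the regular expression above):
        #   get_number_with_esd("1.234(5)") -> (1.234, 0.005)
        #   get_number_with_esd("12(3)")    -> (12.0, 3.0)
        #   get_number_with_esd("?")        -> (None, None)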
        item_values = listify(item_value)
        check_all = list([a for a in item_values if get_number_with_esd(a)[1] != None])
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_item_esd_ddlm(self,item_name,item_value):
        if self[item_name].get('_type.contents') not in \
           ['Count','Index','Integer','Real','Imag','Complex','Binary','Hexadecimal','Octal']:
            return {"result":None}
        can_esd = True
        if self[item_name].get('_type.purpose') != 'Measurand':
            can_esd = False
        item_values = listify(item_value)
        check_all = [get_number_with_esd(a)[1] for a in item_values]
        check_all = [v for v in check_all if (can_esd and v == None) or \
                     (not can_esd and v != None)]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        return {"result":True}

    def validate_enum_range(self,item_name,item_value):
        if "_item_range.minimum" not in self[item_name] and \
           "_item_range.maximum" not in self[item_name]:
            return {"result":None}
        minvals = self[item_name].get("_item_range.minimum",default = ["."])
        maxvals = self[item_name].get("_item_range.maximum",default = ["."])
        def makefloat(a):
            if a == ".": return a
            else: return float(a)
        maxvals = map(makefloat, maxvals)
        minvals = map(makefloat, minvals)
        rangelist = list(zip(minvals,maxvals))
        item_values = listify(item_value)
        def map_check(rangelist,item_value):
            if item_value == "?" or item_value == ".": return True
            iv,esd = get_number_with_esd(item_value)
            if iv==None: return None   #shouldn't happen as is numb type
            for lower,upper in rangelist:
                #check the minima
                if lower == ".": lower = iv - 1
                if upper == ".": upper = iv + 1
                if iv > lower and iv < upper: return True
                if upper == lower and iv == upper: return True
            # debug
            # print("Value %s fails range check %d < x < %d" % (item_value,lower,upper))
            return False
        check_all = [a for a in item_values if map_check(rangelist,a) != True]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_item_enum(self,item_name,item_value):
        try:
            enum_list = self[item_name][self.enum_spec][:]
        except KeyError:
            return {"result":None}
        enum_list.append(".")   #default value
        enum_list.append("?")   #unknown
        item_values = listify(item_value)
        #print("Enum check: {!r} in {!r}".format(item_values, enum_list))
        check_all = [a for a in item_values if a not in enum_list]
        if len(check_all)>0: return {"result":False,"bad_values":check_all}
        else: return {"result":True}

    def validate_looping(self,item_name,item_value):
        try:
            must_loop = self[item_name][self.must_loop_spec]
        except KeyError:
            return {"result":None}
        if must_loop == 'yes' and isinstance(item_value,(unicode,str)):   # not looped
            return {"result":False}   #this could be triggered
        if must_loop == 'no' and not isinstance(item_value,(unicode,str)):
            return {"result":False}
        return {"result":True}

    def validate_looping_ddlm(self,loop_names):
        """Check that all names are loopable"""
        truly_loopy = self.get_final_cats(loop_names)
        if len(truly_loopy) < len(loop_names):   #some names are in Set categories
            bad_items = [a for a in loop_names
                         if self[a][self.cat_spec].lower() not in self.parent_lookup]
            return {"result":False,"bad_items":bad_items}
        else:
            return {"result":True}

    def get_final_cats(self,loop_names):
        """Return a list of the uppermost parent categories for the loop_names.
        Names that are not from loopable categories are ignored."""
        try:
            categories = [self[a][self.cat_spec].lower() for a in loop_names]
        except KeyError:   #category_id is mandatory
            raise ValidCifError( "%s missing from dictionary %s for item in loop containing %s" % (self.cat_spec,self.dicname,loop_names[0]))
        truly_looped = [a for a in categories if a in self.parent_lookup.keys()]
        return [self.parent_lookup[a] for a in truly_looped]

    def validate_loop_key(self,loop_names):
        category = self[loop_names[0]][self.cat_spec]
        # find any unique values which must be present
        key_spec = self[category].get(self.key_spec,[])
        for names_to_check in key_spec:
            if isinstance(names_to_check,unicode):   #only one
                names_to_check = [names_to_check]
            for loop_key in names_to_check:
                if loop_key not in loop_names:
                    #is this one of those dang implicit items?
                    if self[loop_key].get(self.must_exist_spec,None) == "implicit":
                        continue   #it is virtually there...
                    alternates = self.get_alternates(loop_key)
                    if alternates == []:
                        return {"result":False,"bad_items":loop_key}
                    alt = [a for a in alternates if a in loop_names]
                    if len(alt) == 0:
                        return {"result":False,"bad_items":loop_key}   # no alternates
        return {"result":True}

    def validate_loop_key_ddlm(self,loop_names):
        """Make sure that at least one of the necessary keys is available"""
        final_cats = self.get_final_cats(loop_names)
        if len(final_cats)>0:
            poss_keys = self.cat_key_table[final_cats[0]][0]
            found_keys = [a for a in poss_keys if a in loop_names]
            if len(found_keys)>0:
                return {"result":True}
            else:
                return {"result":False,"bad_items":poss_keys}
        else:
            return {"result":True}

    def validate_loop_references(self,loop_names):
        must_haves = [self[a].get(self.list_ref_spec,None) for a in loop_names]
        must_haves = [a for a in must_haves if a != None]
        # build a flat list. For efficiency we don't remove duplicates, as
        # we expect no more than the order of 10 or 20 looped names.
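        # Illustrative sketch (hypothetical datanames): a required name ending
        # in an underscore, such as '_geom_bond_atom_site_label_', acts as a
        # group reference below and is satisfied by any looped name sharing
        # that prefix; a plain name such as '_geom_bond_distance' must appear
        # literally or through one of its registered alternates.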
        def flat_func(a,b):
            if isinstance(b,unicode):
                a.append(b)   #single name
            else:
                a.extend(b)   #list of names
            return a
        flat_mh = []
        [flat_func(flat_mh,a) for a in must_haves]
        group_mh = [a for a in flat_mh if a[-1]=="_"]
        single_mh = [a for a in flat_mh if a[-1]!="_"]
        res = [a for a in single_mh if a not in loop_names]
        def check_gr(s_item, name_list):
            nl = map(lambda a:a[:len(s_item)],name_list)
            if s_item in nl: return True
            return False
        res_g = [a for a in group_mh if check_gr(a,loop_names)]
        if len(res) == 0 and len(res_g) == 0: return {"result":True}
        # construct alternate list; note the names without alternates before filtering
        alternates = [(a,self.get_alternates(a)) for a in res]
        # next line purely for error reporting
        missing_alts = [a[0] for a in alternates if a[1] == []]
        alternates = [a for a in alternates if a[1] != []]
        if len(alternates) != len(res):
            return {"result":False,"bad_items":missing_alts}   #short cut; at least one
                                                               #doesn't have an alternate
        #loop over alternates
        for orig_name,alt_names in alternates:
            alt = [a for a in alt_names if a in loop_names]
            if len(alt) == 0: return {"result":False,"bad_items":orig_name}   # no alternates
        return {"result":True}   #found alternates

    def get_alternates(self,main_name,exclusive_only=False):
        alternates = self[main_name].get(self.related_func,None)
        alt_names = []
        if alternates != None:
            alt_names = self[main_name].get(self.related_item,None)
            if isinstance(alt_names,unicode):
                alt_names = [alt_names]
                alternates = [alternates]
            together = zip(alt_names,alternates)
            if exclusive_only:
                alt_names = [a for a in together if a[1]=="alternate_exclusive" \
                             or a[1]=="replace"]
            else:
                alt_names = [a for a in together if a[1]=="alternate" or a[1]=="replace"]
            alt_names = list([a[0] for a in alt_names])
        # now do the alias thing
        alias_names = listify(self[main_name].get("_item_aliases.alias_name",[]))
        alt_names.extend(alias_names)
        # print("Alternates for {}: {!r}".format(main_name, alt_names))
        return alt_names

    def validate_exclusion(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        alternates = [a.lower() for a in self.get_alternates(item_name,exclusive_only=True)]
        item_name_list = [a.lower() for a in whole_block.keys()]
        item_name_list.extend([a.lower() for a in provisional_items.keys()])
        bad = [a for a in alternates if a in item_name_list]
        if len(bad)>0:
            print("Bad: %s, alternates %s" % (repr(bad),repr(alternates)))
            return {"result":False,"bad_items":bad}
        else:
            return {"result":True}

    # validate that parent exists and contains matching values
    def validate_parent(self,item_name,item_value,whole_block,provisional_items={},globals={}):
        parent_item = self[item_name].get(self.parent_spec)
        if not parent_item: return {"result":None}   #no parent specified
        if isinstance(parent_item,list):
            parent_item = parent_item[0]
        if self.optimize:
            if parent_item in self.done_parents:
                return {"result":None}
            else:
                self.done_parents.append(parent_item)
                print("Done parents %s" % repr(self.done_parents))
        # initialise parent/child values
        if isinstance(item_value,unicode):
            child_values = [item_value]
        else:
            child_values = item_value[:]   #copy for safety
        # track down the parent
        # print("Looking for {} parent item {} in {!r}".format(item_name, parent_item, whole_block))
        # if globals contains the parent values, we are doing a DDL2 dictionary, and so
        # we have collected all parent values into the global block - so no need to search
        # for them elsewhere.
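        # Illustrative parent/child constraint (hypothetical datanames): every
        # value taken by a child item such as '_atom_site.type_symbol' must
        # also occur among the values of its declared parent item, e.g.
        # '_atom_type.symbol'; the values '.' and '?' are always acceptable
        # (see check_parent_child below).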
# print("Looking for {!r}".format(parent_item)) parent_values = globals.get(parent_item) if not parent_values: parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # go for alternates namespace = whole_block.keys() namespace.extend(provisional_items.keys()) namespace.extend(globals.keys()) alt_names = filter_present(self.get_alternates(parent_item),namespace) if len(alt_names) == 0: if len([a for a in child_values if a != "." and a != "?"])>0: return {"result":False,"parent":parent_item}#no parent available -> error else: return {"result":None} #maybe True is more appropriate?? parent_item = alt_names[0] #should never be more than one?? parent_values = provisional_items.get(parent_item,whole_block.get(parent_item)) if not parent_values: # check global block parent_values = globals.get(parent_item) if isinstance(parent_values,unicode): parent_values = [parent_values] #print("Checking parent %s against %s, values %r/%r" % (parent_item, # item_name, parent_values, child_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing) > 0: return {"result":False,"bad_values":missing,"parent":parent_item} return {"result":True} def validate_child(self,item_name,item_value,whole_block,provisional_items={},globals={}): try: child_items = self[item_name][self.child_spec][:] #copy except KeyError: return {"result":None} #not relevant # special case for dictionaries -> we check parents of children only if item_name in globals: #dictionary so skip return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] if isinstance(item_value,unicode): # single value parent_values = [item_value] else: parent_values = item_value[:] # expand child list with list of alternates for child_item in child_items[:]: child_items.extend(self.get_alternates(child_item)) # now loop over the children for child_item in child_items: if self.optimize: if child_item in self.done_children: return {"result":None} else: self.done_children.append(child_item) print("Done children %s" % repr(self.done_children)) if child_item in provisional_items: child_values = provisional_items[child_item][:] elif child_item in whole_block: child_values = whole_block[child_item][:] else: continue if isinstance(child_values,unicode): child_values = [child_values] # print("Checking child %s against %s, values %r/%r" % (child_item, # item_name, child_values, parent_values)) missing = self.check_parent_child(parent_values,child_values) if len(missing)>0: return {"result":False,"bad_values":missing,"child":child_item} return {"result":True} #could mean that no child items present #a generic checker: all child vals should appear in parent_vals def check_parent_child(self,parent_vals,child_vals): # shield ourselves from dots and question marks pv = parent_vals[:] pv.extend([".","?"]) res = [a for a in child_vals if a not in pv] #print("Missing: %s" % res) return res def validate_remove_parent_child(self,item_name,whole_block): try: child_items = self[item_name][self.child_spec] except KeyError: return {"result":None} if isinstance(child_items,unicode): # only one child child_items = [child_items] for child_item in child_items: if child_item in whole_block: return {"result":False,"child":child_item} return {"result":True} def validate_dependents(self,item_name,item_value,whole_block,prov={},globals={}): try: dep_items = self[item_name][self.dep_spec][:] except KeyError: return {"result":None} #not relevant if isinstance(dep_items,unicode): 
            dep_items = [dep_items]
        actual_names = list(whole_block.keys())
        actual_names.extend(prov.keys())
        actual_names.extend(globals.keys())
        missing = [a for a in dep_items if a not in actual_names]
        if len(missing) > 0:
            alternates = [[self.get_alternates(a),a] for a in missing]
            # compact way to get a list of alternative items which are
            # present
            have_check = [(filter_present(b[0],actual_names), b[1]) for b in alternates]
            have_check = list([a for a in have_check if len(a[0])==0])
            if len(have_check) > 0:
                have_check = [a[1] for a in have_check]
                return {"result":False,"bad_items":have_check}
        return {"result":True}

    def validate_uniqueness(self,item_name,item_value,whole_block,provisional_items={},
                            globals={}):
        category = self[item_name].get(self.cat_spec)
        if category == None:
            print("No category found for %s" % item_name)
            return {"result":None}
        # print("Category {!r} for item {}".format(category, item_name))
        # we make a copy in the following as we will be removing stuff later!
        unique_i = self[category].get("_category_key.name",[])[:]
        if isinstance(unique_i,unicode):
            unique_i = [unique_i]
        if item_name not in unique_i:   #no need to verify
            return {"result":None}
        if isinstance(item_value,unicode):   #not looped
            return {"result":None}
        # print("Checking %s -> %s -> %s ->Unique: %r" % (item_name,category,catentry, unique_i))
        # check that we can't optimize by not doing this check
        if self.optimize:
            if unique_i in self.done_keys:
                return {"result":None}
            else:
                self.done_keys.append(unique_i)
        val_list = []
        # get the matching data from any other data items
        unique_i.remove(item_name)
        other_data = []
        if len(unique_i) > 0:   # i.e. do have others to think about
            for other_name in unique_i:
                # we look for the value first in the provisional dict, then the main block
                # the logic being that anything in the provisional dict overrides the
                # main block
                if other_name in provisional_items:
                    other_data.append(provisional_items[other_name])
                elif other_name in whole_block:
                    other_data.append(whole_block[other_name])
                elif self[other_name].get(self.must_exist_spec)=="implicit":
                    other_data.append([item_name]*len(item_value))   #placeholder
                else:
                    return {"result":False,"bad_items":other_name}   #missing data name
        # ok, so we go through all of our values
        # this works by comparing lists of strings to one another, and
        # so could be fooled if you think that '1.' and '1' are
        # identical
        for i in range(len(item_value)):
            #print("Value no.
%d" % i, end=" ") this_entry = item_value[i] for j in range(len(other_data)): this_entry = " ".join([this_entry,other_data[j][i]]) #print("Looking for {!r} in {!r}: ".format(this_entry, val_list)) if this_entry in val_list: return {"result":False,"bad_values":this_entry} val_list.append(this_entry) return {"result":True} def validate_mandatory_category(self,whole_block): mand_cats = [self[a]['_category.id'] for a in self.keys() if self[a].get("_category.mandatory_code","no")=="yes"] if len(mand_cats) == 0: return {"result":True} # print("Mandatory categories - {!r}".format(mand_cats) # find which categories each of our datanames belongs to all_cats = [self[a].get(self.cat_spec) for a in whole_block.keys()] missing = set(mand_cats) - set(all_cats) if len(missing) > 0: return {"result":False,"bad_items":repr(missing)} return {"result":True} def check_mandatory_items(self,whole_block,default_scope='Item'): """Return an error if any mandatory items are missing""" if len(self.scopes_mandatory)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' missing = list([a for a in self.scopes_mandatory[scope] if a not in whole_block]) if len(missing)==0: return {"result":True} else: return {"result":False,"bad_items":missing} def check_prohibited_items(self,whole_block,default_scope='Item'): """Return an error if any prohibited items are present""" if len(self.scopes_naughty)== 0: return {"result":True} if default_scope == 'Datablock': return {"result":True} #is a data file scope = whole_block.get('_definition.scope',default_scope) if '_dictionary.title' in whole_block: scope = 'Dictionary' present = list([a for a in self.scopes_naughty[scope] if a in whole_block]) if len(present)==0: return {"result":True} else: return {"result":False,"bad_items":present} def run_item_validation(self,item_name,item_value): return {item_name:list([(f.__name__,f(item_name,item_value)) for f in self.item_validation_funs])} def run_loop_validation(self,loop_names): return {loop_names[0]:list([(f.__name__,f(loop_names)) for f in self.loop_validation_funs])} def run_global_validation(self,item_name,item_value,data_block,provisional_items={},globals={}): results = list([(f.__name__,f(item_name,item_value,data_block,provisional_items,globals)) for f in self.global_validation_funs]) return {item_name:results} def run_block_validation(self,whole_block,block_scope='Item'): results = list([(f.__name__,f(whole_block)) for f in self.block_validation_funs]) # fix up the return values return {"whole_block":results} def optimize_on(self): self.optimize = True self.done_keys = [] self.done_children = [] self.done_parents = [] def optimize_off(self): self.optimize = False self.done_keys = [] self.done_children = [] self.done_parents = [] class ValidCifBlock(CifBlock): """A `CifBlock` that is valid with respect to a given CIF dictionary. Methods of `CifBlock` are overridden where necessary to disallow addition of invalid items to the `CifBlock`. ## Initialisation * `dic` is a `CifDic` object to be used for validation. 
""" def __init__(self,dic = None, diclist=[], mergemode = "replace",*args,**kwords): CifBlock.__init__(self,*args,**kwords) if dic and diclist: print("Warning: diclist argument ignored when initialising ValidCifBlock") if isinstance(dic,CifDic): self.fulldic = dic else: raise TypeError( "ValidCifBlock passed non-CifDic type in dic argument") if len(diclist)==0 and not dic: raise ValidCifError( "At least one dictionary must be specified") if diclist and not dic: self.fulldic = merge_dic(diclist,mergemode) if not self.run_data_checks()[0]: raise ValidCifError( self.report()) def run_data_checks(self,verbose=False): self.v_result = {} self.fulldic.optimize_on() for dataname in self.keys(): update_value(self.v_result,self.fulldic.run_item_validation(dataname,self[dataname])) update_value(self.v_result,self.fulldic.run_global_validation(dataname,self[dataname],self)) for loop_names in self.loops.values(): update_value(self.v_result,self.fulldic.run_loop_validation(loop_names)) # now run block-level checks update_value(self.v_result,self.fulldic.run_block_validation(self)) # return false and list of baddies if anything didn't match self.fulldic.optimize_off() all_keys = list(self.v_result.keys()) #dictionary will change for test_key in all_keys: #print("%s: %r" % (test_key, self.v_result[test_key])) self.v_result[test_key] = [a for a in self.v_result[test_key] if a[1]["result"]==False] if len(self.v_result[test_key]) == 0: del self.v_result[test_key] isvalid = len(self.v_result)==0 #if not isvalid: # print("Baddies: {!r}".format(self.v_result)) return isvalid,self.v_result def single_item_check(self,item_name,item_value): #self.match_single_item(item_name) if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_item_validation(item_name,item_value) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def loop_item_check(self,loop_names): in_dic_names = list([a for a in loop_names if a in self.fulldic]) if len(in_dic_names)==0: result = {loop_names[0]:[]} else: result = self.fulldic.run_loop_validation(in_dic_names) baddies = list([a for a in result[in_dic_names[0]] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(loop_names, baddies)) return isvalid,baddies def global_item_check(self,item_name,item_value,provisional_items={}): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_global_validation(item_name, item_value,self,provisional_items = provisional_items) baddies = list([a for a in result[item_name] if a[1]["result"] is False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def remove_global_item_check(self,item_name): if item_name not in self.fulldic: result = {item_name:[]} else: result = self.fulldic.run_remove_global_validation(item_name,self,False) baddies = list([a for a in result[item_name] if a[1]["result"]==False]) # if even one false one is found, this should trigger isvalid = (len(baddies) == 0) # if not isvalid: print("Failures for {}: {!r}".format(item_name, baddies)) return isvalid,baddies def AddToLoop(self,dataname,loopdata): # single item checks paired_data 
= loopdata.items() for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems) # loop item checks; merge with current loop found = 0 for aloop in self.block["loops"]: if dataname in aloop: loopnames = aloop.keys() for new_name in loopdata.keys(): if new_name not in loopnames: loopnames.append(new_name) valid,problems = self.looped_item_check(loopnames) self.report_if_invalid(valid,problems) prov_dict = loopdata.copy() for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems) CifBlock.AddToLoop(self,dataname,loopdata) def AddCifItem(self,data): if isinstance(data[0],(unicode,str)): # single item valid,problems = self.single_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) valid,problems = self.global_item_check(data[0],data[1]) self.report_if_invalid(valid,problems,data[0]) elif isinstance(data[0],tuple) or isinstance(data[0],list): paired_data = list(zip(data[0],data[1])) for name,value in paired_data: valid,problems = self.single_item_check(name,value) self.report_if_invalid(valid,problems,name) valid,problems = self.loop_item_check(data[0]) self.report_if_invalid(valid,problems,data[0]) prov_dict = {} # for storing temporary items for name,value in paired_data: prov_dict[name]=value for name,value in paired_data: del prov_dict[name] # remove temporarily valid,problems = self.global_item_check(name,value,prov_dict) prov_dict[name] = value # add back in self.report_if_invalid(valid,problems,name) else: raise ValueError("Programming error: AddCifItem passed non-tuple,non-string item") super(ValidCifBlock,self).AddCifItem(data) def AddItem(self,key,value,**kwargs): """Set value of dataname `key` to `value` after checking for conformance with CIF dictionary""" valid,problems = self.single_item_check(key,value) self.report_if_invalid(valid,problems,key) valid,problems = self.global_item_check(key,value) self.report_if_invalid(valid,problems,key) super(ValidCifBlock,self).AddItem(key,value,**kwargs) # utility function def report_if_invalid(self,valid,bad_list,data_name): if not valid: bad_tests = [a[0] for a in bad_list] error_string = ",".join(bad_tests) error_string = repr(data_name) + " fails following validity checks: " + error_string raise ValidCifError( error_string) def __delitem__(self,key): # we don't need to run single item checks; we do need to run loop and # global checks. if key in self: try: loop_items = self.GetLoop(key) except TypeError: loop_items = [] if loop_items: #need to check loop conformance loop_names = [a[0] for a in loop_items if a[0] != key] valid,problems = self.loop_item_check(loop_names) self.report_if_invalid(valid,problems) valid,problems = self.remove_global_item_check(key) self.report_if_invalid(valid,problems) self.RemoveCifItem(key) def report(self): outstr = StringIO() outstr.write( "Validation results\n") outstr.write( "------------------\n") print("%d invalid items found\n" % len(self.v_result)) for item_name,val_func_list in self.v_result.items(): outstr.write("%s fails following tests:\n" % item_name) for val_func in val_func_list: outstr.write("\t%s\n") return outstr.getvalue() class ValidCifFile(CifFile): """A CIF file for which all datablocks are valid. 
Argument `dic` to initialisation specifies a `CifDic` object to use for validation.""" def __init__(self,dic=None,diclist=[],mergemode="replace",*args,**kwargs): if not diclist and not dic and not hasattr(self,'bigdic'): raise ValidCifError( "At least one dictionary is required to create a ValidCifFile object") if not dic and diclist: #merge here for speed self.bigdic = merge_dic(diclist,mergemode) elif dic and not diclist: self.bigdic = dic CifFile.__init__(self,*args,**kwargs) for blockname in self.keys(): self.dictionary[blockname]=ValidCifBlock(data=self.dictionary[blockname],dic=self.bigdic) def NewBlock(self,blockname,blockcontents,**kwargs): CifFile.NewBlock(self,blockname,blockcontents,**kwargs) # dictionary[blockname] is now a CifBlock object. We # turn it into a ValidCifBlock object self.dictionary[blockname] = ValidCifBlock(dic=self.bigdic, data=self.dictionary[blockname]) class ValidationResult: """Represents validation result. It is initialised with """ def __init__(self,results): """results is return value of validate function""" self.valid_result, self.no_matches = results def report(self,use_html): """Return string with human-readable description of validation result""" return validate_report((self.valid_result, self.no_matches),use_html) def is_valid(self,block_name=None): """Return True for valid CIF file, otherwise False""" if block_name is not None: block_names = [block_name] else: block_names = self.valid_result.iterkeys() for block_name in block_names: if not self.valid_result[block_name] == (True,{}): valid = False break else: valid = True return valid def has_no_match_items(self,block_name=None): """Return true if some items are not found in dictionary""" if block_name is not None: block_names = [block_name] else: block_names = self.no_matches.iter_keys() for block_name in block_names: if self.no_matches[block_name]: has_no_match_items = True break else: has_no_match_items = False return has_no_match_items def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False): """Validate the `ciffile` conforms to the definitions in `CifDic` object `dic`, or if `dic` is missing, to the results of merging the `CifDic` objects in `diclist` according to `mergemode`. 
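# A minimal usage sketch (added for illustration only; the file names below
# are placeholders, not part of the library API):
#
#     from CifFile import CifDic
#     result = ValidationResult(Validate("my.cif", dic=CifDic("cif_core.dic")))
#     print(result.report(use_html=False))
#     print(result.is_valid())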
def Validate(ciffile,dic = "", diclist=[],mergemode="replace",isdic=False):
    """Validate that `ciffile` conforms to the definitions in `CifDic` object `dic`,
    or, if `dic` is missing, to the results of merging the `CifDic` objects in
    `diclist` according to `mergemode`.  Flag `isdic` indicates that `ciffile` is
    a CIF dictionary, meaning that save frames should be accessed for validation
    and that mandatory_category should be interpreted differently for DDL2."""
    if not isinstance(ciffile,CifFile):
        check_file = CifFile(ciffile)
    else:
        check_file = ciffile
    if not dic:
        fulldic = merge_dic(diclist,mergemode)
    else:
        fulldic = dic
    no_matches = {}
    valid_result = {}
    if isdic:          #assume one block only
        check_file.scoping = 'instance' #only data blocks visible
        top_level = list(check_file.keys())[0]
        check_file.scoping = 'dictionary'   #all blocks visible
        # collect a list of parents for speed
        if fulldic.diclang == 'DDL2':
            poss_parents = fulldic.get_all("_item_linked.parent_name")
            for parent in poss_parents:
                curr_parent = listify(check_file.get(parent,[]))
                new_vals = check_file.get_all(parent)
                new_vals.extend(curr_parent)
                if len(new_vals)>0:
                    check_file[parent] = new_vals
                    print("Added %s (len %d)" % (parent,len(check_file[parent])))
    # now run the validations
    for block in check_file.keys():
        if isdic and block == top_level:
            block_scope = 'Dictionary'
        elif isdic:
            block_scope = 'Item'
        else:
            block_scope = 'Datablock'
        no_matches[block] = [a for a in check_file[block].keys() if a not in fulldic]
        # remove non-matching items
        print("Not matched: " + repr(no_matches[block]))
        for nogood in no_matches[block]:
            del check_file[block][nogood]
        print("Validating block %s, scope %s" % (block,block_scope))
        valid_result[block] = run_data_checks(check_file[block],fulldic,block_scope=block_scope)
    return valid_result,no_matches

def validate_report(val_result,use_html=False):
    valid_result,no_matches = val_result
    outstr = StringIO()
    if use_html:
        outstr.write("<h2>Validation results</h2>")
    else:
        outstr.write( "Validation results\n")
        outstr.write( "------------------\n")
    if len(valid_result) > 10:
        suppress_valid = True         #don't clutter with valid messages
        if use_html:
            outstr.write("<p>For brevity, valid blocks are not reported in the output.</p>")
    else:
        suppress_valid = False
    for block in valid_result.keys():
        block_result = valid_result[block]
        if block_result[0]:
            out_line = "Block '%s' is VALID" % block
        else:
            out_line = "Block '%s' is INVALID" % block
        if use_html:
            if (block_result[0] and (not suppress_valid or len(no_matches[block])>0)) or not block_result[0]:
                outstr.write( "<h3>%s</h3><p>" % out_line)
        else:
            outstr.write( "\n %s\n" % out_line)
        if len(no_matches[block])!= 0:
            if use_html:
                outstr.write( "<p>The following items were not found in the dictionary")
                outstr.write(" (note that this does not invalidate the data block):</p>")
                outstr.write("<p><table>\n")
                [outstr.write("<tr><td>%s</td></tr>" % it) for it in no_matches[block]]
                outstr.write("</table></p>\n")
            else:
                outstr.write( "\n The following items were not found in the dictionary:\n")
                outstr.write("Note that this does not invalidate the data block\n")
                [outstr.write("%s\n" % it) for it in no_matches[block]]
        # now organise our results by type of error, not data item...
        error_type_dic = {}
        for error_item, error_list in block_result[1].items():
            for func_name,bad_result in error_list:
                bad_result.update({"item_name":error_item})
                try:
                    error_type_dic[func_name].append(bad_result)
                except KeyError:
                    error_type_dic[func_name] = [bad_result]
        # make a table of test name, test message
        info_table = {
        'validate_item_type':
            "The following data items had badly formed values",
        'validate_item_esd':
            "The following data items should not have esds appended",
        'validate_enum_range':
            "The following data items have values outside permitted range",
        'validate_item_enum':
            "The following data items have values outside permitted set",
        'validate_looping':
            "The following data items violate looping constraints",
        'validate_loop_membership':
            "The following looped data names are of different categories to the first looped data name",
        'validate_loop_key':
            "A required dataname for this category is missing from the loop\n containing the dataname",
        'validate_loop_key_ddlm':
            "A loop key is missing for the category containing the dataname",
        'validate_loop_references':
            "A dataname required by the item is missing from the loop",
        'validate_parent':
            "A parent dataname is missing or contains different values",
        'validate_child':
            "A child dataname contains different values to the parent",
        'validate_uniqueness':
            "One or more data items do not take unique values",
        'validate_dependents':
            "A dataname required by the item is missing from the data block",
        'validate_exclusion':
            "Both dataname and exclusive alternates or aliases are present in data block",
        'validate_mandatory_category':
            "A required category is missing from this block",
        'check_mandatory_items':
            "A required data attribute is missing from this block",
        'check_prohibited_items':
            "A prohibited data attribute is present in this block"}
        for test_name,test_results in error_type_dic.items():
            if use_html:
                outstr.write(html_error_report(test_name,info_table[test_name],test_results))
            else:
                outstr.write(error_report(test_name,info_table[test_name],test_results))
                outstr.write("\n\n")
    return outstr.getvalue()
# A function to lay out a single error report.  We are passed
# the name of the error (one of our validation functions), the
# explanation to print out, and a dictionary with the error
# information.  We print no more than 50 characters of the item.

def error_report(error_name,error_explanation,error_dics):
    retstring = "\n\n " + error_explanation + ":\n\n"
    headstring = "%-32s" % "Item name"
    bodystring = ""
    if "bad_values" in error_dics[0]:
        headstring += "%-20s" % "Bad value(s)"
    if "bad_items" in error_dics[0]:
        headstring += "%-20s" % "Bad dataname(s)"
    if "child" in error_dics[0]:
        headstring += "%-20s" % "Child"
    if "parent" in error_dics[0]:
        headstring += "%-20s" % "Parent"
    headstring +="\n"
    for error in error_dics:
        bodystring += "\n%-32s" % error["item_name"]
        if "bad_values" in error:
            out_vals = [repr(a)[:50] for a in error["bad_values"]]
            bodystring += "%-20s" % out_vals
        if "bad_items" in error:
            bodystring += "%-20s" % repr(error["bad_items"])
        if "child" in error:
            bodystring += "%-20s" % repr(error["child"])
        if "parent" in error:
            bodystring += "%-20s" % repr(error["parent"])
    return retstring + headstring + bodystring

# This lays out an HTML error report

def html_error_report(error_name,error_explanation,error_dics,annotate=[]):
    retstring = "<h4>" + error_explanation + ":</h4><p>"
    retstring = retstring + "<table>"
    headstring = "<tr><th>Item name</th>"
    bodystring = ""
    if "bad_values" in error_dics[0]:
        headstring += "<th>Bad value(s)</th>"
    if "bad_items" in error_dics[0]:
        headstring += "<th>Bad dataname(s)</th>"
    if "child" in error_dics[0]:
        headstring += "<th>Child</th>"
    if "parent" in error_dics[0]:
        headstring += "<th>Parent</th>"
    headstring +="</tr>\n"
    for error in error_dics:
        bodystring += "<tr><td>%s</td>" % error["item_name"]
        if "bad_values" in error:
            bodystring += "<td>%s</td>" % error["bad_values"]
        if "bad_items" in error:
            bodystring += "<td>%s</td>" % error["bad_items"]
        if "child" in error:
            bodystring += "<td>%s</td>" % error["child"]
        if "parent" in error:
            bodystring += "<td>%s</td>" % error["parent"]
        bodystring += "</tr>\n"
    return retstring + headstring + bodystring + "</table></p>\n"

def run_data_checks(check_block,fulldic,block_scope='Item'):
    v_result = {}
    for key in check_block.keys():
        update_value(v_result, fulldic.run_item_validation(key,check_block[key]))
        update_value(v_result, fulldic.run_global_validation(key,check_block[key],check_block))
    for loopnames in check_block.loops.values():
        update_value(v_result, fulldic.run_loop_validation(loopnames))
    update_value(v_result,fulldic.run_block_validation(check_block,block_scope=block_scope))
    # return false and list of baddies if anything didn't match
    all_keys = list(v_result.keys())
    for test_key in all_keys:
        v_result[test_key] = [a for a in v_result[test_key] if a[1]["result"]==False]
        if len(v_result[test_key]) == 0:
            del v_result[test_key]
    # if even one false one is found, this should trigger
    # print("Baddies: {!r}".format(v_result))
    isvalid = len(v_result)==0
    return isvalid,v_result

def get_number_with_esd(numstring):
    numb_re = r'((-?(([0-9]*[.]([0-9]+))|([0-9]+)[.]?))([(][0-9]+[)])?([eEdD][+-]?[0-9]+)?)|(\?)|(\.)'
    our_match = re.match(numb_re,numstring)
    if our_match:
        a,base_num,b,c,dad,dbd,esd,exp,q,dot = our_match.groups()
        # print("Debug: {} -> {!r}".format(numstring, our_match.groups()))
    else:
        return None,None
    if dot or q:
        return None,None     #a dot or question mark
    if exp:          #has exponent
        exp = exp.replace("d","e")     # mop up old fashioned numbers
        exp = exp.replace("D","e")
        base_num = base_num + exp
    # print("Debug: have %s for base_num from %s" % (base_num,numstring))
    base_num = float(base_num)
    # work out esd, if present.
    if esd:
        esd = float(esd[1:-1])    # no brackets
        if dad:                   # decimal point + digits
            esd = esd * (10 ** (-1* len(dad)))
        if exp:
            esd = esd * (10 ** (float(exp[1:])))
    return base_num,esd

def float_with_esd(inval):
    if isinstance(inval,unicode):
        j = inval.find("(")
        if j>=0:
            return float(inval[:j])
    return float(inval)

def convert_type(definition):
    """Convert value to have the type given by definition"""
    #extract the actual required type information
    container = definition['_type.container']
    dimension = definition.get('_type.dimension',StarFile.StarList([]))
    structure = interpret_structure(definition['_type.contents'])
    if container == 'Single':    #a single value to convert
        return convert_single_value(structure)
    elif container == 'List':    #lots of the same value
        return convert_list_values(structure,dimension)
    elif container == 'Multiple': #no idea
        return None
    elif container in ('Array','Matrix'): #numpy array
        return convert_matrix_values(structure)
    return lambda a:a    #unable to convert

def convert_single_value(type_spec):
    """Convert a single item according to type_spec"""
    if type_spec == 'Real':
        return float_with_esd
    if type_spec in ('Count','Integer','Index','Binary','Hexadecimal','Octal'):
        return int
    if type_spec == 'Complex':
        return complex
    if type_spec == 'Imag':
        return lambda a:complex(0,a)
    if type_spec in ('Code','Name','Tag'):  #case-insensitive -> lowercase
        return lambda a:a.lower()
    return lambda a:a   #can't do anything numeric

class convert_simple_list(object):
    """\
    Callable object that converts values in a simple list according
    to the specified element structure.
    """
    def __init__(self, structure):
        self.converters = [convert_single_value(tp) for tp in structure]
        return

    def __call__(self, element):
        if len(element) != len(self.converters):
            emsg = "Expected iterable of %i values, got %i." % (
                len(self.converters), len(element))
            raise ValueError(emsg)
        rv = [f(e) for f, e in zip(self.converters, element)]
        return rv

# End of class convert_simple_list

def convert_list_values(structure, dimension):
    """Convert the values according to the element
    structure given in [[structure]]"""
    # simple repetition
    if isinstance(structure, (unicode, str)):
        fcnv = convert_single_value(structure)
    # assume structure is a list of types
    else:
        fcnv = convert_simple_list(structure)
    rv = fcnv
    # setup nested conversion function when dimension differs from 1.
    if len(dimension) > 0 and int(dimension[0]) != 1:
        rv = lambda args : [fcnv(a) for a in args]
    return rv

def convert_matrix_values(valtype):
    """Convert a dREL String or Float valued List structure to a numpy matrix structure"""
    # first convert to numpy array, then let numpy do the work
    try:
        import numpy
    except ImportError:
        return lambda a:a   #cannot do it
    if valtype == 'Real':
        dtype = float
    elif valtype == 'Integer':
        dtype = int
    elif valtype == 'Complex':
        dtype = complex
    else:
        raise ValueError('Unknown matrix value type')
    fcnv = lambda a : numpy.asarray(a, dtype=dtype)
    return fcnv

def interpret_structure(struc_spec):
    """Interpret a DDLm structure specification"""
    from . import TypeContentsParser as t
    p = t.TypeParser(t.TypeParserScanner(struc_spec))
    return getattr(p,"input")()

# A utility function to append to item values rather than replace them

def update_value(base_dict,new_items):
    for new_key in new_items.keys():
        if new_key in base_dict:
            base_dict[new_key].extend(new_items[new_key])
        else:
            base_dict[new_key] = new_items[new_key]

# Transpose the list of lists passed to us

def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
        new_packet = []
        for j in opt_range:
            new_packet.append(base_list[j][i])
        new_lofl.append(new_packet)
    return new_lofl

# listify strings - used surprisingly often

def listify(item):
    if isinstance(item,(unicode,str)):
        return [item]
    else:
        return item

# given a list of search items, return a list of items
# actually contained in the given data block

def filter_present(namelist,datablocknames):
    return [a for a in namelist if a in datablocknames]

# Make an item immutable, used if we want a list to be a key

def make_immutable(values):
    """Turn list of StarList values into a list of immutable items"""
    if not isinstance(values[0],StarList):
        return values
    else:
        return [tuple(a) for a in values]
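# Worked examples for get_number_with_esd (added for illustration):
#
#     get_number_with_esd("4.561(2)")   # -> (4.561, 0.002)
#     get_number_with_esd("1.2e3")      # -> (1200.0, None)
#     get_number_with_esd("?")          # -> (None, None)   unknown value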
# merge ddl dictionaries.  We should be passed filenames or CifFile
# objects

def merge_dic(diclist,mergemode="replace",ddlspec=None):
    dic_as_cif_list = []
    for dic in diclist:
        if not isinstance(dic,CifFile) and \
           not isinstance(dic,(unicode,str)):
            raise TypeError("Require list of CifFile names/objects for dictionary merging")
        if not isinstance(dic,CifFile):
            dic_as_cif_list.append(CifFile(dic))
        else:
            dic_as_cif_list.append(dic)
    # we now merge left to right
    basedic = dic_as_cif_list[0]
    if "on_this_dictionary" in basedic:   #DDL1 style only
        for dic in dic_as_cif_list[1:]:
            basedic.merge(dic,mode=mergemode,match_att=["_name"])
    elif len(basedic.keys()) == 1:        #One block: DDL2/m style
        old_block = basedic[list(basedic.keys())[0]]
        for dic in dic_as_cif_list[1:]:
            new_block = dic[list(dic.keys())[0]]
            basedic.merge(dic,mode=mergemode,
                          single_block=[list(basedic.keys())[0],list(dic.keys())[0]],
                          match_att=["_item.name"],match_function=find_parent)
    return CifDic(basedic)

def find_parent(ddl2_def):
    if "_item.name" not in ddl2_def:
        return None
    if isinstance(ddl2_def["_item.name"],unicode):
        return ddl2_def["_item.name"]
    if "_item_linked.child_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no child_names")
    if "_item_linked.parent_name" not in ddl2_def:
        raise CifError("Asked to find parent in block with no parent_names")
    result = list([a for a in ddl2_def["_item.name"] if a not in ddl2_def["_item_linked.child_name"]])
    if len(result)>1 or len(result)==0:
        raise CifError("Unable to find single unique parent data item")
    return result[0]
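# Illustrative sketch of dictionary merging (the dictionary file names are
# placeholders only):
#
#     combined = merge_dic(["cif_core.dic", "cif_pd.dic"], mergemode="replace")
#
# Later dictionaries in the list take precedence according to `mergemode`.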
""" finalcif = CifFile(scoping=scoping,standard=standard) return StarFile.ReadStar(filename,prepared=finalcif,grammar=grammar,scantype=scantype, permissive=permissive) #return StarFile.StarFile(filename,maxlength,scantype=scantype,grammar=grammar,**kwargs) class CifLoopBlock(StarFile.LoopBlock): def __init__(self,data=(),**kwargs): super(CifLoopBlock,self).__init__(data,**kwargs) #No documentation flags pycifrw-4.4/src/Makefile000066400000000000000000000033631345362224200153030ustar00rootroot00000000000000# Makefile for Python based Cif handling modules # Customizable variables: # # Instance of Python to be used for the yapps2.py script. PYTHON = python2 package: CifFile_module.py StarFile.py Parsers documentation # %.py : %.nw notangle $< > $@ # documentation: CifFile_module.nw YappsStarParser.nw StarFile.nw noweave -html -index -filter l2h CifFile_module.nw > CifFile.html noweave -html -index -filter l2h StarFile.nw > StarFile.html noweave -html -index -filter l2h YappsStarParser.nw > YappsStarParser.html noweave -html -index -filter l2h TypeContentsParser.nw > TypeContentsParser.html # Parsers: YappsStarParser_STAR2.py YappsStarParser_1_1.py YappsStarParser_1_0.py TypeContentsParser.py \ YappsStarParser_2_0.py # clean: rm -f *.pyc *.g rm -f YappsStarParser_*.py TypeContentsParser.py # Local helper variables: # # Command to execute the yapps2.py script. Note it sets the PYTHONPATH # environment variable as yapps requires its runtime to run. YAPPS2CMD = PYTHONPATH=. $(PYTHON) ./yapps3/yapps2.py YappsStarParser_1_0.py: YappsStarParser.nw notangle -R1.0_syntax YappsStarParser.nw > YappsStarParser_1_0.g $(YAPPS2CMD) YappsStarParser_1_0.g # YappsStarParser_1_1.py: YappsStarParser.nw notangle -R1.1_syntax YappsStarParser.nw > YappsStarParser_1_1.g $(YAPPS2CMD) YappsStarParser_1_1.g # YappsStarParser_2_0.py: YappsStarParser.nw notangle -RCIF2_syntax YappsStarParser.nw > YappsStarParser_2_0.g $(YAPPS2CMD) YappsStarParser_2_0.g # YappsStarParser_STAR2.py: YappsStarParser.nw notangle -RSTAR2_syntax YappsStarParser.nw > YappsStarParser_STAR2.g $(YAPPS2CMD) YappsStarParser_STAR2.g # TypeContentsParser.py: TypeContentsParser.nw notangle -RTypeContents_syntax TypeContentsParser.nw > TypeContentsParser.g $(YAPPS2CMD) TypeContentsParser.g pycifrw-4.4/src/Programs/000077500000000000000000000000001345362224200154305ustar00rootroot00000000000000pycifrw-4.4/src/Programs/README000066400000000000000000000013471345362224200163150ustar00rootroot00000000000000This directory contains small example programs showing some uses of PyCIFRW. Of interest: validate_cif.py Validate against DDL1/DDL2 dictionaries. For example: python validate_cif.py ../drel/testing/data/nacltest.cif -c -n cif_core.dic -u 2.3.1 will validate nacltest.cif using the information in cif_core.dic as distributed by the IUCr ftp site. loop_example.py Small program demonstrating how to pick particular columns from a loop add_spoke.py Add a hub-spoke structure to a CIF file by creating a 'hub' category and keys in spoke categories that refer to the hub category. output_asciidoc.py Convert a dictionary into an Ascii Doc file that can then be posted on the web. pycifrw-4.4/src/Programs/Web.README000066400000000000000000000011501345362224200170210ustar00rootroot00000000000000This directory, apart from a few example programs, also contains a simple web-server for validation. To install, first install PyCifRW. Following this: 1. install validate-process.py into your web server's CGI directory and give it the appropriate permissions. 2. 
pycifrw-4.4/src/Programs/Web.README000066400000000000000000000011501345362224200170210ustar00rootroot00000000000000This directory, apart from a few example programs, also contains a simple
web-server for validation.  To install, first install PyCifRW.  Following
this:

1. install validate-process.py into your web server's CGI directory and
   give it the appropriate permissions.

2. install cifv_server.html into your server's tree in an appropriate place
   (this file is simply the form for entry of user data).

3. Put all CIF dictionaries into directory /usr/local/lib/cif.  For correct
   naming, check the HTML source in cifv_server.html.

Note that this web server has not been tested for almost a decade but
should work.
pycifrw-4.4/src/Programs/add_key_names.py000066400000000000000000000017551345362224200205730ustar00rootroot00000000000000# A program to add key names to CIF categories that
# only have _category.key_id

from CifFile import CifDic

def add_keynames(inname):
    """Add a '_category_key.name' loop to all categories that only
    have '_category.key_id'"""
    outname = inname + ".out"
    indic = CifDic(inname,grammar="2.0",do_minimum=True)
    cats = [k for k in indic.keys() if indic[k].get("_definition.scope","Item") == "Category"]
    cats = [k for k in cats if indic[k].get("_definition.class","Set") == "Loop"]
    bad_cats = [c for c in cats if len(indic[c].get('_category_key.name',[])) == 0]
    for b in bad_cats:
        print 'Adding to category %s' % b
        indic[b]['_category_key.name'] = [indic[b]['_category.key_id']]
        indic[b].CreateLoop(['_category_key.name'])
    indic.SetTemplate("dic_template.dic")
    outtext = indic.WriteOut()
    p = open(outname,"w")
    p.write(outtext)
    p.close()

if __name__=="__main__":
    import sys
    dicname = sys.argv[1]
    add_keynames(dicname)
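# Example invocation (the dictionary name is illustrative):
#
#     python add_key_names.py cif_core.dic
#
# writes the augmented dictionary to cif_core.dic.out.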
""" + extra_text new_bname = dic.add_definition(obj_id,blockname,def_text) dic[new_bname]['_name.linked_item_id'] = hub_keyname dic[new_bname]['_type.purpose'] = 'Link' # now update the category information as well current_keys = dic[blockname].get('_category_key.name',[]) dic[blockname]['_category_key.name'] = current_keys + ["_"+new_bname] dic[blockname].CreateLoop(['_category_key.name']) return new_bname def add_spoke(sem_dic,hub_cat,specific_cats = set(),extra_text=''): all_cats = set([a.get('_definition.id','').lower() for a in sem_dic if a.get('_definition.scope','Item'=='Category') and a.get('_definition.class','') != 'Head']) if specific_cats is None: specific_cats = all_cats actual_cats = (all_cats & set(specific_cats)) - set([hub_cat]) print('Will update categories in following list') print(repr(actual_cats)) for one_cat in actual_cats: print('Updating %s' % one_cat) new_bname = update_one_category(sem_dic,one_cat,hub_cat,hub_cat+'_id',extra_text) print('New definition:\n') print(str(sem_dic[new_bname])) def add_hub(indic,hub_cat,key_id,extra_text=''): """Add a hub category with hub key id """ def_text = \ """This category is a central category for logically connecting together a group of categories.""" blockname = indic.add_category(hub_cat) indic[blockname].overwrite = True indic[blockname]['_description.text'] = def_text indic[blockname]['_definition.class'] = 'Loop' indic[blockname]['_category_key.name'] = ['_'+hub_cat.lower()+"."+key_id] indic[blockname].CreateLoop(['_category_key.name']) # add the single entry for the key new_def_text = """This dataname must have a unique value for each distinct row in this category. As a default value is defined, it need not explicitly appear in datafiles unless there is more than entry in the category.""" new_def = indic.add_definition(key_id,hub_cat,new_def_text) indic[new_def]['_enumeration.default'] = "." def add_hub_spoke(indic,hub_cat,only_cats=None,force=False): """Edit to add a new hub category, with linked ids in all other categories, unless only_cats is specified""" sem_dic = CifDic(indic,grammar="2.0",do_minimum=True) if hub_cat.lower() not in sem_dic.cat_map.keys() or force==True: print(hub_cat + ' not found in dictionary, adding') add_hub(sem_dic, hub_cat, 'id') add_spoke(sem_dic,hub_cat,only_cats) outfile = indic + '.updated' outfile = open(outfile,"w") outfile.write(str(sem_dic)) return sem_dic if __name__ == '__main__': import sys if len(sys.argv)>2: indic = sys.argv[1] hub_cat = sys.argv[2].lower() if len(sys.argv)>3: all_cats = [a.lower() for a in sys.argv[3:]] else: all_cats = None new_dic = add_hub_spoke(indic,hub_cat,all_cats) else: print('Usage: add_spoke ...') pycifrw-4.4/src/Programs/cif2cell.py000066400000000000000000000051471345362224200174740ustar00rootroot00000000000000# A simple and limited example program for converting CIF files # to PowderCell .cel files, according to the specification at # http://www.ccp14.ac.uk/ccp/web-mirrors/powdcell/a_v/v_1/powder/details/powcell.htm # Limitations: # 1. Non-standard settings not handled # 2. No occupancy or isotropic displacement included # 3. Only atoms with an entry in the table below can be handled. 
pycifrw-4.4/src/Programs/cif2cell.py000066400000000000000000000051471345362224200174740ustar00rootroot00000000000000# A simple and limited example program for converting CIF files
# to PowderCell .cel files, according to the specification at
# http://www.ccp14.ac.uk/ccp/web-mirrors/powdcell/a_v/v_1/powder/details/powcell.htm
# Limitations:
# 1. Non-standard settings not handled
# 2. No occupancy or isotropic displacement included
# 3. Only atoms with an entry in the table below can be handled.

atom_type_table = {
     'H':1,
     'He':2,
     'C':6,
     'O':8,
     'Al':13,
     #Add more as necessary
     }

from CifFile import CifFile, get_number_with_esd
import sys

gnwe = get_number_with_esd   # for brevity

def transform(my_cif_file):
    """Turn a CIF file into a CEL file"""
    cf = CifFile(my_cif_file).first_block()
    outfile = open(my_cif_file+".cel","w")
    # Write out the cell parameters
    outfile.write("CELL ")
    cell_parms = ['_cell_length_a',
                  '_cell_length_b',
                  '_cell_length_c',
                  '_cell_angle_alpha',
                  '_cell_angle_beta',
                  '_cell_angle_gamma']
    #Strip off any appended SU
    cell_parms = [gnwe(cf[a])[0] for a in cell_parms]
    [outfile.write("%f " % a) for a in cell_parms]
    outfile.write("\n")
    # Write out the atomic parameters
    atom_loop = cf.GetLoop('_atom_site_label')
    for a in atom_loop:
        # Work out atomic number
        at_type = a._atom_site_type_symbol
        t = atom_type_table.get(at_type)
        if t is None:
            print "Error: do not know atomic number for " + at_type
            print "Please add an entry to the table."
            return
        outfile.write("%s %d %f %f %f\n" % (a._atom_site_label, t,
                                            gnwe(a._atom_site_fract_x)[0],
                                            gnwe(a._atom_site_fract_y)[0],
                                            gnwe(a._atom_site_fract_z)[0]))
    # Write out the space group information
    outfile.write("RGNR ")
    sg = cf.get('_space_group_IT_number',
                cf.get('_symmetry_Int_Tables_number'))
    if sg is None:
        sgname = cf.get('_space_group_name_H-M',
                        cf.get('_symmetry_space_group_name_H-M','?'))
        print """No space group number in file, you will have to insert
the number by hand for space group """ + sgname
        print "Software will put xxx instead"
        outfile.write("xxx")
    else:
        outfile.write("%s" % sg)
    outfile.write("\n")
    outfile.close()

if __name__ == '__main__':
    if len(sys.argv)< 2:
        print "Usage: cif2cel <ciffilename>"
    else:
        transform(sys.argv[1])
pycifrw-4.4/src/Programs/cifv_server.html000066400000000000000000000051011345362224200206300ustar00rootroot00000000000000
CIF Validation web interface

CIF file validation server


CIF or dictionary file for validation: Is this file a DDL2 dictionary?
Dictionary/dictionaries to validate against:

*: Uses a DDL1 dictionary where the _type_construct attribute for _enumeration_range has been changed from (_sequence_minimum):((_sequence_maximum)?) to [A-Za-z0-9+%^#@-]+:[A-Za-z0-9+%^#@-]?. The as-written attribute value is not machine-readable and therefore produces a lot of validation warnings.


Background

This is an experimental web-based interface for validating CIF files using the PyCIFRW package described in J. Appl. Cryst. 39, 621---625. Using this interface, you should be able to:

  • Validate a DDL1-style data CIF against the listed DDL1 dictionaries;
  • Validate a DDL2-style data CIF against the listed DDL2 dictionaries;
  • Validate a DDL1-style data CIF against the listed DDL2 dictionaries;
  • Validate a DDL1/2 CIF dictionary against an appropriate DDL1/2 dictionary;

Please note that there is a file-size limit of 100K, and that files containing syntax errors will not currently produce an informative error report.

If you are able to produce an error message using a syntactically correct CIF file, or if you disagree with an assessment of block validity, please forward your comments and the error to jrh at anbf2 dot kek dot jp. pycifrw-4.4/src/Programs/ddl.dic.html000066400000000000000000001722351345362224200176310ustar00rootroot00000000000000 DDL_DIC

This dictionary contains the definitions of attributes that make up the DDLm dictionary definition language. It provides the meta meta data for all CIF dictionaries.

Generated from ddl.dic, version 3.11.09

Categories that may have more than one row are called loop categories. Datanames making up the keys for these categories have a dot suffix.

Table of Contents

ATTRIBUTES

This category is parent of all other categories in the DDLm dictionary.

ALIAS

The attributes used to specify the aliased names of definitions.

_alias.definition_id•

Identifier tag of an aliased definition.

_alias.deprecation_date

Date that the aliased tag was deprecated as a definition tag.

_alias.dictionary_uri

Dictionary URI in which the aliased definition belongs.

CATEGORY

The attributes used to specify the properties of a "category" of data items.

_category.key_id

Tag of a single data item in a Loop category which is the generic key to access other items in the category. The value of this item must be unique in order to provide unambiguous access to a packet (row) in the table of values. This may be assumed to be a function of the datanames listed in category_key.name.

CATEGORY_KEY

The attributes used to specify (possibly multiple) keys for a given category.

_category_key.name•

A minimal list of tag(s) that together constitute a compound key to access other items in a Loop category. In other words, the combined values of the datanames listed in this loop must be unique, so that unambiguous access to a packet (row) in the table of values is possible. The dataname associated with category.key_id is only included in this loop if no other set of datanames can form a compound key.
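For example, a category keyed jointly on two datanames would list both in this loop (an illustrative sketch with invented names):

    loop_
      _category_key.name
        '_mycategory.key1'
        '_mycategory.key2'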

DEFINITION

The attributes for classifying dictionary definitions.

_definition.class

The nature and the function of a definition or definitions.

Possible values(default in bold):
Attribute

Item used as an attribute in the definition of other data items in DDLm dictionaries. These items never appear in data instance files.

Functions

Category of items that are transient function definitions used only in dREL methods scripts. These items never appear in data instance files.

Datum

Item defined in a domain-specific dictionary. These items appear only in data instance files.

Head

Category of items that is the parent of all other categories in the dictionary.

Loop

Category of items that in a data file must reside in a loop-list with a key item defined.

Set

Category of items that form a set (but not a loopable list). These items may be referenced as a class of items in a dREL methods expression.

Ref-loop

A category containing one item that identifies a category of items that is repeated in a sequence of save frames. The item, which is specified as a Ref-table value (see type.container), is looped. This construction is for loop categories that contain child categories. If, in the instance file, the child items have only one set of values, the Ref-loop item need not be used and child items need not be placed in a save frame.

_definition.id

Identifier name of the Item or Category definition contained within a save frame.

_definition.scope

The extent to which a definition affects other definitions.

Possible values(default in bold):
Dictionary

applies to all defined items in the dictionary

Category

applies to all defined items in the category

Item

applies to a single item definition

_definition.update

The date that a definition was last changed.

_definition.xref_code

Code identifying the equivalent definition in the dictionary referenced by the DICTIONARY_XREF attributes.

DESCRIPTION

The attributes of descriptive (non-machine parseable) parts of definitions.

_description.common

Commonly-used identifying name for the item.

_description.key_words

List of key-words categorising the item.

_description.text

The text description of the defined item.

DESCRIPTION_EXAMPLE

The attributes of descriptive (non-machine parseable) examples of values of the defined items.

_description_example.case•

An example case of the defined item.

_description_example.detail

A description of an example case for the defined item.

DICTIONARY

Attributes for identifying and registering the dictionary. The items in this category are NOT used as attributes of INDIVIDUAL data items.

_dictionary.class

The nature, or field of interest, of data items defined in the dictionary.

Possible values(default in bold):
Reference

DDLm reference attribute definitions

Instance

domain-specific data instance definitions

Template

domain-specific attribute/enumeration templates

Function

domain-specific method function scripts

_dictionary.date

The date that the last dictionary revision took place.

_dictionary.ddl_conformance

The version number of the DDL dictionary that this dictionary conforms to.

_dictionary.namespace

The namespace code that may be prefixed (with a trailing colon ":") to an item tag defined in the defining dictionary when used in particular applications. Because tags must be unique, namespace codes are unlikely to be used in data files.

_dictionary.title

The common title of the dictionary. Will usually match the name attached to the data_ statement of the dictionary file.

_dictionary.uri

The universal resource indicator of this dictionary.

_dictionary.version

A unique version identifier for the dictionary.

DICTIONARY_AUDIT

Attributes for identifying and registering the dictionary. The items in this category are NOT used as attributes of individual data items.

_dictionary_audit.date

The date of each dictionary revision.

_dictionary_audit.revision

A description of the revision applied for the _dictionary_audit.version.

_dictionary_audit.version•

A unique version identifier for each revision of the dictionary.

DICTIONARY_VALID

Data items which are used to specify the contents of definitions in the dictionary in terms of the _definition.scope and the required and prohibited attributes.

_dictionary_valid.application•

Provides the information identifying the definition scope (from the _definition.scope enumeration list) and the validity options (from the _dictionary_valid.option enumeration list), as a two-element list. This list signals the validity of applying the attributes given in _dictionary_valid.attributes.

_dictionary_valid.attributes

A list of the attribute names and categories that are assessed for application in the item, category and dictionary definitions.

_dictionary_valid.option

Option codes for applicability of attributes in definitions.

Possible values(default in bold):
Mandatory

attribute must be present in definition frame

Recommended

attribute is usually in definition frame

Prohibited

attribute must not be used in definition frame

_dictionary_valid.scope

The scope to which the specified restriction on usable attributes applies.

Possible values:
Dictionary

restriction applies to dictionary definition data frame

Category

restriction applies to a category definition save frame

Item

restriction applies to an item definition save frame

DICTIONARY_XREF

Data items which are used to cross reference other dictionaries that have defined the same data items. Data items in this category are NOT used as attributes of individual data items.

_dictionary_xref.code•

A code identifying the cross-referenced dictionary.

_dictionary_xref.date

Date of the cross-referenced dictionary.

_dictionary_xref.format

Format of the cross referenced dictionary.

_dictionary_xref.name

The name and description of the cross-referenced dictionary.

_dictionary_xref.uri

The source URI of the cross referenced dictionary data.

ENUMERATION

The attributes for restricting the values of defined data items.

_enumeration.def_index_id

Specifies the data name with a value used as an index to the DEFAULT enumeration list (in category enumeration_default) in order to select the default enumeration value for the defined item. The value of the identified data item must match one of the _enumeration_default.index values.

_enumeration.default

The default value for the defined item if it is not specified explicitly.

_enumeration.mandatory

Yes or No flag on whether the enumeration states specified for an item in the current definition (in which the item appears) MUST be used on instantiation.

Possible values(default in bold):
Yes

Use of state is mandatory

No

Use of state is unnecessary

_enumeration.range

The inclusive range of values "from:to" allowed for the defined item.
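For example, _enumeration.range 0:10 restricts values to between 0 and 10 inclusive, while an open-ended range such as 0.0: permits any non-negative value.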

ENUMERATION_DEFAULT

Loop of pre-determined default enumeration values indexed to a data item by the item _enumeration.def_index_id.

_enumeration_default.index•

Index key in the list default values referenced to by the value of _enumeration.def_index_id .

_enumeration_default.value

Default enumeration value in the list referenced by the value of _enumeration.def_index_id. The reference index key is given by the value of _enumeration_default.index value.

ENUMERATION_SET

Attributes of data items which are used to define a set of unique pre-determined values.

_enumeration_set.detail

The meaning of the code (identified by _enumeration_set.state) in terms of the value of the quantity it describes.

_enumeration_set.state•

Permitted value state for the defined item.

_enumeration_set.xref_code

Identity of the equivalent item in the dictionary referenced by the DICTIONARY_XREF attributes.

_enumeration_set.xref_dictionary

Code identifying the dictionary in the DICTIONARY_XREF list.

IMPORT

Used to import the values of specific attributes from other dictionary definitions within and without the current dictionary.

_import.get

A list of tables of attributes defined individually in the category IMPORT_DETAILS, used to import definitions from other dictionaries.
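For example, a definition might pull in a shared template with (an illustrative sketch; the file and frame names are invented):

    _import.get   [{"file":"templ_attr.cif"  "save":"units_code"}]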

IMPORT_DETAILS

Items in IMPORT_DETAILS describe individual attributes of an import operation.

_import_details.file_id

The file name/URI of the source dictionary

_import_details.frame_id

The framecode of the definition frame to be imported.

_import_details.if_dupl

Code identifying the action taken if the requested definition block already exists within the importing dictionary.

Possible values(default in bold):
Ignore

ignore imported definitions if id conflict

Replace

replace existing with imported definitions

Exit

issue error exception and exit

_import_details.if_miss

Code identifying the action taken if the requested definition block is missing from the source dictionary.

Possible values(default in bold):
Ignore

ignore import

Exit

issue error exception and exit

_import_details.mode

Code identifying how a definition save frame is to be imported. "Full" imports the entire definition frame including the leading and trailing save statements. "Contents" imports only the lines within the save frame.

Possible values(default in bold):
Full

import requested definition with frame

Contents

import contents of requested defn frame

_import_details.order•

The order in which the import described by the referenced row should be executed.

_import_details.single

A Table mapping attributes defined individually in category IMPORT to their values; used to import definitions from other dictionaries.

_import_details.single_index

One of the indices permitted in the entries of values of attribute _import_details.single.

Possible values:
file

filename/URI of source dictionary

save

save framecode of source definition

mode

mode for including save frames

dupl

option for duplicate entries

miss

option for missing duplicate entries

LOOP

Attributes for looped lists.

_loop.level

Specifies the level of the loop structure in which a defined item must reside if it used in a looped list.

METHOD

Methods used for evaluating, validating and defining items.

_method.expression

The method expression for the defined item.

_method.purpose•

The purpose and scope of the method expression.

Possible values(default in bold):
Evaluation

method evaluates an item from related item values

Definition

method generates attribute value(s) in the definition

Validation

method compares an evaluation with existing item value

NAME

Attributes for identifying items and item categories.

_name.category_id

The name of the category in which a category or item resides.

_name.linked_item_id

Dataname of an equivalent item in another category which has a common set of values, or, in the definition of a type Su item is the name of the associated Measurement item to which the standard uncertainty applies.

_name.object_id

The object name of a category or name unique within the category or family of categories.

TYPE

Attributes which specify the typing of data items.

_type.container

The CONTAINER type of the defined data item value.

Possible values(default in bold):
Single

single value

Multiple

values as List or by boolean ,|&!* or range : ops

List

ordered set of values bounded by [] and separated by commas. Elements need not be of same contents type.

Array

ordered set of numerical values bounded by [] and separated by commas. Operations across arrays are equivalent to operations across elements of the Array.

Matrix

ordered set of numerical values for a tensor bounded by [] and separated by commas. Tensor operations such as dot and cross products, are valid cross matrix objects.

Table

id:value elements bounded by {}; separated by commas

Ref-table

a STAR construction with key:value elements bounded by ${..}$ and separated by commas. The id tags below are privileged and optional:

source - filename or URI
block - data blockname
frame - framecode or [framecode,framecode,..]
item - dataname or [dataname,dataname,..]
key - key value if item is in a list

_type.contents

Syntax of the value elements within the container type. This may be a single enumerated code, or, in the case of a list, a comma-delimited sequence of codes, or, if there are alternate types, a boolean-linked (or range) sequence of codes. The typing of elements is determined by the replication of the minimum set of states declared. Where the definition is of a Table container this attribute describes the construction of the value elements within those (Table) values.

Possible values(default in bold):
Text

case-sens strings or lines of STAR characters

Code

case-insens contig. string of STAR characters

Name

case-insens contig. string of alpha-num chars or underscore

Tag

case-insens contig. STAR string with leading underscore

Filename

case-sens string indentifying an external file

Uri

case-sens string as universal resource indicator of a file

Date

ISO standard date format <yyyy>-<mm>-<dd>

Version

version digit string of the form <major>.<version>.<update>

Dimension

integer limits of an Array/Matrix/List in square brackets

Range

inclusive range of numerical values min:max

Count

unsigned integer number

Index

unsigned non-zero integer number

Integer

positive or negative integer number

Real

floating-point real number

Imag

floating-point imaginary number

Complex

complex number <R>+j<I>

Binary

binary number \b<N>

Hexadecimal

hexadecimal number \x<N>

Octal

octal number \o<N>

Implied

implied by the context of the attribute

ByReference

The contents have the same form as those of the attribute referenced by _type.contents_referenced_id.

_type.contents_referenced_id

The value of the _definition.id attribute of an attribute definition whose type is to be used also as the type of this item. Meaningful only when this item’s _type.contents attribute has value ByReference.

_type.dimension

The dimensions of a list or matrix of elements as a text string within bounding square brackets.

_type.indices

Used to specify the syntax construction of indices of the entries in the defined object when the defined object has Table as its _type.container attribute. Values are a subset of the codes and constructions defined for attribute _type.contents, accounting for the fact that syntactically, indices are always case-sensitive quoted strings.

Meaningful only when the defined item has _type.container Table.

Possible values(default in bold):
Text

a case-sensitive string/lines of text

Filename

name of an external file

Code

code used for indexing data or referencing data resources

Date

ISO date format yyyy-mm-dd

Uri

an universal resource identifier string, per RFC 3986

Version

version digit string of the form <major>.<version>.<update>

ByReference

Indices have the same form as the contents of the attribute identified by _type.indices_referenced_id

_type.indices_referenced_id

The _definition.id attribute of a definition whose type describes the form and construction of the indices of entries in values of the present item.

Meaningful only when the defined item’s _type.container attribute has value Table, and its _type.indices attribute has value ByReference.

_type.purpose

The primary purpose or function the defined data item serves in a dictionary or a specific data instance.

Possible values(default in bold):
Import

>>> Applied ONLY in the DDLm Reference Dictionary <<< Used to type the SPECIAL attribute "_import.get" that is present in dictionaries to instigate the importation of external dictionary definitions.

Method

>>> Applied ONLY in the DDLm Reference Dictionary <<< Used to type the attribute "_method.expression" that is present in dictionary definitions to provide the text method expressing the defined item in terms of other defined items.

Audit

>>> Applied ONLY in the DDLm Reference Dictionary <<< Used to type attributes employed to record the audit definition information (creation date, update version and cross reference codes) of items, categories and files.

Identify

>>> Applied ONLY in the DDLm Reference Dictionary <<< Used to type attributes that identify an item tag (or part thereof), save frame or the URI of an external file.

Extend
  • Used to EXTEND the DDLm Reference Dictionary * Used in a definition, residing in the "extensions" save frame of a domain dictionary, to specify a new enumeration state using an Evaluation method.

Describe

Used to type items with values that are descriptive text intended for human interpretation.

Encode

Used to type items with values that are text or codes that are formatted to be machine parsible.

State

Used to type items with values that are restricted to codes present in their "enumeration_set.state" lists.

Key

Used to type an item with a value that is unique within the looped list of these items, and may be used as a reference "key" to identify a specific packet of items within the category.

Link

Used to type an item with a value that is unique within a looped list of items belonging to another category. The definition of this item must contain the attribute "_name.linked_item_id" specifying the data name of the key item for this list. The defined item represents a a foreign key linking packets in this category list to packets in another category.

Composite

Used to type items with value strings composed of separate parts. These will usually need to be separated and parsed for complete interpretation and application.

Number

Used to type items that are numerical and exact (i.e. no standard uncertainty value).

Measurand

Used to type an item with a numerically estimated value that has been recorded by measurement or derivation. This value must be accompanied by its standard uncertainty (SU) value, expressed either as: 1) appended integers, in parentheses (), at the precision of the trailing digits, or 2) a separately defined item with the same name as the measurand item but with an additional suffix _su.

SU

Used to type an item with a numerical value that is the standard uncertainty of an item with the identical name except for the suffix _su. The definition of an SU item must include the attribute "_name.linked_item_id" which explicitly identifies the associated measurand item.

Internal

Used to type items that serve only internal purposes of the dictionary in which they appear. The particular purpose served is not defined by this state.

_type.source

The origin or source of the defined data item, indicating by what recording process it has been added to the domain instance.

Possible values(default in bold):
Recorded

A value (numerical or otherwise) recorded by observation or measurement during the experimental collection of data. This item is PRIMITIVE.

Assigned

A value (numerical or otherwise) assigned as part of the data collection, analysis or modelling required for a specific domain instance. These assignments often represent a decision made that determines the course of the experiment (and therefore may be deemed PRIMITIVE) or a particular choice in the way the data was analysed (and therefore may be considered NOT PRIMITIVE).

Related

A value or tag used in the construction of looped lists of data. Typically identifying an item whose unique value is the reference key for a loop category and/or an item which as values in common with those of another loop category and is considered a Link between these lists.

Derived

A quantity derived from other data items within the domain instance. This item is NOT PRIMITIVE.

UNITS

The attributes for specifying units of measure.

_units.code

A code which identifies the units of measurement.


pycifrw-4.4/src/Programs/loop_example.cif000066400000000000000000000010551345362224200206000ustar00rootroot00000000000000# An example cif file: we wish to output only a subset of
# the following items.
#
data_some_stuff
loop_
  _atom_site_label
  _atom_site_type_symbol
  _atom_site_fract_x
  _atom_site_fract_y
  _atom_site_fract_z
  _atom_site_U_iso_or_equiv
  _atom_site_adp_type
  _atom_site_occupancy
  _atom_site_symmetry_multiplicity
  _atom_site_calc_flag
  _atom_site_refinement_flags
  _atom_site_disorder_assembly
  _atom_site_disorder_group
Ge1 Ge 0.44769(7) 0.92488(4) 0.20378(2) 0.01230(15) Uani 1 1 d . . .
Ge2 Ge 0.55025(7) 1.22809(4) 0.17280(2) 0.01193(15) Uani 1 1 d . . .
pycifrw-4.4/src/Programs/loop_example.py000066400000000000000000000023611345362224200204700ustar00rootroot00000000000000#
#  An example of how to output a subset of looped items.
#
from __future__ import print_function

import sys
from CifFile import CifFile, CifBlock

cf = CifFile("loop_example.cif")["some_stuff"]  # open and parse our cif,
                                     #we want data block named "some_stuff".
# --- Optional section
# Check that all our data
# items exist before attempting to access them
needed_items = [ "_atom_site_label", "_atom_site_type_symbol",
    "_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z"]
loopkeys = cf.GetLoopNames("_atom_site_label")  #get co-looped names
if len([a for a in needed_items if a not in loopkeys]) != 0:
    print("Error: one or more items missing from atom_site_label loop")
    sys.exit()
#
# ----- End of optional section
nb = CifBlock()                      # create a new block
for a in needed_items:               # set new values
    nb.AddItem(a,cf[a])
nb.CreateLoop(needed_items)          # create the loop
df = CifFile()                       # create a new cif object
df.NewBlock("changed",nb)            # and add our new block
outfile = open("updated.cif",'w')    # open a file to write to
outfile.write(df.WriteOut(comment="# This file has been updated"))
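# The resulting updated.cif should contain something like (values abridged
# from loop_example.cif):
#
#     data_changed
#     loop_
#       _atom_site_label
#       _atom_site_type_symbol
#       _atom_site_fract_x
#       _atom_site_fract_y
#       _atom_site_fract_z
#       Ge1 Ge 0.44769(7) 0.92488(4) 0.20378(2)
#       Ge2 Ge 0.55025(7) 1.22809(4) 0.17280(2)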
pycifrw-4.4/src/Programs/output_asciidoc.py000066400000000000000000000220611345362224200212010ustar00rootroot00000000000000# A program to produce an Ascii-doc ready presentation of a CIF dictionary
#
from __future__ import print_function
from CifFile import CifDic, CifFile
import os, re
try:
    from cStringIO import StringIO   # Python 2
except ImportError:
    from io import StringIO          # Python 3

def make_asciidoc(indic):
    """Make a nice asciidoc document from a CIF dictionary"""
    template_files = {}
    base_directory = os.path.dirname(indic)  # for later
    sem_dic = CifDic(indic, grammar="2.0", do_imports="Contents", do_dREL=False)
    dep_dics = analyse_deps(sem_dic, in_directory=base_directory)
    blockorder = sem_dic.get_full_child_list()
    outstring = StringIO()
    categories = sem_dic.get_categories()
    # The first block is the one with the dictionary information
    top_block = sem_dic.master_block
    dicname = top_block['_dictionary.title']
    dicversion = top_block['_dictionary.version']
    dicdate = top_block['_dictionary.date']
    dicdes = prepare_text(top_block['_description.text'])
    outstring.write(":toc-placement: manual\n")
    outstring.write(":toc:\n")
    outstring.write("= " + dicname + "\n")
    outstring.write(":date: " + dicdate + "\n")
    outstring.write(":version: " + dicversion + "\n\n")
    outstring.write(dicdes + "\n\n")
    if len(dep_dics) > 0:
        outstring.write("This dictionary builds on definitions found in ")
        for pos, one_dic in enumerate(dep_dics):
            outstring.write(one_dic.master_block["_dictionary.title"])
            if pos < len(dep_dics)-2:
                outstring.write(", ")
            elif pos == len(dep_dics)-2:
                outstring.write(" and ")
            else:
                outstring.write(".")
        outstring.write("\n\n")
    outstring.write("Generated from %s, version {version}\n\n" % indic)
    outstring.write("""
Categories that may have more than one row are called loop
categories. Datanames making up the keys for these categories have a dot suffix.""")
    outstring.write("\n\n")
    outstring.write("toc::[]\n")  # only works if toc2 removed
    # store a list of ids for us to reference later (a real list, not a
    # lazy map object, so it can be reused on every pass under Python 3)
    section_ids = [make_id(a) for a in blockorder[1:]]
    current_cat = ''  # guard in case the first definition is an Item
    for adef in blockorder[1:]:
        onedef = sem_dic[adef]
        def_type = onedef.get('_definition.scope', 'Item')
        def_text = onedef.get('_description.text', 'Empty')
        out_def_header = def_header = onedef.get('_definition.id', 'None')
        def_id = make_id(def_header)
        aliases = onedef.get('_alias.definition_id', [])
        cat_id = onedef.get('_name.category_id', None)
        if def_header in aliases:
            aliases.remove(def_header)
        alternates = onedef.get('_enumeration_set.state', [])
        alt_defs = [prepare_text(a, section_ids) for a in onedef.get('_enumeration_set.detail', [])]
        if len(alt_defs) != len(alternates):  # definitions omitted as obvious?
            alt_defs = [None]*len(alternates)
        if def_type == 'Category':
            outstring.write("[[%s]]\n== " % def_id)
            current_cat = def_header
            out_def_header = def_header.upper()
        elif def_type == 'Item':
            if cat_id.lower() != current_cat.lower():  # from an external source
                # find external source
                dic_source = locate_category(dep_dics, cat_id)
                outstring.write("\n== " + cat_id.upper() + " (Original category from: %s) \n" % dic_source)
                outstring.write("This category is defined in external dictionary %s\n" % dic_source)
                current_cat = cat_id
            outstring.write("[[%s]]\n=== " % def_id)
            if is_key(sem_dic, def_header):
                out_def_header = def_header + r"•"
        else:
            continue
        outstring.write(out_def_header + "\n")
        if def_text == 'Empty':  # check for template
            def_text = get_template_text(onedef, indic, template_files)
        else:
            def_text = prepare_text(def_text, section_ids)
        outstring.write("\n" + def_text + "\n")
        # further information for item definitions
        if len(alternates) > 0:
            # check for default
            default = onedef.get('_enumeration.default', None)
            outstring.write("\n.Possible values")
            if default is not None:
                outstring.write('(default in bold)')
            outstring.write(":\n\n[horizontal]\n*****\n")
            for n, d in zip(alternates, alt_defs):
                if n == default:
                    outstring.write("*" + vb_text(n) + "*")
                else:
                    outstring.write(vb_text(n))
                outstring.write("::\n\n")
                if d is not None:
                    outstring.write(d + "\n")
            outstring.write("*****\n")
        if len(aliases) > 0:
            outstring.write("\n.Aliases\n*****\n")
            for a in aliases:
                outstring.write("`" + a + "` +\n")
            outstring.write("*****\n")
    outfile = open(indic + ".adoc", "w")
    outfile.write(outstring.getvalue())

def prepare_text(textstring, link_ids=[]):
    """Transform text for better presentation"""
    # Make sure paragraphs are not set verbatim
    outstring = re.sub(r"\n +", r"\n", textstring.strip())
    # Try and catch some greek letters
    outstring = re.sub(r"\\alpha", r"α", outstring)
    outstring = re.sub(r"\\\\a", r"α", outstring)
    outstring = re.sub(r"\\a", r"α", outstring)
    outstring = re.sub(r"\\b", r"β", outstring)
    outstring = re.sub(r"\\\\q", r"θ", outstring)
    outstring = re.sub(r"\\q", r"θ", outstring)
    outstring = re.sub(r"\\l", r"λ", outstring)
    outstring = re.sub(r"\\\\m", r"μ", outstring)
    outstring = re.sub(r"\\\\n", r"ν", outstring)
    outstring = re.sub(r"\\n", r"ν", outstring)
    # Subscripting should just work
    # Assume underscores signal a dataname and format as literal
    # as well as linking
    outstring = re.sub(r"([\s(,]|^)(_[A-Za-z0-9.%_-]+)", match_to_id, outstring)
    # Assume that single apostrophes become double
    outstring = re.sub(r"'s", r"\'s", outstring)
    outstring = re.sub(r"'([\S])'", r'"\1"', outstring)
    # Also catch constructions of the form "*_" and "_*"
    outstring = re.sub(r"(\*_[\w]+|[\w]+_\*)", r"`\1`", outstring)
    # Asterisks are probably reciprocal space, not emphasis
    outstring = re.sub(r"\*(?!_)", r"\*", outstring)
    return outstring
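
# Worked illustration (annotation only, not part of the original program):
# given a typical dictionary description string, prepare_text() substitutes
# Greek letters and turns datanames into asciidoc cross-references, e.g.
#
#   prepare_text("The \\a angle; see _cell.angle_alpha for details")
#   -> 'The α angle; see xref:_cell_angle_alpha[`_cell.angle_alpha`] for details'
#
# The xref target comes from make_id() below, which maps every
# non-alphanumeric character of the dataname to an underscore.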

def match_to_id(one_match):
    """ Used by prepare_text to format cross-references correctly"""
    to_id = make_id(one_match.group(2))
    return one_match.group(1) + "xref:" + to_id + "[`" + one_match.group(2) + "`]"

def vb_text(textstring):
    """Escape characters that might be interpreted by asciidoc"""
    import re
    outstring = re.sub(r"~(.+)", r"\~\1", textstring)
    return "+" + outstring + "+"

def make_id(textstring):
    """Convert the provided text into an ID for cross-referencing.
    Substitute all non-alphanumerics with underscore"""
    outstring = re.sub(r"\W", "_", textstring)
    return outstring

def is_key(dic, name):
    """Check whether or not name is a key in its category"""
    cat_name = dic[name]['_name.category_id']
    cat_block = dic.get(cat_name)
    if cat_block is not None:
        cat_keys = cat_block.get('_category_key.name', [])
        single_key = cat_block.get('_category.key_id', '')
        return name in cat_keys or name == single_key
    print('Warning: no block found for %s' % cat_name)
    return False

def get_template_text(onedef, mainfilename, template_cache={}):
    """Return some text stating that the definition comes from a template"""
    import os.path
    def_text = 'Empty'
    if '_import.get' in onedef:  # has_key() no longer exists in Python 3
        filename = onedef['_import.get'][0]['file']
        blockname = onedef['_import.get'][0]['save']
        if filename not in template_cache:
            basename = os.path.join(os.path.dirname(mainfilename), filename)
            template_cache[filename] = CifFile(basename, grammar="2.0")
        def_text = '(Generic definition) ' + prepare_text(template_cache[filename][blockname]['_description.text'])
    else:
        print('No import found')
    return def_text

def analyse_deps(indic, in_directory="."):
    """Determine which dictionaries this builds on by looking for
    imports in the top category"""
    import os
    cats = indic.get_categories()
    head_cat = [a for a in cats if indic[a].get('_definition.class', None) == 'Head'][0]
    imports = indic[head_cat].get('_import.get')
    if imports is None:
        return []
    full_imports = [d['file'] for d in imports if d.get("mode") == 'Full']
    return [CifDic(os.path.join(in_directory, i), grammar="2.0", do_minimum=True)
            for i in full_imports]

def locate_category(diclist, catname):
    """Find out which dictionary has catname as a category"""
    found_id = [d for d in diclist if catname.lower() in d.get_categories()]
    found_id = [d.master_block['_dictionary.title'] for d in found_id]
    if len(found_id) == 1:
        return found_id[0]
    elif len(found_id) == 0:
        return 'External category not found'
    else:
        return 'Ambiguous external dictionary'

if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        indic = sys.argv[1]
        final_doc = make_asciidoc(indic)
    else:
        print('Usage: output_asciidoc <dictionary>. Output file will be <dictionary>.adoc')
pycifrw-4.4/src/Programs/parsetab.py000066400000000000000000002441721345362224200176130ustar00rootroot00000000000000
# parsetab.py
# This file is automatically generated. Do not edit.
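# (Annotation, not part of the generated file: this module holds the LALR
# action/goto tables that PLY's yacc writes out, apparently for PyCIFRW's
# dREL grammar. PLY compares _tabversion and _lr_signature against the
# current grammar and regenerates the tables itself when they are stale,
# so changes belong in the grammar definition, never in this table.)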
_tabversion = '3.2' _lr_method = 'LALR' _lr_signature = 'o\xd9\xd0\xb2N5\x1eK\x05\x8c\x06\xb8\xb9\xe1\xae&' _lr_action_items = {'REAL':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,73,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,194,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,48,48,-155,-154,-63,48,-78,-58,-158,48,-129,-77,-60,-128,48,-126,-80,-23,-131,-67,48,-65,48,-127,-76,-70,-68,-5,48,-52,-32,-81,-158,-30,48,-21,-72,-46,117,-71,-64,48,-61,-1,-79,-57,-133,-69,-27,48,-132,-73,-24,-43,-130,-62,-59,-66,-2,-157,-145,-158,-137,48,-141,48,-3,-62,48,48,48,-54,-148,-42,-40,48,48,-39,48,-34,-35,-41,-37,-38,48,-31,-99,-97,48,48,-83,-74,-75,-53,48,48,48,48,48,48,48,48,48,48,-158,48,48,-149,-4,48,-84,-134,48,-26,-25,48,-33,-44,-45,-36,-86,-92,-56,-98,48,-82,48,-28,-29,-50,-47,-48,-51,-49,-114,48,-67,48,-8,-150,-142,48,-158,48,48,-102,48,-101,-100,48,48,-115,-158,-22,-9,-158,-158,-139,48,48,-85,-158,48,-91,48,48,48,-138,48,-143,48,-151,48,-146,-135,-136,-140,48,48,-144,-147,48,48,]),'DO':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,64,64,-155,-154,-63,64,-78,-58,-158,64,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,64,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,64,-141,-3,-62,64,-54,-148,-31,-99,-97,-83,-74,-75,-53,64,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,64,-8,-150,-142,64,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,64,-85,-91,64,-138,64,-143,64,-151,-146,-135,-136,-140,-144,-147,]),'*':([7,11,12,17,18,23,28,30,34,35,36,40,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,131,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,131,131,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'PRINT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,29,29,-155,-154,-63,29,-78,-58,-158,29,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,29,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,
-59,-66,-2,-157,-145,-158,-137,29,-141,29,-3,-62,29,-54,-148,-31,-99,-97,-83,-74,-75,-53,29,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,29,-8,-150,-142,29,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,29,-85,-91,29,-138,29,-143,29,-151,-146,-135,-136,-140,-144,-147,]),'^':([7,11,12,17,18,23,28,30,34,35,36,40,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,134,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,134,134,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'AUGOP':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,47,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,77,88,99,115,117,118,122,124,125,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,222,224,234,238,],[-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-62,-59,-66,139,-62,-54,-31,-99,-97,-83,-74,-75,-53,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-22,-85,-91,]),';':([6,7,10,11,12,16,17,18,20,23,24,27,28,30,34,35,36,40,41,42,44,47,48,49,50,51,52,54,56,57,58,60,62,63,65,66,69,70,71,73,75,76,77,88,95,99,115,117,118,122,124,125,127,144,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,191,193,194,211,214,215,219,222,224,234,238,],[-6,-63,-15,-78,-58,86,-77,-60,-10,-80,-23,-20,-67,-65,-76,-70,-68,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-79,-13,-11,-57,-14,-69,-27,-12,-73,-24,-43,-62,-59,-66,-17,-62,-16,-54,-31,-99,-97,-83,-74,-75,-53,-7,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-19,-18,-67,-102,-101,-100,-119,-115,-22,-85,-91,]),'BININT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,51,51,-155,-154,-63,51,-78,-58,-158,51,-129,-77,-60,-128,51,-126,-80,-23,-131,-67,51,-65,51,-127,-76,-70,-68,-5,51,-52,-32,-81,-158,-30,51,-21,-72,-46,-55,-71,-64,51,-61,-1,-79,-57,-133,-69,-27,51,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,51,-141,51,-3,-62,51,51,51,-54,-148,-42,-40,51,51,-39,51,-34,-35,-41,-37,-38,51,-31,-99,-97,51,51,-83,-74,-75,-53,51,51,51,51,51,51,51,51,51,51,-158,51,51,-149,-4,51,-84,-134,51,-26,-25,51,-33,-44,-45,-36,-86,-92,-56,-98,51,-82,51,-28,-29,-50,-47,-48,-51,-49,-114,51,51,-8,-150,-142,51,-158,51,51,-102,51,-101,-100,51,51,-115,-158,-22,-9,-158,-158,-139,51,51,-85,-158,51,-91,51,51,51,-138,51,-143,51,-151,51,-146,-135,-136,-140,51,51,-144,-147,51,51,]),'.':([7,11,12,17,18,23,28,30,34,35,36,42,48,50,51,52,54,56,60,63,69,73,75,76,88,117,118,122,124,125,135,146,161,165,167,175,188,194,211,214,215,218,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-81,-72,120,-71,-64,-61,-79,-57,-69,
-73,-62,-59,-66,-62,-99,-97,-83,-74,-75,186,-84,-86,-92,-98,-82,-114,-67,-102,-101,-100,245,-115,-85,-91,]),'WITH':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,9,9,-155,-154,-63,9,-78,-58,-158,9,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,9,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,9,-141,-3,-62,9,-54,-148,-31,-99,-97,-83,-74,-75,-53,9,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,9,-8,-150,-142,9,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,9,-85,-91,9,-138,9,-143,9,-151,-146,-135,-136,-140,-144,-147,]),'NEQ':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,111,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,111,]),'POWER':([7,11,12,17,18,23,28,30,34,35,36,42,48,50,51,52,54,56,60,63,69,73,75,76,88,117,118,122,124,125,146,161,165,167,175,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-81,-72,119,-71,-64,-61,-79,-57,-69,-73,-62,-59,-66,-62,-99,-97,-83,-74,-75,-84,-86,-92,-98,-82,-114,-67,-102,-101,-100,-115,-85,-91,]),'&':([7,11,12,17,18,23,28,30,34,35,36,40,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,130,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,130,130,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'+':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,73,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,194,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,32,32,-155,-154,-63,32,-78,-58,-158,32,-129,-77,-60,-128,32,-126,-80,-23,-131,-67,32,-65,32,-127,-76,-70,-68,-5,32,-52,105,-81,-158,-30,32,-21,-72,-46,-55,-71,-64,32,-61,-1,-79,-57,-133,-69,-27,32,-132,-73,-24,-43,-130,-62,-59,-66,-2,-157,-145,-158,-137,32,-141,32,-3,-62,32,32,32,-54,-148,-42,-40,32,32,-39,32,-34,-35,-41,-37,-38,32,-31,-99,-97,32,32,-83,-74,-75,-53,32,32,32,32,32,32,32,32,32,32,-158,32,32,-149,-4,32,-84,-134,32,-26,-25,32,105,-44,-45,-36,-86,-92,-56,-98,32,-82,32,-28,-29,-50,-47,-48,-51,-49,-114,32,-67,32,-8,-150,-142,32,-158,32,32,-102,32,-101,-100,32,32,-115,-158,-22,-9,-158,-
158,-139,32,32,-85,-158,32,-91,32,32,32,-138,32,-143,32,-151,32,-146,-135,-136,-140,32,32,-144,-147,32,32,]),'NEWLINE':([0,1,5,6,7,10,11,12,13,16,17,18,20,23,24,27,28,30,34,35,36,40,41,42,43,44,46,47,48,49,50,51,52,54,56,57,58,60,62,63,65,66,69,70,71,73,75,76,77,79,81,86,87,88,95,99,115,117,118,122,124,125,127,138,143,144,146,149,150,156,157,158,160,161,162,164,165,166,167,175,177,178,179,180,181,182,183,188,191,193,194,197,206,207,211,214,215,219,222,223,224,226,227,234,235,237,238,257,258,259,268,269,282,283,],[1,-156,79,-6,-63,-15,-78,-58,1,1,-77,-60,-10,-80,-23,-20,-67,-65,-76,-70,-68,-52,-32,-81,1,-30,1,-21,-72,-46,-55,-71,-64,-61,-79,-13,-11,-57,-14,-69,-27,-12,-73,-24,-43,-62,-59,-66,-17,-157,1,1,79,-62,-16,-54,-31,-99,-97,-83,-74,-75,-53,1,79,-7,-84,-26,-25,-33,-44,-45,1,-86,-90,1,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-19,-18,-67,-150,-87,1,-102,-101,-100,-119,-115,1,-22,1,1,-85,1,-96,-91,-88,-93,1,-89,1,-94,-95,]),'-':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,73,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,194,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,67,67,-155,-154,-63,67,-78,-58,-158,67,-129,-77,-60,-128,67,-126,-80,-23,-131,-67,67,-65,67,-127,-76,-70,-68,-5,67,-52,107,-81,-158,-30,67,-21,-72,-46,-55,-71,-64,67,-61,-1,-79,-57,-133,-69,-27,67,-132,-73,-24,-43,-130,-62,-59,-66,-2,-157,-145,-158,-137,67,-141,67,-3,-62,67,67,67,-54,-148,-42,-40,67,67,-39,67,-34,-35,-41,-37,-38,67,-31,-99,-97,67,67,-83,-74,-75,-53,67,67,67,67,67,67,67,67,67,67,-158,67,67,-149,-4,67,-84,-134,67,-26,-25,67,107,-44,-45,-36,-86,-92,-56,-98,67,-82,67,-28,-29,-50,-47,-48,-51,-49,-114,67,-67,67,-8,-150,-142,67,-158,67,67,-102,67,-101,-100,67,67,-115,-158,-22,-9,-158,-158,-139,67,67,-85,-158,67,-91,67,67,67,-138,67,-143,67,-151,67,-146,-135,-136,-140,67,67,-144,-147,67,67,]),',':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,47,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,77,88,89,96,98,99,115,117,118,122,123,124,125,127,146,149,150,154,156,157,158,161,162,165,166,167,168,169,170,171,172,173,174,175,177,178,179,180,181,182,183,184,185,187,188,189,191,193,194,200,201,203,206,211,213,214,215,216,217,222,224,234,237,238,239,240,241,242,243,247,252,255,257,258,260,262,268,274,278,282,283,],[-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-62,-59,-66,138,-62,138,153,-124,-54,-31,-99,-97,-83,138,-74,-75,-53,-84,-26,-25,153,-33,-44,-45,-86,207,-92,-56,-98,210,-103,-105,-104,-113,-110,-112,-82,-28,-29,-50,-47,-48,-51,-49,-116,218,221,-114,-118,138,138,-67,138,-125,231,235,-102,-107,-101,-100,-108,244,-115,-22,-85,259,-91,-113,-111,-112,-109,-106,-117,138,-152,-88,269,271,-120,-89,-153,-121,-94,-95,]),'/':([7,11,12,17,18,23,28,30,34,35,36,40,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,132,-62,-59,-
66,-62,-54,-99,-97,-83,-74,-75,-53,-84,132,132,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'NEXT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,10,10,-155,-154,-63,10,-78,-58,-158,10,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,10,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,10,-141,10,-3,-62,10,-54,-148,-31,-99,-97,-83,-74,-75,-53,10,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,10,-8,-150,-142,10,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,10,-85,-91,10,-138,10,-143,10,-151,-146,-135,-136,-140,-144,-147,]),'SHORTSTRING':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,59,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,116,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,259,260,263,264,266,269,270,271,272,273,275,277,280,281,],[-158,-156,34,34,-155,-154,-63,34,-78,-58,-158,34,-129,-77,-60,-128,34,-126,-80,-23,-131,-67,34,-65,34,-127,-76,-70,-68,-5,34,-52,-32,-81,-158,-30,34,-158,-21,-72,-46,-55,-71,-64,34,-61,-1,-79,124,-57,-133,-69,-27,34,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,34,-141,34,-3,-62,34,34,34,-54,-148,-42,-40,34,34,-39,34,-34,-35,-41,-37,-38,34,-31,34,-99,-97,34,34,-83,-74,-75,-53,34,34,34,34,34,34,34,34,34,34,-158,34,34,-149,-4,34,-84,-134,34,-26,-25,34,-33,-44,-45,-36,-86,-92,-56,-98,34,-82,34,-28,-29,-50,-47,-48,-51,-49,-114,34,34,-8,-150,-142,34,-158,34,34,-102,34,-101,-100,34,34,-115,-158,-22,-9,-158,-158,-139,34,34,-85,-158,34,-91,34,34,34,-138,34,-143,34,-151,34,-158,-146,-135,-136,-140,-158,34,34,34,-144,34,-147,34,34,]),'OPEN_PAREN':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,73,74,75,76,78,79,80,81,82,84,85,86,87,88,90,91,92,93,99,100,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,53,53,-155,-154,-63,53,-78,-58,-158,53,-129,-77,-60,-128,53,-126,-80,-23,-131,-67,53,-65,53,-127,-76,-70,-68,-5,53,-52,-32,-81,-158,-30,53,-21,-72,-46,-55,-71,-64,53,-61,-1,-79,-57,-133,-69,-27,53,-132,-73,-24,-43,-130,135,136,-59,-66,-2,-157,-145,-158,-137,53,-141,53,-3,145,53,148,53,53,-54,155,-148,-42,-40,53,53,-39,53,-34,-35,-41,-3
7,-38,53,-31,-99,-97,53,53,-83,-74,-75,-53,53,53,53,53,53,53,53,53,53,53,-158,53,53,-149,-4,53,-84,-134,53,-26,-25,53,-33,-44,-45,-36,-86,-92,-56,-98,53,-82,53,-28,-29,-50,-47,-48,-51,-49,-114,53,53,-8,-150,-142,53,-158,53,53,-102,53,-101,-100,53,53,-115,-158,-22,-9,-158,-158,-139,53,53,-85,-158,53,-91,53,53,53,-138,53,-143,53,-151,53,-146,-135,-136,-140,53,53,-144,-147,53,53,]),'OCTINT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,35,35,-155,-154,-63,35,-78,-58,-158,35,-129,-77,-60,-128,35,-126,-80,-23,-131,-67,35,-65,35,-127,-76,-70,-68,-5,35,-52,-32,-81,-158,-30,35,-21,-72,-46,-55,-71,-64,35,-61,-1,-79,-57,-133,-69,-27,35,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,35,-141,35,-3,-62,35,35,35,-54,-148,-42,-40,35,35,-39,35,-34,-35,-41,-37,-38,35,-31,-99,-97,35,35,-83,-74,-75,-53,35,35,35,35,35,35,35,35,35,35,-158,35,35,-149,-4,35,-84,-134,35,-26,-25,35,-33,-44,-45,-36,-86,-92,-56,-98,35,-82,35,-28,-29,-50,-47,-48,-51,-49,-114,35,35,-8,-150,-142,35,-158,35,35,-102,35,-101,-100,35,35,-115,-158,-22,-9,-158,-158,-139,35,35,-85,-158,35,-91,35,35,35,-138,35,-143,35,-151,35,-146,-135,-136,-140,35,35,-144,-147,35,35,]),'STRPREFIX':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,116,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,259,260,263,264,266,269,270,271,272,273,275,277,280,281,],[-158,-156,59,59,-155,-154,-63,59,-78,-58,-158,59,-129,-77,-60,-128,59,-126,-80,-23,-131,-67,59,-65,59,-127,-76,-70,-68,-5,59,-52,-32,-81,-158,-30,59,-158,-21,-72,-46,-55,-71,-64,59,-61,-1,-79,-57,-133,-69,-27,59,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,59,-141,59,-3,-62,59,59,59,-54,-148,-42,-40,59,59,-39,59,-34,-35,-41,-37,-38,59,-31,59,-99,-97,59,59,-83,-74,-75,-53,59,59,59,59,59,59,59,59,59,59,-158,59,59,-149,-4,59,-84,-134,59,-26,-25,59,-33,-44,-45,-36,-86,-92,-56,-98,59,-82,59,-28,-29,-50,-47,-48,-51,-49,-114,59,59,-8,-150,-142,59,-158,59,59,-102,59,-101,-100,59,59,-115,-158,-22,-9,-158,-158,-139,59,59,-85,-158,59,-91,59,59,59,-138,59,-143,59,-151,59,-158,-146,-135,-136,-140,-158,59,59,59,-144,59,-147,59,59,]),'INTEGER':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,
143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,36,36,-155,-154,-63,36,-78,-58,-158,36,-129,-77,-60,-128,36,-126,-80,-23,-131,-67,36,-65,36,-127,-76,-70,-68,-5,36,-52,-32,-81,-158,-30,36,-21,-72,-46,-55,-71,-64,36,-61,-1,-79,-57,-133,-69,-27,36,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,36,-141,36,-3,-62,36,36,36,-54,-148,-42,-40,36,36,-39,36,-34,-35,-41,-37,-38,36,-31,-99,-97,36,36,-83,-74,-75,-53,36,36,36,36,36,36,36,36,36,36,-158,36,36,-149,-4,36,-84,-134,36,-26,-25,36,-33,-44,-45,-36,-86,-92,-56,-98,36,-82,36,-28,-29,-50,-47,-48,-51,-49,-114,36,36,-8,-150,-142,36,-158,36,36,-102,36,-101,-100,36,36,-115,-158,-22,-9,-158,-158,-139,36,36,-85,-158,36,-91,36,36,36,-138,36,-143,36,-151,36,-146,-135,-136,-140,36,36,-144,-147,36,36,]),'IMAGINARY':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,69,69,-155,-154,-63,69,-78,-58,-158,69,-129,-77,-60,-128,69,-126,-80,-23,-131,-67,69,-65,69,-127,-76,-70,-68,-5,69,-52,-32,-81,-158,-30,69,-21,-72,-46,-55,-71,-64,69,-61,-1,-79,-57,-133,-69,-27,69,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,69,-141,69,-3,-62,69,69,69,-54,-148,-42,-40,69,69,-39,69,-34,-35,-41,-37,-38,69,-31,-99,-97,69,69,-83,-74,-75,-53,69,69,69,69,69,69,69,69,69,69,-158,69,69,-149,-4,69,-84,-134,69,-26,-25,69,-33,-44,-45,-36,-86,-92,-56,-98,69,-82,69,-28,-29,-50,-47,-48,-51,-49,-114,69,69,-8,-150,-142,69,-158,69,69,-102,69,-101,-100,69,69,-115,-158,-22,-9,-158,-158,-139,69,69,-85,-158,69,-91,69,69,69,-138,69,-143,69,-151,69,-146,-135,-136,-140,69,69,-144,-147,69,69,]),':':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,75,76,88,99,115,117,118,121,122,124,125,127,146,149,150,156,157,158,161,163,165,166,167,169,170,174,175,177,178,179,180,181,182,183,188,194,199,204,210,211,213,214,215,216,222,234,238,241,243,253,276,279,],[-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-59,-66,-62,-54,-31,-99,-97,170,-83,-74,-75,-53,-84,-26,-25,-33,-44,-45,-86,208,-92,-56,-98,212,-105,216,-82,-28,-29,-50,-47,-48,-51,-49,-114,208,228,233,170,-102,-107,-101,-100,-108,-115,-85,-91,216,-106,267,280,281,]),'=':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,47,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,77,88,99,115,117,118,122,124,125,126,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,220,222,224,234,238,261,],[-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-62,-59,-66,137,-62,-54,-31,-99,-97,-83,-74,-75,176,-53,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-
101,-100,246,-115,-22,-85,-91,272,]),'<':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,112,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,112,]),'$end':([1,3,4,5,15,19,22,25,33,37,55,61,68,72,78,79,80,82,85,87,101,142,143,147,226,229,249,254,263,264,266,],[-156,0,-155,-154,-129,-128,-126,-131,-127,-5,-1,-133,-132,-130,-2,-157,-145,-137,-141,-3,-148,-149,-4,-134,-158,-139,-138,-151,-135,-136,-140,]),'FUNCTION':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,38,38,-155,-154,-63,38,-78,-58,-158,38,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,38,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,38,-141,-3,-62,38,-54,-148,-31,-99,-97,-83,-74,-75,-53,38,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,38,-8,-150,-142,38,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,38,-85,-91,38,-138,38,-143,38,-151,-146,-135,-136,-140,-144,-147,]),'REPEAT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,39,39,-155,-154,-63,39,-78,-58,-158,39,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,39,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,39,-141,-3,-62,39,-54,-148,-31,-99,-97,-83,-74,-75,-53,39,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,39,-8,-150,-142,39,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,39,-85,-91,39,-138,39,-143,39,-151,-146,-135,-136,-140,-144,-147,]),'GTE':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,106,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,106,]),'FOR':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,
-156,31,31,-155,-154,-63,31,-78,-58,-158,31,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,31,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,31,-141,-3,-62,31,-54,-148,-31,-99,-97,-83,-74,-75,-53,31,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,31,-8,-150,-142,31,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,31,-85,-91,31,-138,31,-143,31,-151,-146,-135,-136,-140,-144,-147,]),'BADAND':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-32,-81,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,129,-43,-62,-59,-66,-62,-54,-31,-99,-97,-83,-74,-75,-53,-84,129,129,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'ELSEIF':([1,4,5,15,19,22,25,33,37,61,68,72,79,80,82,85,87,101,142,143,147,226,229,249,254,263,264,266,],[-156,-155,-154,-129,-128,91,-131,-127,-5,-133,-132,-130,-157,-145,-137,-141,-3,-148,-149,-4,-134,-158,-139,-138,-151,-135,-136,-140,]),'LONGSTRING':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,59,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,116,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,259,260,263,264,266,269,270,271,272,273,275,277,280,281,],[-158,-156,17,17,-155,-154,-63,17,-78,-58,-158,17,-129,-77,-60,-128,17,-126,-80,-23,-131,-67,17,-65,17,-127,-76,-70,-68,-5,17,-52,-32,-81,-158,-30,17,-158,-21,-72,-46,-55,-71,-64,17,-61,-1,-79,125,-57,-133,-69,-27,17,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,17,-141,17,-3,-62,17,17,17,-54,-148,-42,-40,17,17,-39,17,-34,-35,-41,-37,-38,17,-31,17,-99,-97,17,17,-83,-74,-75,-53,17,17,17,17,17,17,17,17,17,17,-158,17,17,-149,-4,17,-84,-134,17,-26,-25,17,-33,-44,-45,-36,-86,-92,-56,-98,17,-82,17,-28,-29,-50,-47,-48,-51,-49,-114,17,17,-8,-150,-142,17,-158,17,17,-102,17,-101,-100,17,17,-115,-158,-22,-9,-158,-158,-139,17,17,-85,-158,17,-91,17,17,17,-138,17,-143,17,-151,17,-158,-146,-135,-136,-140,-158,17,17,17,-144,17,-147,17,17,]),'NOT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,68,69,70,71,72,73,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,114,115,117,118,121,122,124,125,127,128,129,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,194,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,45,45,-155,-154,-63,45,-78,-58,-158,45,-129,-77,-60,-128,45,-126,-80,-23,-131,-67,45,-65,-127,-76,-70,-68,-5,45,-52,110,-81,-158,-30,45,-21,-72,-46,-55,-71,-64,45,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-62,-59,-66,-2,-157,-145,-158,-137
,45,-141,45,-3,-62,45,45,45,-54,-148,45,-31,-99,-97,45,-83,-74,-75,-53,45,45,45,45,45,-158,45,45,-149,-4,45,-84,-134,45,-26,-25,45,-33,-44,-45,-86,-92,-56,-98,45,-82,45,-28,-29,-50,-47,-48,-51,-49,-114,45,-67,45,-8,-150,-142,45,-158,45,45,-102,45,-101,-100,45,45,-115,-158,-22,-9,-158,-158,-139,45,45,-85,-158,45,-91,45,45,45,-138,45,-143,45,-151,45,-146,-135,-136,-140,45,45,-144,-147,45,45,]),'AS':([83,94,],[141,151,]),'LTE':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,103,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,103,]),'IN':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,96,98,99,110,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,201,202,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,109,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,152,-124,-54,159,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-125,230,-102,-101,-100,-115,-85,-91,]),'[':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,31,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,73,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,194,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,233,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,267,271,272,273,277,280,281,],[-158,-156,43,43,-155,-154,-63,43,-78,-58,-158,43,-129,-77,-60,-128,43,-126,-80,-23,-131,-67,43,-65,97,43,-127,-76,-70,-68,-5,43,-52,-32,-81,-158,-30,43,-21,-72,-46,121,-71,-64,43,-61,-1,-79,-57,-133,-69,-27,43,-132,-73,-24,-43,-130,-62,-59,-66,-2,-157,-145,-158,-137,43,-141,43,-3,-62,43,43,43,-54,-148,-42,-40,43,43,-39,43,-34,-35,-41,-37,-38,43,-31,-99,-97,43,43,-83,-74,-75,-53,43,43,43,43,43,43,43,43,43,43,-158,43,43,-149,-4,43,-84,-134,43,-26,-25,43,-33,-44,-45,-36,-86,-92,-56,-98,43,-82,43,-28,-29,-50,-47,-48,-51,-49,-114,43,-67,43,-8,-150,-142,43,-158,43,43,-102,43,-101,-100,43,43,-115,-158,-22,-9,-158,-158,-139,43,43,43,-85,-158,43,-91,43,43,43,-138,43,-143,43,-151,43,-146,-135,-136,-140,43,43,43,-144,-147,43,43,]),'ELSE':([1,4,5,15,19,22,25,33,37,61,68,72,79,80,82,85,87,101,142,143,147,226,229,249,254,263,264,266,],[-156,-155,-154,-129,-128,90,-131,-127,-5,-133,-132,-130,-157,-145,-137,-141,-3,-148,-149,-4,-134,-158,-139,-138,-151,-135,-136,-140,]),']':([1,4,5,7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,43,44,48,49,50,51,52,54,56,60,63,65,69,70,71,75,76,79,88,98,99,114,115,117,118,122,124,125,127,146,149,150,154,156,157,158,160,161,162,165,166,167,168,169,170,171,172,173,174,175,177,178,179,180,181,182,183,188,201,205,206,211,213,214,215,216,222,234,238,239,240,241,242,243,257,268,],[-156,-155,-154,-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-158,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-59,-66,-157,-62,-124,-54,161,-31,-99,-97,-83,-74,-75,-53,-84,-26,-25,202
,-33,-44,-45,-158,-86,-90,-92,-56,-98,211,-103,-105,-104,214,-110,215,-82,-28,-29,-50,-47,-48,-51,-49,-114,-125,234,-87,-102,-107,-101,-100,-108,-115,-85,-91,-113,-111,-112,-109,-106,-88,-89,]),'ID':([0,1,2,3,4,5,7,8,9,11,12,13,14,15,17,18,19,21,22,23,24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,64,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,97,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,120,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,145,146,147,148,149,150,151,152,153,155,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,186,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,228,229,230,231,232,234,235,236,238,244,245,246,248,249,250,251,252,254,256,260,263,264,265,266,271,272,273,277,280,281,],[-158,-156,73,73,-155,-154,-63,73,83,-78,-58,-158,73,-129,-77,-60,-128,88,-126,-80,-23,-131,94,-67,88,-65,98,88,-127,-76,-70,-68,-5,100,73,-52,-32,-81,-158,-30,88,-21,-72,-46,-55,-71,-64,88,-61,-1,-79,-57,-133,-69,126,-27,88,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,73,-141,73,-3,-62,73,88,88,98,-54,-148,-42,-40,88,88,-39,88,-34,-35,-41,-37,-38,88,-31,-99,-97,88,167,88,-83,-74,-75,-53,88,88,88,88,88,88,88,88,88,88,-158,88,73,197,-149,-4,88,-84,-134,88,-26,-25,199,88,201,204,-33,-44,-45,-36,-86,-92,-56,-98,88,-82,88,-28,-29,-50,-47,-48,-51,-49,220,-114,88,73,-8,-150,-142,73,-158,88,88,-102,88,-101,-100,88,88,-115,-158,-22,-9,-158,-158,251,-139,88,253,73,-85,-158,88,-91,88,261,88,73,-138,73,-143,73,-151,88,-146,-135,-136,273,-140,88,88,-144,-147,88,88,]),'CLOSE_PAREN':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,47,48,49,50,51,52,53,54,56,60,63,65,69,70,71,75,76,88,99,115,117,118,122,123,124,125,127,135,145,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,184,185,187,188,189,190,198,203,211,214,215,222,224,234,238,247,255,262,274,278,],[-63,-78,-58,-77,-60,-80,-23,-67,-65,-76,-70,-68,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,122,-61,-79,-57,-69,-27,-73,-24,-43,-59,-66,-62,-54,-31,-99,-97,-83,175,-74,-75,-53,188,188,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-116,219,222,-114,-118,223,227,232,-102,-101,-100,-115,-22,-85,-91,-117,-152,-120,-153,-121,]),'IF':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,74,74,-155,-154,-63,74,-78,-58,-158,74,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,74,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,74,-141,-3,-62,74,-54,-148,-31,-99,-97,-83,-74,-75,-53,74,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,74,-8,-150,-142,74,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,74,-85,-91,74,-138,74,-143,74,-151,-146,-135,-136,-140,-144,-147,]),'AND':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-
77,-60,-80,-67,-65,-76,-70,-68,-52,-32,-81,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,128,-43,-62,-59,-66,-62,-54,-31,-99,-97,-83,-74,-75,-53,-84,128,128,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'`':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,89,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,21,21,-155,-154,-63,21,-78,-58,-158,21,-129,-77,-60,-128,21,-126,-80,-23,-131,-67,21,-65,21,-127,-76,-70,-68,-5,21,-52,-32,-81,-158,-30,21,-21,-72,-46,-55,-71,-64,21,-61,-1,-79,-57,-133,-69,-27,21,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,21,-141,21,-3,-62,146,21,21,21,-54,-148,-42,-40,21,21,-39,21,-34,-35,-41,-37,-38,21,-31,-99,-97,21,21,-83,-74,-75,-53,21,21,21,21,21,21,21,21,21,21,-158,21,21,-149,-4,21,-84,-134,21,-26,-25,21,-33,-44,-45,-36,-86,-92,-56,-98,21,-82,21,-28,-29,-50,-47,-48,-51,-49,-114,21,21,-8,-150,-142,21,-158,21,21,-102,21,-101,-100,21,21,-115,-158,-22,-9,-158,-158,-139,21,21,-85,-158,21,-91,21,21,21,-138,21,-143,21,-151,21,-146,-135,-136,-140,21,21,-144,-147,21,21,]),'BADOR':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,92,-67,-65,-76,-70,-68,-52,-32,-81,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-62,-59,-66,-62,-54,-31,-99,-97,-83,-74,-75,-53,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'OR':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,146,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,93,-67,-65,-76,-70,-68,-52,-32,-81,-30,-72,-46,-55,-71,-64,-61,-79,-57,-69,-27,-73,-24,-43,-62,-59,-66,-62,-54,-31,-99,-97,-83,-74,-75,-53,-84,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'BREAK':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,62,62,-155,-154,-63,62,-78,-58,-158,62,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,62,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,62,-141,62,-3,-62,62,-54,-148,-31,-99,-97,-83,-74,-75,-53,62,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,62,-8,-150,-142,62,-102,-101,-100,-115,-158,-22,
-9,-158,-158,-139,62,-85,-91,62,-138,62,-143,62,-151,-146,-135,-136,-140,-144,-147,]),'HEXINT':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,63,63,-155,-154,-63,63,-78,-58,-158,63,-129,-77,-60,-128,63,-126,-80,-23,-131,-67,63,-65,63,-127,-76,-70,-68,-5,63,-52,-32,-81,-158,-30,63,-21,-72,-46,-55,-71,-64,63,-61,-1,-79,-57,-133,-69,-27,63,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,63,-141,63,-3,-62,63,63,63,-54,-148,-42,-40,63,63,-39,63,-34,-35,-41,-37,-38,63,-31,-99,-97,63,63,-83,-74,-75,-53,63,63,63,63,63,63,63,63,63,63,-158,63,63,-149,-4,63,-84,-134,63,-26,-25,63,-33,-44,-45,-36,-86,-92,-56,-98,63,-82,63,-28,-29,-50,-47,-48,-51,-49,-114,63,63,-8,-150,-142,63,-158,63,63,-102,63,-101,-100,63,63,-115,-158,-22,-9,-158,-158,-139,63,63,-85,-158,63,-91,63,63,63,-138,63,-143,63,-151,63,-146,-135,-136,-140,63,63,-144,-147,63,63,]),'ISEQUAL':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,102,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,102,]),'ITEM_TAG':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,111,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,76,76,-155,-154,-63,76,-78,-58,-158,76,-129,-77,-60,-128,76,-126,-80,-23,-131,-67,76,-65,76,-127,-76,-70,-68,-5,76,-52,-32,-81,-158,-30,76,-21,-72,-46,-55,-71,-64,76,-61,-1,-79,-57,-133,-69,-27,76,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,76,-141,76,-3,-62,76,76,76,-54,-148,-42,-40,76,76,-39,76,-34,-35,-41,-37,-38,76,-31,-99,-97,76,76,-83,-74,-75,-53,76,76,76,76,76,76,76,76,76,76,-158,76,76,-149,-4,76,-84,-134,76,-26,-25,76,-33,-44,-45,-36,-86,-92,-56,-98,76,-82,76,-28,-29,-50,-47,-48,-51,-49,-114,76,76,-8,-150,-142,76,-158,76,76,-102,76,-101,-100,76,76,-115,-158,-22,-9,-158,-158,-139,76,76,-85,-158,76,-91,76,76,76,-138,76,-143,76,-151,76,-146,-135,-136,-140,76,76,-144,-147,76,76,]),'{':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,21,22,23,24,25,28,29,30,32,33,34,35,36,37,39,40,41,42,43,44,45,47,48,49,50,51,52,53,54,55,56,60,61,63,65,67,68,69,70,71,72,75,76,78,79,80,81,82,84,85,86,87,88,90,92,93,99,101,102,103,104,105,106,107,108,109,11
1,112,113,114,115,117,118,119,121,122,124,125,127,128,129,130,131,132,133,134,135,136,137,138,139,140,142,143,145,146,147,148,149,150,152,156,157,158,159,161,165,166,167,170,175,176,177,178,179,180,181,182,183,188,192,195,196,197,199,200,207,208,210,211,212,214,215,216,221,222,223,224,225,226,227,229,230,232,234,235,236,238,244,246,248,249,250,251,252,254,256,260,263,264,266,271,272,273,277,280,281,],[-158,-156,46,46,-155,-154,-63,81,-78,-58,-158,81,-129,-77,-60,-128,46,-126,-80,-23,-131,-67,46,-65,46,-127,-76,-70,-68,-5,81,-52,-32,-81,-158,-30,46,-21,-72,-46,-55,-71,-64,46,-61,-1,-79,-57,-133,-69,-27,46,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,81,-141,46,-3,-62,81,46,46,-54,-148,-42,-40,46,46,-39,46,-34,-35,-41,-37,-38,46,-31,-99,-97,46,46,-83,-74,-75,-53,46,46,46,46,46,46,46,46,46,46,-158,46,46,-149,-4,46,-84,-134,46,-26,-25,46,-33,-44,-45,-36,-86,-92,-56,-98,46,-82,46,-28,-29,-50,-47,-48,-51,-49,-114,46,46,-8,-150,-142,81,-158,46,46,-102,46,-101,-100,46,46,-115,-158,-22,-9,-158,-158,-139,46,81,-85,-158,46,-91,46,46,81,-138,81,-143,81,-151,46,-146,-135,-136,-140,46,46,-144,-147,46,46,]),'>':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,251,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,113,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,-43,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,-44,-45,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,113,]),'}':([1,4,5,7,11,12,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,40,41,42,44,46,48,49,50,51,52,54,56,60,61,63,65,68,69,70,71,72,75,76,79,80,81,82,85,87,88,99,101,115,116,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,164,165,166,167,175,177,178,179,180,181,182,183,188,195,196,209,211,214,215,222,225,226,229,234,237,238,249,254,258,263,264,266,282,283,],[-156,-155,-154,-63,-78,-58,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,-52,-32,-81,-30,-158,-72,-46,-55,-71,-64,-61,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-157,-145,-158,-137,-141,-3,-62,-54,-148,-31,165,-99,-97,-83,-74,-75,-53,165,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-158,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,226,-8,238,-102,-101,-100,-115,-9,-158,-139,-85,-96,-91,-138,-151,-93,-135,-136,-140,-94,-95,]),'|':([7,11,12,17,18,23,28,30,34,35,36,40,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,146,157,158,161,165,166,167,175,179,180,181,182,183,188,194,211,214,215,222,234,238,],[-63,-78,-58,-77,-60,-80,-67,-65,-76,-70,-68,-52,-81,-72,-46,-55,-71,-64,-61,-79,-57,-69,-73,133,-62,-59,-66,-62,-54,-99,-97,-83,-74,-75,-53,-84,133,133,-86,-92,-56,-98,-82,-50,-47,-48,-51,-49,-114,-67,-102,-101,-100,-115,-85,-91,]),'LOOP':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,140,142,143,146,147,149,150,156,157,158,161,165,166,167,175,177,178,179,180,181,182,183,188,195,196,197,199,200,211,214,215,222,223,224,225,226,227,229,232,234,238,248,249,250,251,252,254,260,263,264,266,273,277,],[-158,-156,26,26,-155,-154,-63,26,-78,-58,-158,26,-129,-77,-60,-128,-126,-80,-23,-131,-67,-65,-127,-76,-70,-68,-5,26,-52,-32,-81,-30,-21,-72,-46,-55,-71,-64,-61,-1,-79,-57,-133,-69,-27,-132,-73,-24,-43,-130,-59,-66,-2,-157,-145,-158,-137,26,-141,-3,-62,26,-54,-148,-31,-99,-97,-83,-74
,-75,-53,26,-149,-4,-84,-134,-26,-25,-33,-44,-45,-86,-92,-56,-98,-82,-28,-29,-50,-47,-48,-51,-49,-114,26,-8,-150,-142,26,-102,-101,-100,-115,-158,-22,-9,-158,-158,-139,26,-85,-91,26,-138,26,-143,26,-151,-146,-135,-136,-140,-144,-147,]),} _lr_action = { } for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_action: _lr_action[_x] = { } _lr_action[_x][_k] = _y del _lr_action_items _lr_goto_items = {'statements':([140,],[195,]),'comp_operator':([41,],[104,]),'small_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[6,6,6,6,6,6,144,6,6,6,6,6,6,6,6,]),'fancy_drel_assignment_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,]),'primary':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,]),'stringliteral':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,116,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,270,271,272,275,280,281,],[28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,163,28,28,28,28,28,28,28,28,28,28,28,28,28,194,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,276,28,28,279,28,28,]),'item_tag':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'not_test':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[65,65,65,65,65,65,65,115,65,65,65,65,65,65,65,65,177,178,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,]),'listmaker':([114,],[160,]),'do_stmt_head':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'func_arg':([135,145,221,],[184,184,247,]),'enclosure':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,]),'newlines':([0,13,16,43,46,81,86,138,160,164,207,223,226,227,235,259,269,],[5,5,87,5,5,5,143,5,5,5,5,5,5,5,5,5,5,]),'break_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,]),'dotlist':([135,],[185,]),'arglist':([155,],[203,]),'long_slice':([121,210,],[171,171,]),'repeat_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[68,68,68,68,68,68,68,68,68,68,68,68,68,68,]),'u_expr':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281
,],[49,49,49,49,49,49,99,49,49,49,127,49,49,49,49,49,49,49,49,49,166,49,49,49,179,180,181,182,183,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,]),'if_else_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[33,33,33,33,33,33,33,33,33,33,33,33,33,33,]),'parenth_form':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'literal':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,]),'attributeref':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'call':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,]),'argument_list':([135,145,],[187,187,]),'statement':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[55,78,82,82,82,82,82,196,225,82,82,82,82,82,]),'string_conversion':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,]),'with_head':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'input':([0,],[3,]),'loop_head':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'do_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'next_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[57,57,57,57,57,57,57,57,57,57,57,57,57,57,57,]),'empty':([0,13,43,46,81,138,160,164,207,223,226,227,235,259,269,],[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,]),'listmaker2':([162,],[206,]),'short_slice':([121,210,],[169,169,]),'power':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40
,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,]),'a_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,114,121,128,129,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[41,41,41,41,41,41,41,41,41,41,41,41,41,41,156,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,]),'print_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,]),'maybe_nline':([0,13,43,46,81,138,160,164,207,223,226,227,235,259,269,],[2,84,114,116,140,192,205,209,236,248,249,250,256,270,275,]),'tablemaker2':([237,],[258,]),'slicing':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,]),'for_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[19,19,19,19,19,19,19,19,19,19,19,19,19,19,]),'m_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,105,107,114,121,128,129,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,157,158,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,]),'and_test':([2,3,8,14,21,29,39,53,84,86,90,92,93,114,121,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[70,70,70,70,70,70,70,70,70,70,70,149,150,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,]),'restricted_comp_operator':([41,251,],[108,265,]),'atom':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,]),'funcdef':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[61,61,61,61,61,61,61,61,61,61,61,61,61,61,]),'expr_stmt':([2,3,8,14,39,84,86,90,140,195,200,232,248,250,252,],[20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,]),'slice_list':([121,],[168,]),'subscription':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,]),'comparison':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,]),'attribute_tag':([50,],[118,]),'if_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[22,22,22,22,22,22,22,22,22,22,22,22,22,22,]),'id_list':([31,97,],[96,154,]),'
proper_slice':([121,210,],[172,239,]),'list_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,233,236,244,246,248,250,252,256,267,271,272,280,281,],[23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,255,23,23,23,23,23,23,23,274,23,23,23,23,]),'loop_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[72,72,72,72,72,72,72,72,72,72,72,72,72,72,]),'or_test':([2,3,8,14,21,29,39,53,84,86,90,114,121,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,]),'compound_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[37,37,37,37,37,37,37,37,37,37,37,37,37,37,]),'with_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[25,25,25,25,25,25,25,25,25,25,25,25,25,25,]),'tablemaker':([116,140,],[164,164,]),'table_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,]),'suite':([8,14,39,84,90,200,232,248,250,252,],[80,85,101,142,147,229,254,263,264,266,]),'simple_stmt':([2,3,8,14,39,84,90,140,195,200,232,248,250,252,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'testlist_star_expr':([2,3,8,14,21,39,53,84,86,90,137,139,140,152,195,200,230,232,248,250,252,],[77,77,77,77,89,77,123,77,77,77,191,193,77,200,77,77,252,77,77,77,77,]),'slice_item':([121,210,],[173,240,]),'expression':([2,3,8,14,21,29,39,53,84,86,90,114,121,135,136,137,139,140,145,148,152,170,176,192,195,200,208,210,212,216,221,230,232,236,244,246,248,250,252,256,271,272,280,281,],[47,47,47,47,47,95,47,47,47,47,47,162,174,189,190,47,47,47,189,198,47,213,217,224,47,47,237,241,242,243,189,47,47,257,260,262,47,47,47,268,277,278,282,283,]),} _lr_goto = { } for _k, _v in _lr_goto_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_goto: _lr_goto[_x] = { } _lr_goto[_x][_k] = _y del _lr_goto_items _lr_productions = [ ("S' -> input","S'",1,None,None,None), ('input -> maybe_nline statement','input',2,'p_input','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',19), ('input -> input statement','input',2,'p_input','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',20), ('statement -> simple_stmt newlines','statement',2,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',36), ('statement -> simple_stmt ; newlines','statement',3,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',37), ('statement -> compound_stmt','statement',1,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',38), ('simple_stmt -> small_stmt','simple_stmt',1,'p_simple_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',44), ('simple_stmt -> simple_stmt ; small_stmt','simple_stmt',3,'p_simple_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',45), ('statements -> 
statement','statements',1,'p_statements','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',55), ('statements -> statements statement','statements',2,'p_statements','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',56), ('small_stmt -> expr_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',61), ('small_stmt -> print_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',62), ('small_stmt -> break_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',63), ('small_stmt -> next_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',64), ('break_stmt -> BREAK','break_stmt',1,'p_break_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',68), ('next_stmt -> NEXT','next_stmt',1,'p_next_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',72), ('print_stmt -> PRINT expression','print_stmt',2,'p_print_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',76), ('expr_stmt -> testlist_star_expr','expr_stmt',1,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',84), ('expr_stmt -> testlist_star_expr AUGOP testlist_star_expr','expr_stmt',3,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',85), ('expr_stmt -> testlist_star_expr = testlist_star_expr','expr_stmt',3,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',86), ('expr_stmt -> fancy_drel_assignment_stmt','expr_stmt',1,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',87), ('testlist_star_expr -> expression','testlist_star_expr',1,'p_testlist_star_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',96), ('testlist_star_expr -> testlist_star_expr , maybe_nline expression','testlist_star_expr',4,'p_testlist_star_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',97), ('expression -> or_test','expression',1,'p_expression','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',107), ('or_test -> and_test','or_test',1,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',115), ('or_test -> or_test OR and_test','or_test',3,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',116), ('or_test -> or_test BADOR and_test','or_test',3,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',117), ('and_test -> not_test','and_test',1,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',122), ('and_test -> and_test AND not_test','and_test',3,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',123), ('and_test -> and_test BADAND not_test','and_test',3,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',124), ('not_test -> comparison','not_test',1,'p_not_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',129), ('not_test -> NOT not_test','not_test',2,'p_not_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',130), ('comparison -> a_expr','comparison',1,'p_comparison','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',135), ('comparison -> a_expr comp_operator a_expr','comparison',3,'p_comparison','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',136), ('comp_operator -> restricted_comp_operator','comp_operator',1,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',142), ('comp_operator -> 
IN','comp_operator',1,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',143), ('comp_operator -> NOT IN','comp_operator',2,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',144), ('restricted_comp_operator -> <','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',150), ('restricted_comp_operator -> >','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',151), ('restricted_comp_operator -> GTE','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',152), ('restricted_comp_operator -> LTE','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',153), ('restricted_comp_operator -> NEQ','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',154), ('restricted_comp_operator -> ISEQUAL','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',155), ('a_expr -> m_expr','a_expr',1,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',159), ('a_expr -> a_expr + m_expr','a_expr',3,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',160), ('a_expr -> a_expr - m_expr','a_expr',3,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',161), ('m_expr -> u_expr','m_expr',1,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',168), ('m_expr -> m_expr * u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',169), ('m_expr -> m_expr / u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',170), ('m_expr -> m_expr ^ u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',171), ('m_expr -> m_expr & u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',172), ('m_expr -> m_expr | u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',173), ('u_expr -> power','u_expr',1,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',180), ('u_expr -> - u_expr','u_expr',2,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',181), ('u_expr -> + u_expr','u_expr',2,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',182), ('power -> primary','power',1,'p_power','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',189), ('power -> primary POWER u_expr','power',3,'p_power','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',190), ('primary -> atom','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',198), ('primary -> attributeref','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',199), ('primary -> subscription','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',200), ('primary -> slicing','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',201), ('primary -> call','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',202), ('atom -> ID','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',207), ('atom -> 
item_tag','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',208), ('atom -> literal','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',209), ('atom -> enclosure','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',210), ('item_tag -> ITEM_TAG','item_tag',1,'p_item_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',215), ('literal -> stringliteral','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',219), ('literal -> INTEGER','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',220), ('literal -> HEXINT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',221), ('literal -> OCTINT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',222), ('literal -> BININT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',223), ('literal -> REAL','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',224), ('literal -> IMAGINARY','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',225), ('stringliteral -> STRPREFIX SHORTSTRING','stringliteral',2,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',230), ('stringliteral -> STRPREFIX LONGSTRING','stringliteral',2,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',231), ('stringliteral -> SHORTSTRING','stringliteral',1,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',232), ('stringliteral -> LONGSTRING','stringliteral',1,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',233), ('enclosure -> parenth_form','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',238), ('enclosure -> string_conversion','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',239), ('enclosure -> list_display','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',240), ('enclosure -> table_display','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',241), ('parenth_form -> OPEN_PAREN testlist_star_expr CLOSE_PAREN','parenth_form',3,'p_parenth_form','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',245), ('parenth_form -> OPEN_PAREN CLOSE_PAREN','parenth_form',2,'p_parenth_form','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',246), ('string_conversion -> ` testlist_star_expr `','string_conversion',3,'p_string_conversion','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',253), ('list_display -> [ maybe_nline listmaker maybe_nline ]','list_display',5,'p_list_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',258), ('list_display -> [ maybe_nline ]','list_display',3,'p_list_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',259), ('listmaker -> expression listmaker2','listmaker',2,'p_listmaker','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',267), ('listmaker2 -> , maybe_nline expression','listmaker2',3,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',272), ('listmaker2 -> listmaker2 , maybe_nline expression','listmaker2',4,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',273), ('listmaker2 -> 
','listmaker2',0,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',274), ('table_display -> { maybe_nline tablemaker maybe_nline }','table_display',5,'p_table_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',284), ('table_display -> { maybe_nline }','table_display',3,'p_table_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',285), ('tablemaker -> stringliteral : expression tablemaker2','tablemaker',4,'p_tablemaker','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',292), ('tablemaker2 -> , maybe_nline stringliteral : expression','tablemaker2',5,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',296), ('tablemaker2 -> tablemaker2 , maybe_nline stringliteral : expression','tablemaker2',6,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',297), ('tablemaker2 -> ','tablemaker2',0,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',298), ('attributeref -> primary attribute_tag','attributeref',2,'p_attributeref','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',312), ('attribute_tag -> . ID','attribute_tag',2,'p_attribute_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',316), ('attribute_tag -> REAL','attribute_tag',1,'p_attribute_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',317), ('subscription -> primary [ expression ]','subscription',4,'p_subscription','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',324), ('slicing -> primary [ proper_slice ]','slicing',4,'p_slicing','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',328), ('slicing -> primary [ slice_list ]','slicing',4,'p_slicing','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',329), ('proper_slice -> short_slice','proper_slice',1,'p_proper_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',333), ('proper_slice -> long_slice','proper_slice',1,'p_proper_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',334), ('short_slice -> :','short_slice',1,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',345), ('short_slice -> expression : expression','short_slice',3,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',346), ('short_slice -> : expression','short_slice',2,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',347), ('short_slice -> expression :','short_slice',2,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',348), ('long_slice -> short_slice : expression','long_slice',3,'p_long_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',357), ('slice_list -> slice_item','slice_list',1,'p_slice_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',364), ('slice_list -> slice_list , slice_item','slice_list',3,'p_slice_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',365), ('slice_item -> expression','slice_item',1,'p_slice_item','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',372), ('slice_item -> proper_slice','slice_item',1,'p_slice_item','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',373), ('call -> ID OPEN_PAREN CLOSE_PAREN','call',3,'p_call','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',377), ('call -> ID OPEN_PAREN argument_list CLOSE_PAREN','call',4,'p_call','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',378), ('argument_list -> 
func_arg','argument_list',1,'p_argument_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',388), ('argument_list -> argument_list , func_arg','argument_list',3,'p_argument_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',389), ('func_arg -> expression','func_arg',1,'p_func_arg','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',396), ('fancy_drel_assignment_stmt -> ID OPEN_PAREN dotlist CLOSE_PAREN','fancy_drel_assignment_stmt',4,'p_fancy_drel_assignment_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',400), ('dotlist -> . ID = expression','dotlist',4,'p_dotlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',407), ('dotlist -> dotlist , . ID = expression','dotlist',6,'p_dotlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',408), ('exprlist -> a_expr','exprlist',1,'p_exprlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',415), ('exprlist -> exprlist , a_expr','exprlist',3,'p_exprlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',416), ('id_list -> ID','id_list',1,'p_id_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',423), ('id_list -> id_list , ID','id_list',3,'p_id_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',424), ('compound_stmt -> if_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',435), ('compound_stmt -> if_else_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',436), ('compound_stmt -> for_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',437), ('compound_stmt -> do_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',438), ('compound_stmt -> loop_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',439), ('compound_stmt -> with_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',440), ('compound_stmt -> repeat_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',441), ('compound_stmt -> funcdef','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',442), ('if_else_stmt -> if_stmt ELSE suite','if_else_stmt',3,'p_if_else_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',449), ('if_stmt -> IF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',6,'p_if_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',455), ('if_stmt -> if_stmt ELSEIF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',7,'p_if_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',456), ('suite -> statement','suite',1,'p_suite','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',475), ('suite -> { maybe_nline statements } maybe_nline','suite',5,'p_suite','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',476), ('for_stmt -> FOR id_list IN testlist_star_expr suite','for_stmt',5,'p_for_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',483), ('for_stmt -> FOR [ id_list ] IN testlist_star_expr suite','for_stmt',7,'p_for_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',484), ('loop_stmt -> loop_head suite','loop_stmt',2,'p_loop_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',491), ('loop_head -> LOOP ID AS 
ID','loop_head',4,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',497), ('loop_head -> LOOP ID AS ID : ID','loop_head',6,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',498), ('loop_head -> LOOP ID AS ID : ID restricted_comp_operator ID','loop_head',8,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',499), ('do_stmt -> do_stmt_head suite','do_stmt',2,'p_do_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',510), ('do_stmt_head -> DO ID = expression , expression','do_stmt_head',6,'p_do_stmt_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',517), ('do_stmt_head -> DO ID = expression , expression , expression','do_stmt_head',8,'p_do_stmt_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',518), ('repeat_stmt -> REPEAT suite','repeat_stmt',2,'p_repeat_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',527), ('with_stmt -> with_head maybe_nline suite','with_stmt',3,'p_with_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',531), ('with_head -> WITH ID AS ID','with_head',4,'p_with_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',535), ('funcdef -> FUNCTION ID OPEN_PAREN arglist CLOSE_PAREN suite','funcdef',6,'p_funcdef','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',539), ('arglist -> ID : list_display','arglist',3,'p_arglist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',543), ('arglist -> arglist , ID : list_display','arglist',5,'p_arglist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',544), ('maybe_nline -> newlines','maybe_nline',1,'p_maybe_nline','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',551), ('maybe_nline -> empty','maybe_nline',1,'p_maybe_nline','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',552), ('newlines -> NEWLINE','newlines',1,'p_newlines','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',559), ('newlines -> newlines NEWLINE','newlines',2,'p_newlines','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',560), ('empty -> ','empty',0,'p_empty','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',564), ] pycifrw-4.4/src/Programs/process-validate.py000077500000000000000000000034341345362224200212560ustar00rootroot00000000000000#!/usr/bin/env python # This script processes the input from the web form requesting validation # against one or more CIF dictionaries from __future__ import print_function import CifFile import validate_cif import tempfile import cgi import os import cgitb; cgitb.enable() # for debugging # output header print("Content-Type: text/html\n\n") formdata = cgi.FieldStorage() # some constants dic_directory = "/usr/local/lib/cif/" input_cif = formdata["cif_file"] if input_cif.file: filename = input_cif.filename else: filename = "" is_dic = formdata.has_key("is_dic") input_dics = formdata.getlist("dic_list") # Save file data to temporary file tmpfile = tempfile.mkstemp()[1] jj = open(tmpfile,"w") jj.write(input_cif.file.read()) jj.close() try: cf = CifFile.ReadCif(tmpfile,scantype="flex") os.remove(tmpfile) except CifFile.StarFile.StarError: os.remove(tmpfile) import sys print("

<h3>File reading error</h3>")
    print("<p>File %s appears to have one or more syntax errors</p>" % input_cif.filename)
    print("<p>The detailed error message is as follows:</p><pre>")
    etype,eval,trace = sys.exc_info()
    unsafe_str = str(eval)
    unsafe_str = unsafe_str.replace("&","&amp;")
    unsafe_str = unsafe_str.replace("<","&lt;")
    safe_str = unsafe_str.replace(">","&gt;")
    print(safe_str)
    print("</pre>")
except: os.remove(tmpfile) print("Unspecified error reading file %s. This is most likely a CIF syntax error." % filename) # Now do the validation... else: diclist = map(lambda a:os.path.join(dic_directory,a),input_dics) merged_dics = CifFile.merge_dic(diclist) validate_cif.output_header(True,filename,input_dics) print(CifFile.validate_report(CifFile.Validate(cf,dic= merged_dics,isdic=is_dic),use_html=True)) validate_cif.output_footer(True)
pycifrw-4.4/src/Programs/star2_to_cif2.py000066400000000000000000000055331345362224200204500ustar00rootroot00000000000000""" Convert a data/dictionary file in STAR2 to CIF2 format. STAR2 and CIF2 differ most markedly in their compound data value delimiters (comma and whitespace respectively). We read in the file using STAR2 grammar, then output with CIF2 grammar. Two optional files may be provided, one giving a top-level comment, and another the descriptive text for the dictionary. """ from __future__ import print_function from CifFile import ReadCif,CifDic import time def do_conversion(infile,outfile,desc=None,comment=None): startread = time.clock() incif = ReadCif(infile,grammar="STAR2") print('Finished reading %s in %f seconds' % (infile,time.clock() - startread)) try: incif = CifDic(incif,do_minimum=True) except: print('Failed to read as CIF dictionary') incif.set_grammar("2.0") incif.standard = 'Dic' incif.SetTemplate("dic_template.dic") if comment is None: comment_header = \ """############################################################################### # # # CIF Dictionary # # -------------- # # # # CIF data definitions in DDLm format. # # # ################################################################################ """ else: comment_header = open(comment).read() if desc is not None: incif.master_block.overwrite=True incif.master_block['_description.text'] = open(desc).read() incif.master_block.overwrite=False # print('Master template: {!r}'.format(incif.dic_as_cif.master_template)) print('check: {!r}'.format(incif.recurse_child_list('enumeration'))) of = open(outfile,"w") of.write(incif.WriteOut(comment=comment_header,saves_after='_description.text')) of.close() print('Finished writing %s in %f seconds' % (outfile,time.clock() - startread)) if __name__ == "__main__": import sys comment_file = None description_file = None if len(sys.argv)<2 or len(sys.argv)>4: print('Usage: python star2_to_cif2.py infile description_file comment_file') print("""<infile> should be in STAR2 format. The output file will be <infile>+'.cif2' Optional description_file contains a description for the dictionary, and optional comment_file contains a comment for the dictionary header.""") infile = sys.argv[1] outfile = infile + '.cif2' if len(sys.argv)>2: description_file = sys.argv[2] if len(sys.argv)>3: comment_file = sys.argv[3] do_conversion(infile,outfile,desc=description_file,comment=comment_file) pycifrw-4.4/src/Programs/syd_example.py000066400000000000000000000032641345362224200203210ustar00rootroot00000000000000# Example python program: as close to ciftbx as we can make it from __future__ import print_function from CifFile import CifDic, ValidCifFile, ValidCifError, CifError #import definitions from CifFile import get_number_with_esd import sys #to finish early # change the following as required by your installation test_dic = "../dictionaries/cif_core.dic" test_data = "../drel/testing/data/nacltest.cif" # open our dictionary try: my_dict = CifDic(test_dic) except IOError: print("Cannot open " + test_dic) sys.exit() # open our CIF try: my_cif = ValidCifFile(datasource=test_data,dic=my_dict)#read our CIF file except IOError: print("Cannot open " + test_data) sys.exit() except ValidCifError as error_message: print(test_data + " failed validity checks:") print(error_message) sys.exit() except CifError as error_message: print("Syntax error in " + test_data +":") print(error_message) sys.exit() # get the first blockname my_data_block = my_cif.first_block() # get some data cela,siga = get_number_with_esd(my_data_block["_cell_length_a"])#cell dimension name = my_data_block["_symmetry_cell_setting"] #cell setting # get a random data name which is not one of the above allnames = list(my_data_block.keys()) #list() so remove() works under Python 3 allnames.remove("_cell_length_a") allnames.remove("_symmetry_cell_setting") data = my_data_block[allnames[0]] # to print, don't need to check type print("%s %s" % (allnames[0],data)) # loop atom sites names = my_data_block["_atom_site_label"] xsxs = my_data_block["_atom_site_fract_x"] as_numbers = map(get_number_with_esd,xsxs) processed = zip(names,as_numbers) for label, data in processed: print("%s %f " % (label,data[0])) pycifrw-4.4/src/Programs/syd_example_2.py000066400000000000000000000017231345362224200205400ustar00rootroot00000000000000# Example python program: native use from __future__ import print_function from CifFile import ValidCifFile,CifDic #import definitions # change the following as required by your installation test_dic = "../dictionaries/cif_core.dic" test_data = "../drel/testing/data/nacltest.cif" # only proceed if CifFile is valid my_cif = ValidCifFile(datasource=test_data,dic=CifDic(test_dic)) # get the first blockname my_data_block = my_cif.first_block() # get some data cela = my_data_block["_cell_length_a"] #cell dimension name = my_data_block["_symmetry_cell_setting"] #cell setting allnames = list(my_data_block.keys()) #list() so indexing works under Python 3 # get a random data name which may be one of the above data = my_data_block[allnames[0]] # to print, don't need to check type print("%s %s" % (allnames[0],data)) # loop atom sites names = my_data_block["_atom_site_label"] xsxs = my_data_block["_atom_site_fract_x"] processed = zip(names,xsxs) for label, data in processed: print("%s %s" % (label,data))
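Both example programs above rely on get_number_with_esd to split a CIF number-plus-uncertainty string into a (value, su) pair. A small hedged illustration follows; the sample strings are invented and the commented results are what the call is expected to return, not captured output.

# Illustration of get_number_with_esd as used in the examples above.
# Sample values are invented; commented results are expectations only.
from CifFile import get_number_with_esd

value, su = get_number_with_esd("4.37(3)")   # expected: value 4.37, su 0.03
print(value, su)
value, su = get_number_with_esd("12.5")      # no bracketed su: su expected to be None
print(value, su)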
pycifrw-4.4/src/Programs/templ_attr.cif000066400000000000000000000662211345362224200202750ustar00rootroot00000000000000#\#CIF_2.0 ############################################################################## # # # TEMPLATE DEFINITION ATTRIBUTES DICTIONARY # # # ############################################################################## data_TEMPL_ATTR _dictionary.title TEMPL_ATTR _dictionary.class Template _dictionary.version 1.4.08 _dictionary.date 2014-06-27 _dictionary.uri www.iucr.org/cif/dic/com_att.dic _dictionary.ddl_conformance 3.11.04 _description.text ; This dictionary contains definition attribute sets that are common to other CIF dictionaries and is imported by them. ; #--------------------------------------------------------------------------- save_atom_site_label _definition.update 2012-10-16 _description.text ; This label is a unique identifier for a particular site in the asymmetric unit of the crystal unit cell. It is made up of components, _atom_site.label_component_0 to *_6, which may be specified as separate data items. Component 0 usually matches one of the specified _atom_type.symbol codes. This is not mandatory if an _atom_site.type_symbol item is included in the atom site list. The _atom_site.type_symbol always takes precedence over an _atom_site.label in the identification of the atom type. The label components 1 to 6 are optional, and normally only components 0 and 1 are used. Note that components 0 and 1 are concatenated, while all other components, if specified, are separated by an underline character. Underline separators are only used if higher-order components exist. If an intermediate component is not used it may be omitted provided the underline separators are inserted. For example the label 'C233__ggg' is acceptable and represents the components C, 233, '', and ggg. Each label may have a different number of components. ; _name.linked_item_id '_atom_site.label' _type.purpose Encode _type.source Assigned _type.container Single _type.contents Code loop_ _description_example.case C12 Ca3g28 Fe3+17 H*251 C_a_phe_83_a_0 Zn_Zn_301_A_0 save_ save_restr_label _definition.update 2014-06-29 _description.text ; Labels of atom sites subtending distance or angle. Atom 2 is the apex for angular restraints. ; _name.linked_item_id '_atom_site.label' _type.purpose Encode _type.source Assigned _type.container Single _type.contents Code save_ save_atom_site_id _definition.update 2014-06-16 _description.text ; This label is a unique identifier for a particular site in the asymmetric unit of the crystal unit cell. ; _name.linked_item_id '_atom_site.label' _type.purpose Encode _type.source Assigned _type.container Single _type.contents Code save_
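The component rules in save_atom_site_label above (components 0 and 1 concatenated, components 2 to 6 separated by underlines) can be made concrete with a short sketch. The helper below is purely illustrative and is not PyCIFRW's own parser; it handles plain element-symbol prefixes but not charged symbols such as 'Fe3+'.

# Illustrative split of an _atom_site.label into its components.
# split_site_label is a hypothetical helper, not part of PyCIFRW.
import re

def split_site_label(label):
    first, *rest = label.split("_")
    # separate component 0 (the type-symbol-like prefix) from component 1
    # (the atom number); charged symbols such as 'Fe3+' are not handled
    m = re.match(r"([A-Za-z*]+)(\d*)$", first)
    comp0, comp1 = (m.group(1), m.group(2)) if m else (first, "")
    return [comp0, comp1] + rest

print(split_site_label("C233__ggg"))       # ['C', '233', '', 'ggg']
print(split_site_label("C_a_phe_83_a_0"))  # ['C', '', 'a', 'phe', '83', 'a', '0']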
save_rho_coeff _definition.update 2014-06-20 _description.text ; Specifies a multipole population coefficient P(l,m) for the atom identified in atom_rho_multipole.atom_label. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real save_ save_rho_kappa _definition.update 2014-06-20 _description.text ; A radial function expansion-contraction coefficient (kappa = atom_rho_multipole_kappa.base and kappa'(l) = atom_rho_multipole_kappa.prime[l]) for the atom specified in atom_rho_multipole.atom_label. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real save_ save_rho_slater _definition.update 2014-06-20 _description.text ; Items used when the radial dependence of the valence electron density, R(kappa'(l),l,r), of the atom specified in atom_rho_multipole.atom_label is expressed as a Slater-type function [Hansen & Coppens (1978), equation (3)] ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real save_ save_matrix_pdb _definition.update 2014-07-02 _description.text ; Element of the PDB ORIGX matrix or vector. ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real _enumeration.default 1.0 save_ save_matrix_w _definition.update 2014-06-27 _description.text ; Element of the matrix W defined by van Smaalen (1991); (1995) ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real _name.category_id cell_subsystem save_ save_ms_index _definition.update 2014-06-27 _description.text ; Additional Miller indices needed to write the reciprocal vector in the definition of _diffrn_refln_index.m_list, _diffrn_standard_refln.index_m_list, _exptl_crystal_face.index_m_list. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Integer save_ save_index_limit_max _definition.update 2014-06-27 _description.text ; Maximum value of the additional Miller indices appearing in _diffrn_reflns.index_m_* and _reflns.index_m_*. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Integer save_ save_index_limit_min _definition.update 2014-06-27 _description.text ; Minimum value of the additional Miller indices appearing in _diffrn_reflns.index_m_* and _reflns.index_m_*. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Integer save_ save_cell_angle _definition.update 2014-06-08 _description.text ; The angle between the bounding cell axes. ; _type.purpose Measurand _type.source Recorded _type.container Single _type.contents Real _enumeration.range 0.0:180.0 _enumeration.default 90.0 _units.code degrees save_ save_cell_angle_su _definition.update 2014-06-08 _description.text ; Standard uncertainty of the angle between the bounding cell axes. ; _type.purpose SU _type.source Recorded _type.container Single _type.contents Real _units.code degrees save_ save_cell_length _definition.update 2014-06-08 _description.text ; The length of each cell axis. ; _type.purpose Measurand _type.source Recorded _type.container Single _type.contents Real _enumeration.range 1.: _units.code angstroms save_ save_cell_length_su _definition.update 2014-06-08 _description.text ; Standard uncertainty of the length of each cell axis. ; _type.purpose SU _type.source Recorded _type.container Single _type.contents Real _units.code angstroms save_ save_site_symmetry _definition.update 2014-06-29 _description.text ; The set of data items which specify the symmetry operation codes which must be applied to the atom sites involved in the geometry angle. The symmetry code of each atom site is specified as the symmetry-equivalent position number 'n' and the cell translation number 'klm'. These numbers are combined to form the code 'n klm' or n_klm. The character string n_klm is composed as follows: n refers to the symmetry operation that is applied to the coordinates stored in _atom_site.fract_xyz. It must match a number given in _symmetry_equiv.pos_site_id.
k, l and m refer to the translations that are subsequently applied to the symmetry transformed coordinates to generate the atom used in calculating the angle. These translations (x,y,z) are related to (k,l,m) by the relations k = 5 + x l = 5 + y m = 5 + z ; _type.purpose Composite _type.source Derived _type.container Single _type.contents Symop loop_ _description_example.case _description_example.detail '4' '4th symmetry operation applied' '7_645' '7th symm. posn.; +a on x; -b on y' . 'no symmetry or translation to site' _enumeration.default '1_555' save_ save_Cartn_coord _definition.update 2012-05-07 _description.text ; The atom site coordinates in angstroms specified according to a set of orthogonal Cartesian axes related to the cell axes as specified by the _atom_sites_Cartn_transform.axes description. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.range -1000.:1000. _units.code angstroms save_ save_Cartn_coord_su _definition.update 2014-06-08 _description.text ; Standard uncertainty values of the atom site coordinates in angstroms specified according to a set of orthogonal Cartesian axes related to the cell axes as specified by the _atom_sites_Cartn_transform.axes description. ; _type.purpose SU _type.source Derived _type.container Single _type.contents Real _units.code angstroms save_ save_fract_coord _definition.update 2012-05-07 _description.text ; Atom site coordinates as fractions of the cell length values. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.range -1.:1. _units.code none save_ save_fract_coord_su _definition.update 2014-06-08 _description.text ; Standard uncertainty value of the atom site coordinates as fractions of the cell length values. ; _type.purpose SU _type.source Derived _type.container Single _type.contents Real _units.code none save_ save_label_component _definition.update 2012-05-07 _description.text ; Component_0 is normally a code which matches identically with one of the _atom_type.symbol codes. If this is the case then the rules governing the _atom_type.symbol code apply. If, however, the data item _atom_site.type_symbol is also specified in the atom site list, component 0 need not match this symbol or adhere to any of the _atom_type.symbol rules. Component_1 is referred to as the "atom number". When component 0 is the atom type code, it is used to number the sites with the same atom type. This component code must start with at least one digit which is not followed by a + or - sign (to distinguish it from the component 0 rules). Components_2 to 6 contain the identifier, residue, sequence, asymmetry identifier and alternate codes, respectively. These codes may be composed of any characters except an underline. ; _type.purpose Encode _type.source Assigned _type.container Single _type.contents Code save_ save_label_comp _definition.update 2012-05-07 _description.text ; See label_component_0 description. ; _type.purpose Encode _type.source Assigned _type.container Single _type.contents Code save_ save_Cartn_matrix _definition.update 2012-12-11 _description.text ; Matrix used to transform fractional coordinates in the ATOM_SITE category to Cartesian coordinates. The axial alignments of this transformation are described in _atom_sites_Cartn_transform.axes. The 3x1 translation is defined in _atom_sites_Cartn_transform.vector. 
    x'                 |11 12 13|     x                 | 1 |
  ( y' )Cartesian = mat|21 22 23| * ( y )fractional + vec| 2 |
    z'                 |31 32 33|     z                 | 3 |

 The default transformation matrix uses Rollet's axial assignments with cell vectors a,b,c aligned with orthogonal axes X,Y,Z so that c||Z and b in plane YZ. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.default ? _units.code reciprocal_angstroms save_ save_ncs_matrix_IJ _definition.update 2014-06-12 _description.text ; The [I][J] element of the 3x3 matrix component of a non-crystallographic symmetry operation. ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real save_ save_rot_matrix_IJ _definition.update 2014-06-12 _description.text ; The [I][J] element of the matrix used to rotate the subset of the Cartesian coordinates in the ATOM_SITE category identified in the STRUCT_BIOL_GEN category to give a view useful for describing the structure. The conventions used in the rotation are described in _struct_biol_view.details.

  |x'|                          |11 12 13| |x|
  |y'|~reoriented Cartesian~ =  |21 22 23| |y|~Cartesian~
  |z'|                          |31 32 33| |z|

 ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real save_ save_fract_matrix _definition.update 2012-12-11 _description.text ; Matrix used to transform Cartesian coordinates in the ATOM_SITE category to fractional coordinates. The axial alignments of this transformation are described in _atom_sites_fract_transform.axes. The 3x1 translation is defined in _atom_sites_fract_transform.vector.

    x'                  |11 12 13|     x                | 1 |
  ( y' )fractional = mat|21 22 23| * ( y )Cartesian + vec| 2 |
    z'                  |31 32 33|     z                | 3 |

 The default transformation matrix uses Rollet's axial assignments with cell vectors a,b,c aligned with orthogonal axes X,Y,Z so that c||Z and b in plane YZ. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.default ? _units.code none save_ save_aniso_BIJ _definition.update 2013-03-08 _description.text ; These are the standard anisotropic atomic displacement components in angstroms squared which appear in the structure factor term:

  T = exp{-1/4 sum~i~ [ sum~j~ (B^ij^ h~i~ h~j~ a*~i~ a*~j~) ] }

  h  = the Miller indices
  a* = the reciprocal-space cell lengths

 The unique elements of the real symmetric matrix are entered by row. The IUCr Commission on Nomenclature recommends against the use of B for reporting atomic displacement parameters. U, being directly proportional to B, is preferred. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.default ? _units.code angstrom_squared save_
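As a worked illustration of the structure factor term defined in save_aniso_BIJ above, the double sum reduces to a quadratic form in the products h~i~ a*~i~. The numbers below (B matrix, Miller indices, reciprocal cell lengths) are invented for illustration, and numpy is assumed to be available.

# Worked sketch of T = exp{-1/4 sum~i~ sum~j~ B^ij^ h~i~ h~j~ a*~i~ a*~j~}.
# All numerical values are invented; numpy is assumed to be installed.
import numpy as np

B = np.array([[1.2, 0.1, 0.0],
              [0.1, 0.9, 0.2],
              [0.0, 0.2, 1.5]])        # B^ij^ in angstroms squared (symmetric)
h = np.array([1, 2, 3])                # Miller indices h~i~
a_star = np.array([0.11, 0.09, 0.07])  # reciprocal cell lengths a*~i~

ha = h * a_star                        # elementwise products h~i~ a*~i~
T = np.exp(-0.25 * (ha @ B @ ha))      # the double sum as a quadratic form
print(T)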
save_aniso_BIJ2 _definition.update 2014-06-12 _description.text ; The [I][J] tdf elements define the overall anisotropic displacement model if one was refined for this structure. ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real _enumeration.default ? _units.code angstrom_squared save_ save_aniso_BIJ_su _definition.update 2014-06-08 _description.text ; These are the standard uncertainty values (SU) for the standard form of the Bij anisotropic atomic displacement components (see _aniso_BIJ). Because these values are TYPE measurand, the su values may in practice be auto generated as part of the Bij calculation. ; _type.purpose SU _type.source Derived _type.container Single _type.contents Real _units.code angstrom_squared save_ save_aniso_UIJ _definition.update 2013-03-08 _description.text ; These are the standard anisotropic atomic displacement components in angstroms squared which appear in the structure factor term:

  T = exp{-2pi^2^ sum~i~ [ sum~j~ (U^ij^ h~i~ h~j~ a*~i~ a*~j~) ] }

  h  = the Miller indices
  a* = the reciprocal-space cell lengths

 The unique elements of the real symmetric matrix are entered by row. ; _type.purpose Measurand _type.source Derived _type.container Single _type.contents Real _enumeration.default ? _units.code angstrom_squared save_ save_aniso_UIJ_su _definition.update 2014-06-08 _description.text ; These are the standard uncertainty values (SU) for the standard form of the Uij anisotropic atomic displacement components (see _aniso_UIJ). Because these values are TYPE measurand, the su values may in practice be auto generated as part of the Uij calculation. ; _type.purpose SU _type.source Derived _type.container Single _type.contents Real _units.code angstrom_squared save_ save_Cromer_Mann_coeff _definition.update 2012-11-29 _description.text ; The set of data items used to define Cromer-Mann coefficients for generation of X-ray scattering factors. Ref: International Tables for X-ray Crystallography, Vol. IV (1974) Table 2.2B or International Tables for Crystallography, Vol. C (1991) Tables 6.1.1.4 and 6.1.1.5 ; _type.purpose Number _type.source Assigned _type.container Single _type.contents Real _enumeration.def_index_id '_atom_type.symbol' _units.code none save_
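The Cromer-Mann parameterisation referred to in save_Cromer_Mann_coeff above expresses the scattering factor as f(s) = sum over i of a~i~ exp(-b~i~ s^2^) + c, with s = sin(theta)/lambda. A short sketch follows; the four a, four b and single c values are illustrative placeholders, not tabulated coefficients.

# Sketch of a scattering factor built from Cromer-Mann coefficients:
# f(s) = a1*exp(-b1*s^2) + ... + a4*exp(-b4*s^2) + c, s = sin(theta)/lambda.
# The coefficient values below are placeholders, not tabulated data.
import math

a = [2.31, 1.02, 1.59, 0.87]     # placeholder a1..a4
b = [20.84, 10.21, 0.57, 51.65]  # placeholder b1..b4
c = 0.22                         # placeholder c

def cromer_mann_f(s):
    return sum(ai * math.exp(-bi * s * s) for ai, bi in zip(a, b)) + c

print(cromer_mann_f(0.0))        # at s = 0, f approaches sum(a) + c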
save_hi_ang_Fox_coeffs _definition.update 2012-11-29 _description.text ; The set of data items used to define Fox et al. coefficients for generation of high angle (s >2.0) X-ray scattering factors. Ref: International Tables for Crystallography, Vol. C (1991) Table 6.1.1.5 ; _type.purpose Number _type.source Assigned _type.container Single _type.contents Real _enumeration.def_index_id '_atom_type.symbol' _units.code none save_ save_Miller_index _definition.update 2013-04-16 _description.text ; The index of a reciprocal space vector. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Integer _enumeration.range -1000:1000 _units.code none save_ save_orient_matrix _definition.update 2012-05-07 _description.text ; The set of data items which specify the elements of the matrix of the orientation of the crystal axes to the diffractometer goniometer. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Real _enumeration.default ? _units.code none save_ save_transf_matrix _definition.update 2012-05-07 _description.text ; The set of data items which specify the elements of the matrix used to transform the reflection indices _diffrn_refln.hkl into _refln.hkl. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Real _enumeration.default ? _units.code none save_ save_face_angle _definition.update 2013-04-15 _description.text ; Diffractometer angle setting when the perpendicular to the specified crystal face is aligned along a specified direction (e.g. the bisector of the incident and reflected beams in an optical goniometer). ; _type.purpose Measurand _type.source Recorded _type.container Single _type.contents Real _enumeration.range -180.:180. _units.code degrees save_ save_orient_angle _definition.update 2013-04-15 _description.text ; Diffractometer angle of a reflection measured at the centre of the diffraction peak and used to determine _diffrn_orient_matrix.UBIJ. ; _type.purpose Measurand _type.source Recorded _type.container Single _type.contents Real _enumeration.range -180.:180. _units.code degrees save_ save_diffr_angle _definition.update 2013-04-15 _description.text ; Diffractometer angle at which the intensity is measured. This was calculated from the specified orientation matrix and the original measured cell dimensions before any subsequent transformations. ; _type.purpose Number _type.source Derived _type.container Single _type.contents Real _enumeration.range -180.:180. _units.code degrees save_ save_diffr_counts _definition.update 2012-10-16 _description.text ; The set of data items which specify the diffractometer counts. Background counts before the peak, background after the peak, net counts after background removed, counts for peak scan or position, and the total counts (background plus peak). ; _type.purpose Measurand _type.source Recorded _type.container Single _type.contents Count _enumeration.range 0: _units.code none save_ save_display_colour _definition.update 2012-10-16 _description.text ; Integer value between 0 and 255 giving the intensity of a specific colour component (red, green or blue) for the RGB display colour code. ; _type.purpose Number _type.source Recorded _type.container Single _type.contents Integer _enumeration.range 0:255 _units.code none save_ #============================================================================= # The dictionary's attribute creation history. #============================================================================ loop_ _dictionary_audit.version _dictionary_audit.date _dictionary_audit.revision 1.0.0 2005-12-12 ; Initial version of the TEMPLATES dictionary created from the definitions used in CORE_3 dictionary version 3.5.02 ; 1.0.1 2006-02-12 ; Remove dictionary attributes from a save frame. Change category core_templates to template ; 1.2.01 2006-02-21 ; File structure to conform with prototype version dictionaries. ; 1.2.02 2006-03-07 ; Added the template _template.relational_id for the ddl3 dictionary. ; 1.2.03 2006-06-17 ; Apply DDL 3.6.01 changes ; 1.2.04 2006-06-29 ; Remove "_template" from the definition names. Apply DDL 3.6.05 changes. Change file name from templates.dic to com_att.dic ; 1.2.05 2006-09-07 ; Apply DDL 3.6.08 changes ; 1.2.06 2006-11-13 ; Apply DDL 3.6.10 changes ; 1.2.07 2006-12-14 ; Apply DDL 3.7.01 changes ; 1.2.08 2008-06-18 ; Change _type.purpose for Miller_index from Observed to Identify ; 1.3.01 2011-08-03 ; Remove definition.id lines in keeping with nested imports. ; 1.3.02 2011-12-01 ; Update the DDL version. No Matrix types present. ; 1.3.03 2012-05-07 ; Apply changes of 3.10.01 DDL version. ; 1.3.04 2012-10-16 ; Apply changes of 3.10.02 DDL version. "Label" becomes "Code". ; 1.3.05 2012-11-29 ; Add "_enumeration.def_index_id '_atom_type.symbol' " to Cromer_Mann_coeff and hi_ang_Fox_coeffs. ; 1.3.06 2012-12-11 ; Add the templates Cartn_matrix and fract_matrix ; 1.4.01 2013-03-08 ; Changes arising from alerts issued by ALIGN.
; 1.4.02 2013-04-15 ; Removed description.common from all defs; inconsistent invocations Changed types for 'diffrn_angle' Added new frame for 'orient_angle' ; 1.4.03 2013-04-16 ; Changed type.source 'Measured' to 'Recorded' Changed type.source 'Assigned' to 'Recorded' in Miller_index ; 1.4.04 2013-04-17 ; Changed type.source 'Quantity' to 'Number' or 'Encode' ; 1.4.05 2014-06-08 ; Added aniso_BIJ_su and aniso_UIJ_su Added atom_site_fract_su and atom_site_cartn_su ; 1.4.06 2014-06-09 ; Dummy top line added to all frames; this is skipped on import. ; 1.4.07 2014-06-12 ; Added attribute save frame "aniso_BIJ2" Added attribute save frame "rot_matrix_IJ" Added attribute save frame "ncs_matrix_IJ" Added attribute save frame "atom_site_id" Added attribute save frame "label_comp" ; 1.4.08 2014-06-27 ; Added attribute save frame "ms_index" Added attribute save frame "matrix_w" ; pycifrw-4.4/src/Programs/templ_enum.cif000066400000000000000000003505441345362224200202710ustar00rootroot00000000000000#\#CIF_2.0 ############################################################################## # # # TEMPLATE DICTIONARY OF COMMONLY USED ENUMERATIONS # # # ############################################################################## data_COM_VAL _dictionary.title COM_VAL _dictionary.class Template _dictionary.version 1.4.05 _dictionary.date 2016-05-09 _dictionary.uri www.iucr.org/cif/dic/com_val.dic _dictionary.ddl_conformance 3.11.04 _description.text ; This dictionary contains commonly used enumeration value sets that are imported into CIF dictionaries. ; #--------------------------------------------------------------------------- save_H_M_ref loop_ _enumeration_set.state _enumeration_set.detail 'P 1' ' 1 C1.1' 'P -1' ' 2 Ci.1' 'P 2' ' 3 C2.1' 'P 21' ' 4 C2.2' 'C 2' ' 5 C2.3' 'P m' ' 6 Cs.1' 'P c' ' 7 Cs.2' 'C m' ' 8 Cs.3' 'C c' ' 9 Cs.4' 'P 2/m' ' 10 C2h.1' 'P 21/m' ' 11 C2h.2' 'C 2/m' ' 12 C2h.3' 'P 2/c' ' 13 C2h.4' 'P 21/c' ' 14 C2h.5' 'C 2/c' ' 15 C2h.6' 'P 2 2 2' ' 16 D2.1' 'P 2 2 21' ' 17 D2.2' 'P 21 21 2' ' 18 D2.3' 'P 21 21 21' ' 19 D2.4' 'C 2 2 21' ' 20 D2.5' 'C 2 2 2' ' 21 D2.6' 'F 2 2 2' ' 22 D2.7' 'I 2 2 2' ' 23 D2.8' 'I 21 21 21' ' 24 D2.9' 'P m m 2' ' 25 C2v.1' 'P m c 21' ' 26 C2v.2' 'P c c 2' ' 27 C2v.3' 'P m a 2' ' 28 C2v.4' 'P c a 21' ' 29 C2v.5' 'P n c 2' ' 30 C2v.6' 'P m n 21' ' 31 C2v.7' 'P b a 2' ' 32 C2v.8' 'P n a 21' ' 33 C2v.9' 'P n n 2' ' 34 C2v.10' 'C m m 2' ' 35 C2v.11' 'C m c 21' ' 36 C2v.12' 'C c c 2' ' 37 C2v.13' 'A m m 2' ' 38 C2v.14' 'A e m 2' ' 39 C2v.15' 'A m a 2' ' 40 C2v.16' 'A e a 2' ' 41 C2v.17' 'F m m 2' ' 42 C2v.18' 'F d d 2' ' 43 C2v.19' 'I m m 2' ' 44 C2v.20' 'I b a 2' ' 45 C2v.21' 'I m a 2' ' 46 C2v.22' 'P m m m' ' 47 D2h.1' 'P n n n' ' 48 D2h.2' 'P c c m' ' 49 D2h.3' 'P b a n' ' 50 D2h.4' 'P m m a' ' 51 D2h.5' 'P n n a' ' 52 D2h.6' 'P m n a' ' 53 D2h.7' 'P c c a' ' 54 D2h.8' 'P b a m' ' 55 D2h.9' 'P c c n' ' 56 D2h.10' 'P b c m' ' 57 D2h.11' 'P n n m' ' 58 D2h.12' 'P m m n' ' 59 D2h.13' 'P b c n' ' 60 D2h.14' 'P b c a' ' 61 D2h.15' 'P n m a' ' 62 D2h.16' 'C m c m' ' 63 D2h.17' 'C m c e' ' 64 D2h.18' 'C m m m' ' 65 D2h.19' 'C c c m' ' 66 D2h.20' 'C m m e' ' 67 D2h.21' 'C c c e' ' 68 D2h.22' 'F m m m' ' 69 D2h.23' 'F d d d' ' 70 D2h.24' 'I m m m' ' 71 D2h.25' 'I b a m' ' 72 D2h.26' 'I b c a' ' 73 D2h.27' 'I m m a' ' 74 D2h.28' 'P 4' ' 75 C4.1' 'P 41' ' 76 C4.2' 'P 42' ' 77 C4.3' 'P 43' ' 78 C4.4' 'I 4' ' 79 C4.5' 'I 41' ' 80 C4.6' 'P -4' ' 81 S4.1' 'I -4' ' 82 S4.2' 'P 4/m' ' 83 C4h.1' 'P 42/m' ' 84 C4h.2' 'P 4/n' ' 85 C4h.3' 'P 42/n' ' 86 C4h.4' 'I 4/m' ' 87 C4h.5'
'I 41/a' ' 88 C4h.6' 'P 4 2 2' ' 89 D4.1' 'P 4 21 2' ' 90 D4.2' 'P 41 2 2' ' 91 D4.3' 'P 41 21 2' ' 92 D4.4' 'P 42 2 2' ' 93 D4.5' 'P 42 21 2' ' 94 D4.6' 'P 43 2 2' ' 95 D4.7' 'P 43 21 2' ' 96 D4.8' 'I 4 2 2' ' 97 D4.9' 'I 41 2 2' ' 98 D4.10' 'P 4 m m' ' 99 C4v.1' 'P 4 b m' '100 C4v.2' 'P 42 c m' '101 C4v.3' 'P 42 n m' '102 C4v.4' 'P 4 c c' '103 C4v.5' 'P 4 n c' '104 C4v.6' 'P 42 m c' '105 C4v.7' 'P 42 b c' '106 C4v.8' 'I 4 m m' '107 C4v.9' 'I 4 c m' '108 C4v.10' 'I 41 m d' '109 C4v.11' 'I 41 c d' '110 C4v.12' 'P -4 2 m' '111 D2d.1' 'P -4 2 c' '112 D2d.2' 'P -4 21 m' '113 D2d.3' 'P -4 21 c' '114 D2d.4' 'P -4 m 2' '115 D2d.5' 'P -4 c 2' '116 D2d.6' 'P -4 b 2' '117 D2d.7' 'P -4 n 2' '118 D2d.8' 'I -4 m 2' '119 D2d.9' 'I -4 c 2' '120 D2d.10' 'I -4 2 m' '121 D2d.11' 'I -4 2 d' '122 D2d.12' 'P 4/m m m' '123 D4h.1' 'P 4/m c c' '124 D4h.2' 'P 4/n b m' '125 D4h.3' 'P 4/n n c' '126 D4h.4' 'P 4/m b m' '127 D4h.5' 'P 4/m n c' '128 D4h.6' 'P 4/n m m' '129 D4h.7' 'P 4/n c c' '130 D4h.8' 'P 42/m m c' '131 D4h.9' 'P 42/m c m' '132 D4h.10' 'P 42/n b c' '133 D4h.11' 'P 42/n n m' '134 D4h.12' 'P 42/m b c' '135 D4h.13' 'P 42/m n m' '136 D4h.14' 'P 42/n m c' '137 D4h.15' 'P 42/n c m' '138 D4h.16' 'I 4/m m m' '139 D4h.17' 'I 4/m c m' '140 D4h.18' 'I 41/a m d' '141 D4h.19' 'I 41/a c d' '142 D4h.20' 'P 3' '143 C3.1' 'P 31' '144 C3.2' 'P 32' '145 C3.3' 'R 3' '146 C3.4' 'P -3' '147 C3i.1' 'R -3' '148 C3i.2' 'P 3 1 2' '149 D3.1' 'P 3 2 1' '150 D3.2' 'P 31 1 2' '151 D3.3' 'P 31 2 1' '152 D3.4' 'P 32 1 2' '153 D3.5' 'P 32 2 1' '154 D3.6' 'R 3 2' '155 D3.7' 'P 3 m 1' '156 C3v.1' 'P 3 1 m' '157 C3v.2' 'P 3 c 1' '158 C3v.3' 'P 3 1 c' '159 C3v.4' 'R 3 m' '160 C3v.5' 'R 3 c' '161 C3v.6' 'P -3 1 m' '162 D3d.1' 'P -3 1 c' '163 D3d.2' 'P -3 m 1' '164 D3d.3' 'P -3 c 1' '165 D3d.4' 'R -3 m' '166 D3d.5' 'R -3 c' '167 D3d.6' 'P 6' '168 C6.1' 'P 61' '169 C6.2' 'P 65' '170 C6.3' 'P 62' '171 C6.4' 'P 64' '172 C6.5' 'P 63' '173 C6.6' 'P -6' '174 C3h.1' 'P 6/m ' '175 C6h.1' 'P 63/m' '176 C6h.2' 'P 6 2 2' '177 D6.1' 'P 61 2 2' '178 D6.2' 'P 65 2 2' '179 D6.3' 'P 62 2 2' '180 D6.4' 'P 64 2 2' '181 D6.5' 'P 63 2 2' '182 D6.6' 'P 6 m m' '183 C6v.1' 'P 6 c c' '184 C6v.2' 'P 63 c m' '185 C6v.3' 'P 63 m c' '186 C6v.4' 'P -6 m 2' '187 D3h.1' 'P -6 c 2' '188 D3h.2' 'P -6 2 m' '189 D3h.3' 'P -6 2 c' '190 D3h.4' 'P 6/m m m' '191 D6h.1' 'P 6/m c c' '192 D6h.2' 'P 63/m c m' '193 D6h.3' 'P 63/m m c' '194 D6h.4' 'P 2 3' '195 T.1' 'F 2 3' '196 T.2' 'I 2 3' '197 T.3' 'P 21 3' '198 T.4' 'I 21 3' '199 T.5' 'P m -3' '200 Th.1' 'P n -3' '201 Th.2' 'F m -3' '202 Th.3' 'F d -3' '203 Th.4' 'I m -3' '204 Th.5' 'P a -3' '205 Th.6' 'I a -3' '206 Th.7' 'P 4 3 2' '207 O.1' 'P 42 3 2' '208 O.2' 'F 4 3 2' '209 O.3' 'F 41 3 2' '210 O.4' 'I 4 3 2' '211 O.5' 'P 43 3 2' '212 O.6' 'P 41 3 2' '213 O.7' 'I 41 3 2' '214 O.8' 'P -4 3 m' '215 Td.1' 'F -4 3 m' '216 Td.2' 'I -4 3 m' '217 Td.3' 'P -4 3 n' '218 Td.4' 'F -4 3 c' '219 Td.5' 'I -4 3 d' '220 Td.6' 'P m -3 m' '221 Oh.1' 'P n -3 n' '222 Oh.2' 'P m -3 n' '223 Oh.3' 'P n -3 m' '224 Oh.4' 'F m -3 m' '225 Oh.5' 'F m -3 c' '226 Oh.6' 'F d -3 m' '227 Oh.7' 'F d -3 c' '228 Oh.8' 'I m -3 m' '229 Oh.9' 'I a -3 d' '230 Oh.10' save_ save_ref_set loop_ _enumeration_set.state _enumeration_set.detail '001:P 1' 'C1.1 P 1' '002:-P 1' 'Ci.1 P -1' '003:P 2y' 'C2.1 P 1 2 1' '004:P 2yb' 'C2.2 P 1 21 1' '005:C 2y' 'C2.3 C 1 2 1' '006:P -2y' 'Cs.1 P 1 m 1' '007:P -2yc' 'Cs.2 P 1 c 1' '008:C -2y' 'Cs.3 C 1 m 1' '009:C -2yc' 'Cs.4 C 1 c 1' '010:-P 2y' 'C2h.1 P 1 2/m 1' '011:-P 2yb' 'C2h.2 P 1 21/m 1' '012:-C 2y' 'C2h.3 C 1 2/m 1' '013:-P 
2yc' 'C2h.4 P 1 2/c 1' '014:-P 2ybc' 'C2h.5 P 1 21/c 1' '015:-C 2yc' 'C2h.6 C 1 2/c 1' '016:P 2 2' 'D2.1 P 2 2 2' '017:P 2c 2' 'D2.2 P 2 2 21' '018:P 2 2ab' 'D2.3 P 21 21 2' '019:P 2ac 2ab' 'D2.4 P 21 21 21' '020:C 2c 2' 'D2.5 C 2 2 21' '021:C 2 2' 'D2.6 C 2 2 2' '022:F 2 2' 'D2.7 F 2 2 2' '023:I 2 2' 'D2.8 I 2 2 2' '024:I 2b 2c' 'D2.9 I 21 21 21' '025:P 2 -2' 'C2v.1 P m m 2' '026:P 2c -2' 'C2v.2 P m c 21' '027:P 2 -2c' 'C2v.3 P c c 2' '028:P 2 -2a' 'C2v.4 P m a 2' '029:P 2c -2ac' 'C2v.5 P c a 21' '030:P 2 -2bc' 'C2v.6 P n c 2' '031:P 2ac -2' 'C2v.7 P m n 21' '032:P 2 -2ab' 'C2v.8 P b a 2' '033:P 2c -2n' 'C2v.9 P n a 21' '034:P 2 -2n' 'C2v.10 P n n 2' '035:C 2 -2' 'C2v.11 C m m 2' '036:C 2c -2' 'C2v.12 C m c 21' '037:C 2 -2c' 'C2v.13 C c c 2' '038:A 2 -2' 'C2v.14 A m m 2' '039:A 2 -2b' 'C2v.15 A e m 2' '040:A 2 -2a' 'C2v.16 A m a 2' '041:A 2 -2ab' 'C2v.17 A e a 2' '042:F 2 -2' 'C2v.18 F m m 2' '043:F 2 -2d' 'C2v.19 F d d 2' '044:I 2 -2' 'C2v.20 I m m 2' '045:I 2 -2c' 'C2v.21 I b a 2' '046:I 2 -2a' 'C2v.22 I m a 2' '047:-P 2 2' 'D2h.1 P m m m' '048:-P 2ab 2bc' 'D2h.2 P n n n:2' '049:-P 2 2c' 'D2h.3 P c c m' '050:-P 2ab 2b' 'D2h.4 P b a n:2' '051:-P 2a 2a' 'D2h.5 P m m a' '052:-P 2a 2bc' 'D2h.6 P n n a' '053:-P 2ac 2' 'D2h.7 P m n a' '054:-P 2a 2ac' 'D2h.8 P c c a' '055:-P 2 2ab' 'D2h.9 P b a m' '056:-P 2ab 2ac' 'D2h.10 P c c n' '057:-P 2c 2b' 'D2h.11 P b c m' '058:-P 2 2n' 'D2h.12 P n n m' '059:-P 2ab 2a' 'D2h.13 P m m n:2' '060:-P 2n 2ab' 'D2h.14 P b c n' '061:-P 2ac 2ab' 'D2h.15 P b c a' '062:-P 2ac 2n' 'D2h.16 P n m a' '063:-C 2c 2' 'D2h.17 C m c m' '064:-C 2ac 2' 'D2h.18 C m c e' '065:-C 2 2' 'D2h.19 C m m m' '066:-C 2 2c' 'D2h.20 C c c m' '067:-C 2a 2' 'D2h.21 C m m e' '068:-C 2a 2ac' 'D2h.22 C c c e:2' '069:-F 2 2' 'D2h.23 F m m m' '070:-F 2uv 2vw' 'D2h.24 F d d d:2' '071:-I 2 2' 'D2h.25 I m m m' '072:-I 2 2c' 'D2h.26 I b a m' '073:-I 2b 2c' 'D2h.27 I b c a' '074:-I 2b 2' 'D2h.28 I m m a' '075:P 4' 'C4.1 P 4' '076:P 4w' 'C4.2 P 41' '077:P 4c' 'C4.3 P 42' '078:P 4cw' 'C4.4 P 43' '079:I 4' 'C4.5 I 4' '080:I 4bw' 'C4.6 I 41' '081:P -4' 'S4.1 P -4' '082:I -4' 'S4.2 I -4' '083:-P 4' 'C4h.1 P 4/m' '084:-P 4c' 'C4h.2 P 42/m' '085:-P 4a' 'C4h.3 P 4/n:2' '086:-P 4bc' 'C4h.4 P 42/n:2' '087:-I 4' 'C4h.5 I 4/m' '088:-I 4ad' 'C4h.6 I 41/a:2' '089:P 4 2' 'D4.1 P 4 2 2' '090:P 4ab 2ab' 'D4.2 P 4 21 2' '091:P 4w 2c' 'D4.3 P 41 2 2' '092:P 4abw 2nw' 'D4.4 P 41 21 2' '093:P 4c 2' 'D4.5 P 42 2 2' '094:P 4n 2n' 'D4.6 P 42 21 2' '095:P 4cw 2c' 'D4.7 P 43 2 2' '096:P 4nw 2abw' 'D4.8 P 43 21 2' '097:I 4 2' 'D4.9 I 4 2 2' '098:I 4bw 2bw' 'D4.10 I 41 2 2' '099:P 4 -2' 'C4v.1 P 4 m m' '100:P 4 -2ab' 'C4v.2 P 4 b m' '101:P 4c -2c' 'C4v.3 P 42 c m' '102:P 4n -2n' 'C4v.4 P 42 n m' '103:P 4 -2c' 'C4v.5 P 4 c c' '104:P 4 -2n' 'C4v.6 P 4 n c' '105:P 4c -2' 'C4v.7 P 42 m c' '106:P 4c -2ab' 'C4v.8 P 42 b c' '107:I 4 -2' 'C4v.9 I 4 m m' '108:I 4 -2c' 'C4v.10 I 4 c m' '109:I 4bw -2' 'C4v.11 I 41 m d' '110:I 4bw -2c' 'C4v.12 I 41 c d' '111:P -4 2' 'D2d.1 P -4 2 m' '112:P -4 2c' 'D2d.2 P -4 2 c' '113:P -4 2ab' 'D2d.3 P -4 21 m' '114:P -4 2n' 'D2d.4 P -4 21 c' '115:P -4 -2' 'D2d.5 P -4 m 2' '116:P -4 -2c' 'D2d.6 P -4 c 2' '117:P -4 -2ab' 'D2d.7 P -4 b 2' '118:P -4 -2n' 'D2d.8 P -4 n 2' '119:I -4 -2' 'D2d.9 I -4 m 2' '120:I -4 -2c' 'D2d.10 I -4 c 2' '121:I -4 2' 'D2d.11 I -4 2 m' '122:I -4 2bw' 'D2d.12 I -4 2 d' '123:-P 4 2' 'D4h.1 P 4/m m m' '124:-P 4 2c' 'D4h.2 P 4/m c c' '125:-P 4a 2b' 'D4h.3 P 4/n b m:2' '126:-P 4a 2bc' 'D4h.4 P 4/n n c:2' '127:-P 4 2ab' 'D4h.5 P 4/m b m' '128:-P 4 2n' 'D4h.6 P 4/m n c' '129:-P 4a 2a' 
'D4h.7 P 4/n m m:2' '130:-P 4a 2ac' 'D4h.8 P 4/n c c:2' '131:-P 4c 2' 'D4h.9 P 42/m m c' '132:-P 4c 2c' 'D4h.10 P 42/m c m' '133:-P 4ac 2b' 'D4h.11 P 42/n b c:2' '134:-P 4ac 2bc' 'D4h.12 P 42/n n m:2' '135:-P 4c 2ab' 'D4h.13 P 42/m b c' '136:-P 4n 2n' 'D4h.14 P 42/m n m' '137:-P 4ac 2a' 'D4h.15 P 42/n m c:2' '138:-P 4ac 2ac' 'D4h.16 P 42/n c m:2' '139:-I 4 2' 'D4h.17 I 4/m m m' '140:-I 4 2c' 'D4h.18 I 4/m c m' '141:-I 4bd 2' 'D4h.19 I 41/a m d:2' '142:-I 4bd 2c' 'D4h.20 I 41/a c d:2' '143:P 3' 'C3.1 P 3' '144:P 31' 'C3.2 P 31' '145:P 32' 'C3.3 P 32' '146:R 3' 'C3.4 R 3:h' '147:-P 3' 'C3i.1 P -3' '148:-R 3' 'C3i.2 R -3:h' '149:P 3 2' 'D3.1 P 3 1 2' '150:P 3 2"' 'D3.2 P 3 2 1' '151:P 31 2 (0 0 4)' 'D3.3 P 31 1 2' '152:P 31 2"' 'D3.4 P 31 2 1' '153:P 32 2 (0 0 2)' 'D3.5 P 32 1 2' '154:P 32 2"' 'D3.6 P 32 2 1' '155:R 3 2"' 'D3.7 R 3 2:h' '156:P 3 -2"' 'C3v.1 P 3 m 1' '157:P 3 -2' 'C3v.2 P 3 1 m' '158:P 3 -2"c' 'C3v.3 P 3 c 1' '159:P 3 -2c' 'C3v.4 P 3 1 c' '160:R 3 -2"' 'C3v.5 R 3 m:h' '161:R 3 -2"c' 'C3v.6 R 3 c:h' '162:-P 3 2' 'D3d.1 P -3 1 m' '163:-P 3 2c' 'D3d.2 P -3 1 c' '164:-P 3 2"' 'D3d.3 P -3 m 1' '165:-P 3 2"c' 'D3d.4 P -3 c 1' '166:-R 3 2"' 'D3d.5 R -3 m:h' '167:-R 3 2"c' 'D3d.6 R -3 c:h' '168:P 6' 'C6.1 P 6' '169:P 61' 'C6.2 P 61' '170:P 65' 'C6.3 P 65' '171:P 62' 'C6.4 P 62' '172:P 64' 'C6.5 P 64' '173:P 6c' 'C6.6 P 63' '174:P -6' 'C3h.1 P -6' '175:-P 6' 'C6h.1 P 6/m' '176:-P 6c' 'C6h.2 P 63/m' '177:P 6 2' 'D6.1 P 6 2 2' '178:P 61 2 (0 0 5)' 'D6.2 P 61 2 2' '179:P 65 2 (0 0 1)' 'D6.3 P 65 2 2' '180:P 62 2 (0 0 4)' 'D6.4 P 62 2 2' '181:P 64 2 (0 0 2)' 'D6.5 P 64 2 2' '182:P 6c 2c' 'D6.6 P 63 2 2' '183:P 6 -2' 'C6v.1 P 6 m m' '184:P 6 -2c' 'C6v.2 P 6 c c' '185:P 6c -2' 'C6v.3 P 63 c m' '186:P 6c -2c' 'C6v.4 P 63 m c' '187:P -6 2' 'D3h.1 P -6 m 2' '188:P -6c 2' 'D3h.2 P -6 c 2' '189:P -6 -2' 'D3h.3 P -6 2 m' '190:P -6c -2c' 'D3h.4 P -6 2 c' '191:-P 6 2' 'D6h.1 P 6/m m m' '192:-P 6 2c' 'D6h.2 P 6/m c c' '193:-P 6c 2' 'D6h.3 P 63/m c m' '194:-P 6c 2c' 'D6h.4 P 63/m m c' '195:P 2 2 3' 'T.1 P 2 3' '196:F 2 2 3' 'T.2 F 2 3' '197:I 2 2 3' 'T.3 I 2 3' '198:P 2ac 2ab 3' 'T.4 P 21 3' '199:I 2b 2c 3' 'T.5 I 21 3' '200:-P 2 2 3' 'Th.1 P m -3' '201:-P 2ab 2bc 3' 'Th.2 P n -3:2' '202:-F 2 2 3' 'Th.3 F m -3' '203:-F 2uv 2vw 3' 'Th.4 F d -3:2' '204:-I 2 2 3' 'Th.5 I m -3' '205:-P 2ac 2ab 3' 'Th.6 P a -3' '206:-I 2b 2c 3' 'Th.7 I a -3' '207:P 4 2 3' 'O.1 P 4 3 2' '208:P 4n 2 3' 'O.2 P 42 3 2' '209:F 4 2 3' 'O.3 F 4 3 2' '210:F 4d 2 3' 'O.4 F 41 3 2' '211:I 4 2 3' 'O.5 I 4 3 2' '212:P 4acd 2ab 3' 'O.6 P 43 3 2' '213:P 4bd 2ab 3' 'O.7 P 41 3 2' '214:I 4bd 2c 3' 'O.8 I 41 3 2' '215:P -4 2 3' 'Td.1 P -4 3 m' '216:F -4 2 3' 'Td.2 F -4 3 m' '217:I -4 2 3' 'Td.3 I -4 3 m' '218:P -4n 2 3' 'Td.4 P -4 3 n' '219:F -4a 2 3' 'Td.5 F -4 3 c' '220:I -4bd 2c 3' 'Td.6 I -4 3 d' '221:-P 4 2 3' 'Oh.1 P m -3 m' '222:-P 4a 2bc 3' 'Oh.2 P n -3 n:2' '223:-P 4n 2 3' 'Oh.3 P m -3 n' '224:-P 4bc 2bc 3' 'Oh.4 P n -3 m:2' '225:-F 4 2 3' 'Oh.5 F m -3 m' '226:-F 4a 2 3' 'Oh.6 F m -3 c' '227:-F 4vw 2vw 3' 'Oh.7 F d -3 m:2' '228:-F 4ud 2vw 3' 'Oh.8 F d -3 c:2' '229:-I 4 2 3' 'Oh.9 I m -3 m' '230:-I 4bd 2c 3' 'Oh.10 I a -3 d' save_ save_Schoenflies loop_ _enumeration_set.state C1.1 Ci.1 C2.1 C2.2 C2.3 Cs.1 Cs.2 Cs.3 Cs.4 C2h.1 C2h.2 C2h.3 C2h.4 C2h.5 C2h.6 D2.1 D2.2 D2.3 D2.4 D2.5 D2.6 D2.7 D2.8 D2.9 C2v.1 C2v.2 C2v.3 C2v.4 C2v.5 C2v.6 C2v.7 C2v.8 C2v.9 C2v.10 C2v.11 C2v.12 C2v.13 C2v.14 C2v.15 C2v.16 C2v.17 C2v.18 C2v.19 C2v.20 C2v.21 C2v.22 D2h.1 D2h.2 D2h.3 D2h.4 D2h.5 D2h.6 D2h.7 D2h.8 D2h.9 D2h.10 D2h.11 D2h.12 D2h.13 D2h.14 
D2h.15 D2h.16 D2h.17 D2h.18 D2h.19 D2h.20 D2h.21 D2h.22 D2h.23 D2h.24 D2h.25 D2h.26 D2h.27 D2h.28 C4.1 C4.2 C4.3 C4.4 C4.5 C4.6 S4.1 S4.2 C4h.1 C4h.2 C4h.3 C4h.4 C4h.5 C4h.6 D4.1 D4.2 D4.3 D4.4 D4.5 D4.6 D4.7 D4.8 D4.9 D4.10 C4v.1 C4v.2 C4v.3 C4v.4 C4v.5 C4v.6 C4v.7 C4v.8 C4v.9 C4v.10 C4v.11 C4v.12 D2d.1 D2d.2 D2d.3 D2d.4 D2d.5 D2d.6 D2d.7 D2d.8 D2d.9 D2d.10 D2d.11 D2d.12 D4h.1 D4h.2 D4h.3 D4h.4 D4h.5 D4h.6 D4h.7 D4h.8 D4h.9 D4h.10 D4h.11 D4h.12 D4h.13 D4h.14 D4h.15 D4h.16 D4h.17 D4h.18 D4h.19 D4h.20 C3.1 C3.2 C3.3 C3.4 C3i.1 C3i.2 D3.1 D3.2 D3.3 D3.4 D3.5 D3.6 D3.7 C3v.1 C3v.2 C3v.3 C3v.4 C3v.5 C3v.6 D3d.1 D3d.2 D3d.3 D3d.4 D3d.5 D3d.6 C6.1 C6.2 C6.3 C6.4 C6.5 C6.6 C3h.1 C6h.1 C6h.2 D6.1 D6.2 D6.3 D6.4 D6.5 D6.6 C6v.1 C6v.2 C6v.3 C6v.4 D3h.1 D3h.2 D3h.3 D3h.4 D6h.1 D6h.2 D6h.3 D6h.4 T.1 T.2 T.3 T.4 T.5 Th.1 Th.2 Th.3 Th.4 Th.5 Th.6 Th.7 O.1 O.2 O.3 O.4 O.5 O.6 O.7 O.8 Td.1 Td.2 Td.3 Td.4 Td.5 Td.6 Oh.1 Oh.2 Oh.3 Oh.4 Oh.5 Oh.6 Oh.7 Oh.8 Oh.9 Oh.10 save_ save_colour_RGB loop_ _enumeration_set.state _enumeration_set.detail black '[ 000, 000, 000 ]' white '[ 255, 255, 255 ]' grey '[ 192, 192, 192 ]' grey_light '[ 211, 211, 211 ]' grey_slate '[ 112, 128, 144 ]' blue '[ 000, 000, 255 ]' blue_light '[ 176, 224, 230 ]' blue_medium '[ 000, 000, 205 ]' blue_dark '[ 025, 025, 112 ]' blue_navy '[ 000, 000, 128 ]' blue_royal '[ 065, 105, 225 ]' blue_sky '[ 135, 206, 235 ]' blue_steel '[ 070, 130, 180 ]' turquoise '[ 064, 224, 208 ]' cyan '[ 000, 255, 255 ]' cyan_light '[ 224, 255, 255 ]' green '[ 000, 255, 000 ]' green_light '[ 152, 251, 152 ]' green_dark '[ 000, 100, 000 ]' green_sea '[ 046, 139, 087 ]' green_lime '[ 050, 205, 050 ]' green_olive '[ 107, 142, 035 ]' green_khaki '[ 240, 230, 140 ]' yellow '[ 255, 255, 000 ]' yellow_light '[ 255, 255, 224 ]' yellow_gold '[ 255, 215, 000 ]' brown '[ 165, 042, 042 ]' brown_sienna '[ 160, 082, 045 ]' brown_beige '[ 245, 245, 220 ]' brown_tan '[ 210, 180, 140 ]' salmon '[ 250, 128, 114 ]' salmon_light '[ 255, 160, 122 ]' salmon_dark '[ 233, 150, 122 ]' orange '[ 255, 165, 000 ]' orange_dark '[ 255, 140, 000 ]' red '[ 255, 000, 000 ]' red_coral '[ 255, 127, 080 ]' red_tomato '[ 255, 099, 071 ]' red_orange '[ 255, 069, 000 ]' red_violet '[ 219, 112, 147 ]' red_maroon '[ 176, 048, 096 ]' pink '[ 255, 192, 203 ]' pink_light '[ 255, 182, 193 ]' pink_deep '[ 255, 020, 147 ]' pink_hot '[ 255, 105, 180 ]' violet '[ 238, 130, 238 ]' violet_red '[ 208, 032, 144 ]' violet_magenta '[ 255, 000, 255 ]' violet_dark '[ 148, 000, 211 ]' violet_blue '[ 138, 043, 226 ]' save_ save_element_symbol loop_ _enumeration_set.state _enumeration_set.detail Ac Actinium Ag Silver Al Aluminum Am Americium Ar Argon As Arsenic At Astatine Au Gold B Boron Ba Barium Be Beryllium Bh Bohrium Bi Bismuth Bk Berkelium Br Bromine C Carbon Ca Calcium Cd Cadmium Ce Cerium Cf Californium Cl Chlorine Cm Curium Cn Copernicium Co Cobalt Cr Chromium Cs Cesium Cu Copper Db Dubnium Ds Darmstadtium Dy Dysprosium Er Erbium Es Einsteinium Eu Europium F Fluorine Fe Iron Fm Fermium Fr Francium Ga Gallium Gd Gadolinium Ge Germanium H Hydrogen He Helium Hf Hafnium Hg Mercury Ho Holmium Hs Hassium I Iodine In Indium Ir Iridium K Potassium Kr Krypton La Lanthanum Li Lithium Lr Lawrencium Lu Lutetium Md Mendelevium Mg Magnesium Mn Manganese Mo Molybdenum Mt Meitnerium N Nitrogen Na Sodium Ne Neon Nb Niobium Nd Neodymium Ni Nickel No Nobelium Np Neptunium O Oxygen Os Osmium P Phosphorus Pd Palladium Po Polonium Pb Lead Pt Platinum Pr Praseodymium Pm Promethium Pu Plutonium Pa Protactinium Ra Radium Rb Rubidium Re 
Rhenium Rf Rutherfordium Rg Roentgenium Rh Rhodium Rn Radon Ru Ruthenium S Sulfur Sb Antimony Sc Scandium Se Selenium Sg Seaborgium Si Silicon Sm Samarium Sn Tin Sr Strontium Ta Tantalum Tb Terbium Tc Technetium Te Tellurium Th Thorium Ti Titanium Tl Thallium Tm Thulium U Uranium V Vanadium W Tungsten Xe Xenon Y Yttrium Yb Ytterbium Zn Zinc Zr Zirconium save_ save_units_code loop_ _enumeration_set.state _enumeration_set.detail 'none' "dimensionless - e.g. a ratio, factor, weight or scale" 'coulomb' "electronic charge in Coulombs" 'electron_volts' "electronic charge in electron volts eV" 'metres' "length 'metres (meters * 10^(0))'" 'centimetres' "length 'centimetres (meters * 10^( -2))'" 'millimetres' "length 'millimetres (meters * 10^( -3))'" 'nanometres' "length 'nanometres (meters * 10^( -9))'" 'angstroms' "length 'angstroms (meters * 10^(-10))'" 'picometres' "length 'picometres (meters * 10^(-12))'" 'femtometres' "length 'femtometres (meters * 10^(-15))'" 'reciprocal_centimetres' "per_length 'reciprocal centimetres (meters * 10^( -2)^-1)'" 'reciprocal_millimetres' "per_length 'reciprocal millimetres (meters * 10^( -3)^-1)'" 'reciprocal_nanometres' "per-length 'reciprocal nanometres (meters * 10^( -9)^-1)'" 'reciprocal_angstroms' "per-length 'reciprocal angstroms (meters * 10^(-10)^-1)'" 'reciprocal_angstrom_squared' "per-area 'reciprocal angstroms^2'" 'reciprocal_picometres' "per-length 'reciprocal picometres (meters * 10^(-12)^-1)'" 'nanometre_squared' "length_squared 'nanometres squared (meters * 10^( -9))^2'" 'angstrom_squared' "length_squared 'angstroms squared (meters * 10^(-10))^2'" '8pi_angstroms_squared' "length_squared '8pi^2 * angstroms squared (meters * 10^(-10))^2'" 'picometre_squared' "length_squared 'picometres squared (meters * 10^(-12))^2'" 'femtometre_squared' "length_squared 'femtometres squared (meters * 10^(-12))^2'" 'nanometre_cubed' "length_cubed 'nanometres cubed (meters * 10^( -9))^3'" 'angstrom_cubed' "length_cubed 'angstroms cubed (meters * 10^(-10))^3'" 'picometre_cubed' "length_cubed 'picometres cubed (meters * 10^(-12))^3'" 'grams_per_centimetre_cubed' "density 'grams per cubic centimetre'" 'kilograms_per_metre_cubed' "density 'kilograms per cubic metre'" 'megagrams_per_metre_cubed' "density 'megagrams per cubic metre'" 'angstrom_cubed_per_dalton' "density 'angstrom cubed per Dalton'" 'kilopascals' "pressure 'kilopascals'" 'gigapascals' "pressure 'gigapascals'" 'hours' "time 'hours'" 'minutes' "time 'minutes'" 'seconds' "time 'seconds'" 'microseconds' "time 'microseconds'" 'degrees' "angle 'degrees (of arc)'" 'cycles' "phase 'angle in 360 degree arcs'" 'radians' "angle 'radians'" 'degrees_squared' "angle 'degrees (of arc)'" 'degree_per_minute' "rotation_per_time 'degrees (of arc) per minute'" 'celsius' "temperature 'degrees (of temperature) Celsius'" 'kelvins' "temperature 'degrees (of temperature) Kelvin'" 'kelvins_per_minute' "cooling rate 'degrees Kelvin per minute'" 'electrons' "electrons 'electrons'" 'electron_squared' "electrons-squared 'electrons squared'" 'electrons_per_nanometre_cubed' "electron-density 'electrons per nanometres cubed (meters * 10^( -9))^3'" 'electrons_per_angstrom_cubed' "electron-density 'electrons per angstroms cubed (meters * 10^(-10))^3'" 'electrons_per_picometre_cubed' "electron-density 'electrons per picometres cubed (meters * 10^(-12))^3'" 'dalton' "standard atomic mass unit" 'pixels_per_millimetre' "area resolution unit" 'pixels_per_element' "area resolution unit" 'kilowatts' "power 'kilowatts'" 'milliamperes' "current 
'milliamperes'" 'kilovolts' "emf 'kilovolts'" 'volt_squared' "emf 'volts squared'" 'Bohr_magnetons' "magnetic moment" 'arbitrary' "arbitrary 'arbitrary system of units'" 'counts_per_photon' "measure of gain used in iarray detectors" save_ #--------------------------------------------------------------------------- save_atomic_number loop_ _enumeration_default.index _enumeration_default.value H 01 D 01 H1- 01 He 02 Li 03 Li1+ 03 Be 04 Be2+ 04 B 05 C 06 N 07 O 08 O1- 08 F 09 F1- 09 Ne 10 Na 11 Na1+ 11 Mg 12 Mg2+ 12 Al 13 Al3+ 13 Si 14 Si4+ 14 P 15 S 16 Cl 17 Cl1- 17 Ar 18 K 19 K1+ 19 Ca 20 Ca2+ 20 Sc 21 Sc3+ 21 Ti 22 Ti2+ 22 Ti3+ 22 Ti4+ 22 V 23 V2+ 23 V3+ 23 V5+ 23 Cr 24 Cr2+ 24 Cr3+ 24 Mn 25 Mn2+ 25 Mn3+ 25 Mn4+ 25 Fe 26 Fe2+ 26 Fe3+ 26 Co 27 Co2+ 27 Co3+ 27 Ni 28 Ni2+ 28 Ni3+ 28 Cu 29 Cu1+ 29 Cu2+ 29 Zn 30 Zn2+ 30 Ga 31 Ga3+ 31 Ge 32 Ge4+ 32 As 33 Se 34 Br 35 Br1- 35 Kr 36 Rb 37 Rb1+ 37 Sr 38 Sr2+ 38 Y 39 Y3+ 39 Zr 40 Zr4+ 40 Nb 41 Nb3+ 41 Nb5+ 41 Mo 42 Mo3+ 42 Mo5+ 42 Mo6+ 42 Tc 43 Ru 44 Ru3+ 44 Ru4+ 44 Rh 45 Rh3+ 45 Rh4+ 45 Pd 46 Pd2+ 46 Pd4+ 46 Ag 47 Ag1+ 47 Ag2+ 47 Cd 48 Cd2+ 48 In 49 In3+ 49 Sn 50 Sn2+ 50 Sn4+ 50 Sb 51 Sb3+ 51 Sb5+ 51 Te 52 I 53 I1- 53 Xe 54 Cs 55 Cs1+ 55 Ba 56 Ba2+ 56 La 57 La3+ 57 Ce 58 Ce3+ 58 Ce4+ 58 Pr 59 Pr3+ 59 Pr4+ 59 Nd 60 Nd3+ 60 Pm 61 Sm 62 Sm3+ 62 Eu 63 Eu2+ 63 Eu3+ 63 Gd 64 Gd3+ 64 Tb 65 Tb3+ 65 Dy 66 Dy3+ 66 Ho 67 Ho3+ 67 Er 68 Er3+ 68 Tm 69 Tm3+ 69 Yb 70 Yb2+ 70 Yb3+ 70 Lu 71 Lu3+ 71 Hf 72 Hf4+ 72 Ta 73 Ta5+ 73 W 74 W6+ 74 Re 75 Os 76 Os4+ 76 Ir 77 Ir3+ 77 Ir4+ 77 Pt 78 Pt2+ 78 Pt4+ 78 Au 79 Au1+ 79 Au3+ 79 Hg 80 Hg1+ 80 Hg2+ 80 Tl 81 TL1+ 81 Tl3+ 81 Pb 82 Pb2+ 82 Pb4+ 82 Bi 83 Bi3+ 83 Bi5+ 83 Po 84 At 85 Rn 86 Fr 87 Ra 88 Ra2+ 88 Ac 89 Ac3+ 89 Th 90 Th4+ 90 Pa 91 U 92 U3+ 92 U4+ 92 U6+ 92 Np 93 Np3+ 93 Np4+ 93 Np6+ 93 Pu 94 Pu3+ 94 Pu4+ 94 Pu6+ 94 Am 95 Cm 96 Bk 97 Cf 98 save_ save_electron_count loop_ _enumeration_default.index _enumeration_default.value H 01 D 01 H1- 02 He 02 Li 03 Li1+ 02 Be 04 Be2+ 02 B 05 C 06 N 07 O 08 O1- 09 F 09 F1- 10 Ne 10 Na 11 Na1+ 10 Mg 12 Mg2+ 10 Al 13 Al3+ 10 Si 14 Si4+ 10 P 15 S 16 Cl 17 Cl1- 18 Ar 18 K 19 K1+ 18 Ca 20 Ca2+ 18 Sc 21 Sc3+ 18 Ti 22 Ti2+ 20 Ti3+ 19 Ti4+ 18 V 23 V2+ 21 V3+ 20 V5+ 18 Cr 24 Cr2+ 22 Cr3+ 21 Mn 25 Mn2+ 23 Mn3+ 22 Mn4+ 21 Fe 26 Fe2+ 24 Fe3+ 23 Co 27 Co2+ 25 Co3+ 24 Ni 28 Ni2+ 26 Ni3+ 25 Cu 29 Cu1+ 28 Cu2+ 27 Zn 30 Zn2+ 28 Ga 31 Ga3+ 28 Ge 32 Ge4+ 28 As 33 Se 34 Br 35 Br1- 36 Kr 36 Rb 37 Rb1+ 36 Sr 38 Sr2+ 36 Y 39 Y3+ 36 Zr 40 Zr4+ 36 Nb 41 Nb3+ 38 Nb5+ 36 Mo 42 Mo3+ 39 Mo5+ 37 Mo6+ 36 Tc 43 Ru 44 Ru3+ 41 Ru4+ 40 Rh 45 Rh3+ 42 Rh4+ 41 Pd 46 Pd2+ 44 Pd4+ 42 Ag 47 Ag1+ 46 Ag2+ 45 Cd 48 Cd2+ 46 In 49 In3+ 46 Sn 50 Sn2+ 48 Sn4+ 46 Sb 51 Sb3+ 48 Sb5+ 46 Te 52 I 53 I1- 54 Xe 54 Cs 55 Cs1+ 54 Ba 56 Ba2+ 54 La 57 La3+ 54 Ce 58 Ce3+ 55 Ce4+ 54 Pr 59 Pr3+ 56 Pr4+ 55 Nd 60 Nd3+ 57 Pm 61 Sm 62 Sm3+ 59 Eu 63 Eu2+ 61 Eu3+ 60 Gd 64 Gd3+ 61 Tb 65 Tb3+ 62 Dy 66 Dy3+ 63 Ho 67 Ho3+ 64 Er 68 Er3+ 65 Tm 69 Tm3+ 66 Yb 70 Yb2+ 68 Yb3+ 67 Lu 71 Lu3+ 68 Hf 72 Hf4+ 68 Ta 73 Ta5+ 68 W 74 W6+ 68 Re 75 Os 76 Os4+ 72 Ir 77 Ir3+ 74 Ir4+ 73 Pt 78 Pt2+ 76 Pt4+ 74 Au 79 Au1+ 78 Au3+ 76 Hg 80 Hg1+ 79 Hg2+ 78 Tl 81 TL1+ 80 Tl3+ 78 Pb 82 Pb2+ 80 Pb4+ 78 Bi 83 Bi3+ 80 Bi5+ 78 Po 84 At 85 Rn 86 Fr 87 Ra 88 Ra2+ 86 Ac 89 Ac3+ 86 Th 90 Th4+ 86 Pa 91 U 92 U3+ 89 U4+ 88 U6+ 84 Np 93 Np3+ 90 Np4+ 89 Np6+ 87 Pu 94 Pu3+ 91 Pu4+ 90 Pu6+ 88 Am 95 Cm 96 Bk 97 Cf 98 save_ save_ion_to_element loop_ _enumeration_default.index _enumeration_default.value H H D D H1- H He He Li Li Li1+ Li Be Be Be2+ Be B B C C N N O O O1- O F F F1- F Ne Ne Na Na 
Na1+ Na Mg Mg Mg2+ Mg Al Al Al3+ Al Si Si Si4+ Si P P S S Cl Cl Cl1- Cl Ar Ar K K K1+ K Ca Ca Ca2+ Ca Sc Sc Sc3+ Sc Ti Ti Ti2+ Ti Ti3+ Ti Ti4+ Ti V V V2+ V V3+ V V5+ V Cr Cr Cr2+ Cr Cr3+ Cr Mn Mn Mn2+ Mn Mn3+ Mn Mn4+ Mn Fe Fe Fe2+ Fe Fe3+ Fe Co Co Co2+ Co Co3+ Co Ni Ni Ni2+ Ni Ni3+ Ni Cu Cu Cu1+ Cu Cu2+ Cu Zn Zn Zn2+ Zn Ga Ga Ga3+ Ga Ge Ge Ge4+ Ge As As Se Se Br Br Br1- Br Kr Kr Rb Rb Rb1+ Rb Sr Sr Sr2+ Sr Y Y Y3+ Y Zr Zr Zr4+ Zr Nb Nb Nb3+ Nb Nb5+ Nb Mo Mo Mo3+ Mo Mo5+ Mo Mo6+ Mo Tc Tc Ru Ru Ru3+ Ru Ru4+ Ru Rh Rh Rh3+ Rh Rh4+ Rh Pd Pd Pd2+ Pd Pd4+ Pd Ag Ag Ag1+ Ag Ag2+ Ag Cd Cd Cd2+ Cd In In In3+ In Sn Sn Sn2+ Sn Sn4+ Sn Sb Sb Sb3+ Sb Sb5+ Sb Te Te I I I1- I Xe Xe Cs Cs Cs1+ Cs Ba Ba Ba2+ Ba La La La3+ La Ce Ce Ce3+ Ce Ce4+ Ce Pr Pr Pr3+ Pr Pr4+ Pr Nd Nd Nd3+ Nd Pm Pm Sm Sm Sm3+ Sm Eu Eu Eu2+ Eu Eu3+ Eu Gd Gd Gd3+ Gd Tb Tb Tb3+ Tb Dy Dy Dy3+ Dy Ho Ho Ho3+ Ho Er Er Er3+ Er Tm Tm Tm3+ Tm Yb Yb Yb2+ Yb Yb3+ Yb Lu Lu Lu3+ Lu Hf Hf Hf4+ Hf Ta Ta Ta5+ Ta W W W6+ W Re Re Os Os Os4+ Os Ir Ir Ir3+ Ir Ir4+ Ir Pt Pt Pt2+ Pt Pt4+ Pt Au Au Au1+ Au Au3+ Au Hg Hg Hg1+ Hg Hg2+ Hg Tl Tl TL1+ Tl Tl3+ Tl Pb Pb Pb2+ Pb Pb4+ Pb Bi Bi Bi3+ Bi Bi5+ Bi Po Po At At Rn Rn Fr Fr Ra Ra Ra2+ Ra Ac Ac Ac3+ Ac Th Th Th4+ Th Pa Pa U U U3+ U U4+ U U6+ U Np Np Np3+ Np Np4+ Np Np6+ Np Pu Pu Pu3+ Pu Pu4+ Pu Pu6+ Pu Am Am Cm Cm Bk Bk Cf Cf save_ save_atomic_mass loop_ _enumeration_default.index _enumeration_default.value H 1.008 D 2.008 H1- 1.008 He 4.003 Li 6.941 Li1+ 6.941 Be 9.012 Be2+ 9.012 B 10.811 C 12.011 N 14.007 O 15.999 O1- 15.999 F 18.998 F1- 18.998 Ne 20.179 Na 22.990 Na1+ 22.990 Mg 24.305 Mg2+ 24.305 Al 26.982 Al3+ 26.982 Si 28.086 Si4+ 28.086 P 30.974 S 32.066 Cl 35.453 Cl1- 35.453 Ar 39.948 K 39.098 K1+ 39.098 Ca 40.078 Ca2+ 40.078 Sc 44.956 Sc3+ 44.956 Ti 47.88 Ti2+ 47.88 Ti3+ 47.88 Ti4+ 47.88 V 50.942 V2+ 50.942 V3+ 50.942 V5+ 50.942 Cr 51.996 Cr2+ 51.996 Cr3+ 51.996 Mn 54.938 Mn2+ 54.938 Mn3+ 54.938 Mn4+ 54.938 Fe 55.847 Fe2+ 55.847 Fe3+ 55.847 Co 58.933 Co2+ 58.933 Co3+ 58.933 Ni 58.69 Ni2+ 58.69 Ni3+ 58.69 Cu 63.546 Cu1+ 63.546 Cu2+ 63.546 Zn 65.39 Zn2+ 65.39 Ga 69.723 Ga3+ 69.723 Ge 72.59 Ge4+ 72.59 As 74.922 Se 78.96 Br 79.904 Br1- 79.904 Kr 83.80 Rb 85.468 Rb1+ 85.468 Sr 87.62 Sr2+ 87.62 Y 88.906 Y3+ 88.906 Zr 91.224 Zr4+ 91.224 Nb 92.906 Nb3+ 92.906 Nb5+ 92.906 Mo 95.94 Mo3+ 95.94 Mo5+ 95.94 Mo6+ 95.94 Tc 98.906 Ru 101.07 Ru3+ 101.07 Ru4+ 101.07 Rh 102.906 Rh3+ 102.906 Rh4+ 102.906 Pd 106.42 Pd2+ 106.42 Pd4+ 106.42 Ag 107.868 Ag1+ 107.868 Ag2+ 107.868 Cd 112.41 Cd2+ 112.41 In 114.82 In3+ 114.82 Sn 118.71 Sn2+ 118.71 Sn4+ 118.71 Sb 121.75 Sb3+ 121.75 Sb5+ 121.75 Te 127.60 I 126.905 I1- 126.905 Xe 131.29 Cs 132.905 Cs1+ 132.905 Ba 137.33 Ba2+ 137.33 La 138.906 La3+ 138.906 Ce 140.12 Ce3+ 140.12 Ce4+ 140.12 Pr 140.908 Pr3+ 140.908 Pr4+ 140.908 Nd 144.24 Nd3+ 144.24 Pm 147. Sm 150.36 Sm3+ 150.36 Eu 151.96 Eu2+ 151.96 Eu3+ 151.96 Gd 157.25 Gd3+ 157.25 Tb 158.926 Tb3+ 158.926 Dy 162.5 Dy3+ 162.5 Ho 164.93 Ho3+ 164.93 Er 167.26 Er3+ 167.26 Tm 168.934 Tm3+ 168.934 Yb 173.04 Yb2+ 173.04 Yb3+ 173.04 Lu 174.967 Lu3+ 174.967 Hf 178.49 Hf4+ 178.49 Ta 180.948 Ta5+ 180.948 W 183.85 W6+ 183.85 Re 186.207 Os 190.2 Os4+ 190.2 Ir 192.22 Ir3+ 192.22 Ir4+ 192.22 Pt 195.08 Pt2+ 195.08 Pt4+ 195.08 Au 196.966 Au1+ 196.966 Au3+ 196.966 Hg 200.59 Hg1+ 200.59 Hg2+ 200.59 Tl 204.383 TL1+ 204.383 Tl3+ 204.383 Pb 207.2 Pb2+ 207.2 Pb4+ 207.2 Bi 208.980 Bi3+ 208.980 Bi5+ 208.980 Po 209. At 210. Rn 222. Fr 223. Ra 226.025 Ra2+ 226.025 Ac 227. Ac3+ 227. 
Th 232.038 Th4+ 232.038 Pa 231.036 U 238.029 U3+ 238.029 U4+ 238.029 U6+ 238.029 Np 237.048 Np3+ 237.048 Np4+ 237.048 Np6+ 237.048 Pu 242. Pu3+ 242. Pu4+ 242. Pu6+ 242. Am 243. Cm 247. Bk 247. Cf 249. save_ save_radius_bond loop_ _enumeration_default.index _enumeration_default.value H 0.37 D 0.37 H1- 0.37 He 0.40 Li 1.23 Li1+ 1.23 Be 0.89 Be2+ 0.89 B 0.80 C 0.77 N 0.74 O 0.74 O1- 0.74 F 0.72 F1- 0.72 Ne 0.72 Na 1.57 Na1+ 1.57 Mg 1.36 Mg2+ 1.36 Al 1.25 Al3+ 1.25 Si 1.17 Si4+ 1.17 P 1.10 S 1.04 Cl 0.99 Cl1- 0.99 Ar 1.00 K 2.03 K1+ 2.03 Ca 1.74 Ca2+ 1.74 Sc 1.44 Sc3+ 1.44 Ti 1.35 Ti2+ 1.35 Ti3+ 1.35 Ti4+ 1.35 V 1.22 V2+ 1.22 V3+ 1.22 V5+ 1.22 Cr 1.17 Cr2+ 1.17 Cr3+ 1.17 Mn 1.17 Mn2+ 1.17 Mn3+ 1.17 Mn4+ 1.17 Fe 1.17 Fe2+ 1.17 Fe3+ 1.17 Co 1.16 Co2+ 1.16 Co3+ 1.16 Ni 1.15 Ni2+ 1.15 Ni3+ 1.15 Cu 1.17 Cu1+ 1.17 Cu2+ 1.17 Zn 1.25 Zn2+ 1.25 Ga 1.25 Ga3+ 1.25 Ge 1.22 Ge4+ 1.22 As 1.21 Se 1.17 Br 1.14 Br1- 1.14 Kr 1.14 Rb 2.16 Rb1+ 2.16 Sr 1.91 Sr2+ 1.91 Y 1.62 Y3+ 1.62 Zr 1.45 Zr4+ 1.45 Nb 1.34 Nb3+ 1.34 Nb5+ 1.34 Mo 1.29 Mo3+ 1.29 Mo5+ 1.29 Mo6+ 1.29 Tc 1.27 Ru 1.24 Ru3+ 1.24 Ru4+ 1.24 Rh 1.25 Rh3+ 1.25 Rh4+ 1.25 Pd 1.28 Pd2+ 1.28 Pd4+ 1.28 Ag 1.34 Ag1+ 1.34 Ag2+ 1.34 Cd 1.41 Cd2+ 1.41 In 1.50 In3+ 1.50 Sn 1.41 Sn2+ 1.41 Sn4+ 1.41 Sb 1.41 Sb3+ 1.41 Sb5+ 1.41 Te 1.37 I 1.33 I1- 1.33 Xe 1.33 Cs 2.35 Cs1+ 2.35 Ba 1.98 Ba2+ 1.98 La 1.69 La3+ 1.69 Ce 1.65 Ce3+ 1.65 Ce4+ 1.65 Pr 1.65 Pr3+ 1.65 Pr4+ 1.65 Nd 1.64 Nd3+ 1.64 Pm 1.63 Sm 1.66 Sm3+ 1.66 Eu 1.85 Eu2+ 1.85 Eu3+ 1.85 Gd 1.61 Gd3+ 1.61 Tb 1.59 Tb3+ 1.59 Dy 1.59 Dy3+ 1.59 Ho 1.58 Ho3+ 1.58 Er 1.57 Er3+ 1.57 Tm 1.56 Tm3+ 1.56 Yb 1.70 Yb2+ 1.70 Yb3+ 1.70 Lu 1.56 Lu3+ 1.56 Hf 1.44 Hf4+ 1.44 Ta 1.34 Ta5+ 1.34 W 1.30 W6+ 1.30 Re 1.28 Os 1.26 Os4+ 1.26 Ir 1.26 Ir3+ 1.26 Ir4+ 1.26 Pt 1.29 Pt2+ 1.29 Pt4+ 1.29 Au 1.34 Au1+ 1.34 Au3+ 1.34 Hg 1.44 Hg1+ 1.44 Hg2+ 1.44 Tl 1.55 TL1+ 1.55 Tl3+ 1.55 Pb 1.54 Pb2+ 1.54 Pb4+ 1.54 Bi 1.52 Bi3+ 1.52 Bi5+ 1.52 Po 1.53 At 1.53 Rn 1.53 Fr 1.53 Ra 1.53 Ra2+ 1.53 Ac 1.53 Ac3+ 1.53 Th 1.65 Th4+ 1.65 Pa 1.53 U 1.42 U3+ 1.42 U4+ 1.42 U6+ 1.42 Np 1.42 Np3+ 1.42 Np4+ 1.42 Np6+ 1.42 Pu 1.42 Pu3+ 1.42 Pu4+ 1.42 Pu6+ 1.42 Am 1.42 Cm 1.42 Bk 1.42 Cf 1.42 save_ save_length_neutron loop_ _enumeration_default.index _enumeration_default.value H -3.739 D 6.671 H1- -3.739 He 3.26 Li -1.90 Li1+ -1.90 Be 7.79 Be2+ 7.79 B 5.30 C 6.646 N 9.36 O 5.803 O1- 5.803 F 5.654 F1- 5.654 Ne 4.547 Na 3.63 Na1+ 3.63 Mg 5.375 Mg2+ 5.375 Al 3.449 Al3+ 3.449 Si 4.149 Si4+ 4.149 P 5.13 S 2.847 Cl 9.577 Cl1- 9.577 Ar 1.909 K 3.71 K1+ 3.71 Ca 4.90 Ca2+ 4.90 Sc 12.29 Sc3+ 12.29 Ti -3.438 Ti2+ -3.438 Ti3+ -3.438 Ti4+ -3.438 V -0.3824 V2+ -0.382 V3+ -0.382 V5+ -0.382 Cr 3.635 Cr2+ 3.635 Cr3+ 3.635 Mn -3.73 Mn2+ -3.73 Mn3+ -3.73 Mn4+ -3.73 Fe 9.54 Fe2+ 9.54 Fe3+ 9.54 Co 2.50 Co2+ 2.50 Co3+ 2.50 Ni 10.3 Ni2+ 10.3 Ni3+ 10.3 Cu 7.718 Cu1+ 7.718 Cu2+ 7.718 Zn 5.689 Zn2+ 5.689 Ga 7.287 Ga3+ 7.287 Ge 8.192 Ge4+ 8.192 As 6.58 Se 7.970 Br 6.795 Br1- 6.795 Kr 7.80 Rb 7.08 Rb1+ 7.08 Sr 7.02 Sr2+ 7.02 Y 7.75 Y3+ 7.75 Zr 7.16 Zr4+ 7.16 Nb 7.054 Nb3+ 7.054 Nb5+ 7.054 Mo 6.95 Mo3+ 6.95 Mo5+ 6.95 Mo6+ 6.95 Tc 6.8 Ru 7.21 Ru3+ 7.21 Ru4+ 7.21 Rh 5.88 Rh3+ 5.88 Rh4+ 5.88 Pd 5.91 Pd2+ 5.91 Pd4+ 5.91 Ag 5.922 Ag1+ 5.922 Ag2+ 5.922 Cd 5.1 Cd2+ 5.1 In 4.065 In3+ 4.065 Sn 6.225 Sn2+ 6.225 Sn4+ 6.225 Sb 5.57 Sb3+ 5.57 Sb5+ 5.57 Te 5.80 I 5.28 I1- 5.28 Xe 4.85 Cs 5.42 Cs1+ 5.42 Ba 5.06 Ba2+ 5.06 La 8.24 La3+ 8.24 Ce 4.84 Ce3+ 4.84 Ce4+ 4.84 Pr 4.45 Pr3+ 4.45 Pr4+ 4.45 Nd 7.69 Nd3+ 7.69 Pm 12.6 Sm 4.2 Sm3+ 4.2 Eu 6.73 Eu2+ 6.73 Eu3+ 6.73 Gd 9.5 Gd3+ 9.5 Tb 7.38 Tb3+ 7.38 Dy 16.9 Dy3+ 16.9 
Ho 8.08 Ho3+ 8.08 Er 8.03 Er3+ 8.03 Tm 7.07 Tm3+ 7.07 Yb 12.41 Yb2+ 12.41 Yb3+ 12.41 Lu 7.21 Lu3+ 7.21 Hf 7.77 Hf4+ 7.77 Ta 6.91 Ta5+ 6.91 W 4.77 W6+ 4.77 Re 9.2 Os 11.0 Os4+ 11.0 Ir 10.6 Ir3+ 10.6 Ir4+ 10.6 Pt 9.60 Pt2+ 9.60 Pt4+ 9.60 Au 7.63 Au1+ 7.63 Au3+ 7.63 Hg 12.692 Hg1+ 12.692 Hg2+ 12.692 Tl 8.776 TL1+ 8.776 Tl3+ 8.776 Pb 9.401 Pb2+ 9.401 Pb4+ 9.401 Bi 8.530 Bi3+ 8.530 Bi5+ 8.530 Po 0. At 0. Rn 0. Fr 0. Ra 10.0 Ra2+ 10.0 Ac 0. Ac3+ 0. Th 10.63 Th4+ 10.63 Pa 9.1 U 8.417 U3+ 8.417 U4+ 8.417 U6+ 8.417 Np 10.55 Np3+ 10.55 Np4+ 10.55 Np6+ 10.55 Pu 14.1 Pu3+ 14.1 Pu4+ 14.1 Pu6+ 14.1 Am 8.3 Cm 9.5 Bk 9.5 Cf 0. save_ save_dispersion_real_cu loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He .0 Li .001 Li1+ .001 Be .003 Be2+ .003 B .008 C .017 N .029 O .047 O1- .047 F .069 F1- .069 Ne .097 Na 0.129 Na1+ 0.129 Mg .165 Mg2+ .165 Al .204 Al3+ .204 Si .244 Si4+ .244 P .283 S .319 Cl .348 Cl1- .348 Ar .366 K .365 K1+ .365 Ca .341 Ca2+ .341 Sc 0.285 Sc3+ 0.285 Ti .189 Ti2+ .189 Ti3+ .189 Ti4+ .189 V .035 V2+ .035 V3+ .035 V5+ .035 Cr -.198 Cr2+ -.198 Cr3+ -.198 Mn -.568 Mn2+ -.568 Mn3+ -.568 Mn4+ -.568 Fe -1.179 Fe2+ -1.179 Fe3+ -1.179 Co -2.464 Co2+ -2.464 Co3+ -2.464 Ni -2.956 Ni2+ -2.956 Ni3+ -2.956 Cu -2.019 Cu1+ -2.019 Cu2+ -2.019 Zn -1.612 Zn2+ -1.612 Ga -1.354 Ga3+ -1.354 Ge -1.163 Ge4+ -1.163 As -1.011 Se -.879 Br -.767 Br1- -.767 Kr -.665 Rb -.574 Rb1+ -.574 Sr -.465 Sr2+ -.465 Y -.386 Y3+ -.386 Zr -.314 Zr4+ -.314 Nb -.248 Nb3+ -.248 Nb5+ -.248 Mo -.191 Mo3+ -.191 Mo5+ -.191 Mo6+ -.191 Tc -.145 Ru -.105 Ru3+ -.105 Ru4+ -.105 Rh -.077 Rh3+ -.077 Rh4+ -.077 Pd -.059 Pd2+ -.059 Pd4+ -.059 Ag -.06 Ag1+ -.06 Ag2+ -.06 Cd -.079 Cd2+ -.079 In -.126 In3+ -.126 Sn -.194 Sn2+ -.194 Sn4+ -.194 Sb -.287 Sb3+ -.287 Sb5+ -.287 Te -.418 I -.579 I1- -.579 Xe -.783 Cs -1.022 Cs1+ -1.022 Ba -1.334 Ba2+ -1.334 La -1.716 La3+ -1.716 Ce -2.17 Ce3+ -2.17 Ce4+ -2.17 Pr -2.939 Pr3+ -2.939 Pr4+ -2.939 Nd -3.431 Nd3+ -3.431 Pm -4.357 Sm -5.696 Sm3+ -5.696 Eu -7.718 Eu2+ -7.718 Eu3+ -7.718 Gd -9.242 Gd3+ -9.242 Tb -9.498 Tb3+ -9.498 Dy -10.423 Dy3+ -10.423 Ho -12.255 Ho3+ -12.255 Er -9.733 Er3+ -9.733 Tm -8.488 Tm3+ -8.488 Yb -7.701 Yb2+ -7.701 Yb3+ -7.701 Lu -7.133 Lu3+ -7.133 Hf -6.715 Hf4+ -6.715 Ta -6.351 Ta5+ -6.351 W -6.048 W6+ -6.048 Re -5.79 Os -5.581 Os4+ -5.581 Ir -5.391 Ir3+ -5.391 Ir4+ -5.391 Pt -5.233 Pt2+ -5.233 Pt4+ -5.233 Au -5.096 Au1+ -5.096 Au3+ -5.096 Hg -4.99 Hg1+ -4.99 Hg2+ -4.99 Tl -4.883 TL1+ -4.883 Tl3+ -4.883 Pb -4.818 Pb2+ -4.818 Pb4+ -4.818 Bi -4.776 Bi3+ -4.776 Bi5+ -4.776 Po -4.756 At -4.772 Rn -4.787 Fr -4.833 Ra -4.898 Ra2+ -4.898 Ac -4.994 Ac3+ -4.994 Th -5.091 Th4+ -5.091 Pa -5.216 U -5.359 U3+ -5.359 U4+ -5.359 U6+ -5.359 Np -5.529 Np3+ -5.529 Np4+ -5.529 Np6+ -5.529 Pu -5.712 Pu3+ -5.712 Pu4+ -5.712 Pu6+ -5.712 Am -5.93 Cm -6.176 Bk -6.498 Cf -6.798 save_ save_dispersion_imag_cu loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He .0 Li .0 Li1+ .0 Be .001 Be2+ .001 B .004 C .009 N .018 O .032 O1- .032 F .053 F1- .053 Ne .083 Na .124 Na1+ .124 Mg .177 Mg2+ .177 Al .246 Al3+ .246 Si .33 Si4+ .33 P .434 S .557 Cl .702 Cl1- .702 Ar .872 K 1.066 K1+ 1.066 Ca 1.286 Ca2+ 1.286 Sc 1.533 Sc3+ 1.533 Ti 1.807 Ti2+ 1.807 Ti3+ 1.807 Ti4+ 1.807 V 2.11 V2+ 2.11 V3+ 2.11 V5+ 2.11 Cr 2.443 Cr2+ 2.443 Cr3+ 2.443 Mn 2.808 Mn2+ 2.808 Mn3+ 2.808 Mn4+ 2.808 Fe 3.204 Fe2+ 3.204 Fe3+ 3.204 Co 3.608 Co2+ 3.608 Co3+ 3.608 Ni .509 Ni2+ .509 Ni3+ .509 Cu .589 Cu1+ .589 Cu2+ .589 Zn .678 Zn2+ .678 Ga 0.777 Ga3+ 0.777 Ge .886 Ge4+ .886 As 1.006 Se 
1.139 Br 1.283 Br1- 1.283 Kr 1.439 Rb 1.608 Rb1+ 1.608 Sr 1.82 Sr2+ 1.82 Y 2.025 Y3+ 2.025 Zr 2.245 Zr4+ 2.245 Nb 2.482 Nb3+ 2.482 Nb5+ 2.482 Mo 2.735 Mo3+ 2.735 Mo5+ 2.735 Mo6+ 2.735 Tc 3.005 Ru 3.296 Ru3+ 3.296 Ru4+ 3.296 Rh 3.605 Rh3+ 3.605 Rh4+ 3.605 Pd 3.934 Pd2+ 3.934 Pd4+ 3.934 Ag 4.282 Ag1+ 4.282 Ag2+ 4.282 Cd 4.653 Cd2+ 4.653 In 5.045 In3+ 5.045 Sn 5.459 Sn2+ 5.459 Sn4+ 5.459 Sb 5.894 Sb3+ 5.894 Sb5+ 5.894 Te 6.352 I 6.835 I1- 6.835 Xe 7.348 Cs 7.904 Cs1+ 7.904 Ba 8.46 Ba2+ 8.46 La 9.036 La3+ 9.036 Ce 9.648 Ce3+ 9.648 Ce4+ 9.648 Pr 10.535 Pr3+ 10.535 Pr4+ 10.535 Nd 10.933 Nd3+ 10.933 Pm 11.614 Sm 12.32 Sm3+ 12.32 Eu 11.276 Eu2+ 11.276 Eu3+ 11.276 Gd 11.946 Gd3+ 11.946 Tb 9.242 Tb3+ 9.242 Dy 9.748 Dy3+ 9.748 Ho 3.704 Ho3+ 3.704 Er 3.937 Er3+ 3.937 Tm 4.181 Tm3+ 4.181 Yb 4.432 Yb2+ 4.432 Yb3+ 4.432 Lu 4.693 Lu3+ 4.693 Hf 4.977 Hf4+ 4.977 Ta 5.271 Ta5+ 5.271 W 5.577 W6+ 5.577 Re 5.891 Os 6.221 Os4+ 6.221 Ir 6.566 Ir3+ 6.566 Ir4+ 6.566 Pt 6.925 Pt2+ 6.925 Pt4+ 6.925 Au 7.297 Au1+ 7.297 Au3+ 7.297 Hg 7.686 Hg1+ 7.686 Hg2+ 7.686 Tl 8.089 TL1+ 8.089 Tl3+ 8.089 Pb 8.505 Pb2+ 8.505 Pb4+ 8.505 Bi 8.93 Bi3+ 8.93 Bi5+ 8.93 Po 9.383 At 9.843 Rn 10.317 Fr 10.803 Ra 11.296 Ra2+ 11.296 Ac 11.799 Ac3+ 11.799 Th 12.33 Th4+ 12.33 Pa 12.868 U 13.409 U3+ 13.409 U4+ 13.409 U6+ 13.409 Np 13.967 Np3+ 13.967 Np4+ 13.967 Np6+ 13.967 Pu 14.536 Pu3+ 14.536 Pu4+ 14.536 Pu6+ 14.536 Am 15.087 Cm 15.634 Bk 16.317 Cf 16.93 save_ save_dispersion_real_mo loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He .0 Li .0 Li1+ .0 Be .0 Be2+ .0 B .0 C .002 N .004 O .008 O1- .008 F .014 F1- .014 Ne .021 Na 0.03 Na1+ 0.03 Mg .042 Mg2+ .042 Al .056 Al3+ .056 Si .072 Si4+ .072 P .09 S .11 Cl .132 Cl1- .132 Ar .155 K .179 K1+ .179 Ca .203 Ca2+ .203 Sc 0.226 Sc3+ 0.226 Ti .248 Ti2+ .248 Ti3+ .248 Ti4+ .248 V .267 V2+ .267 V3+ .267 V5+ .267 Cr .284 Cr2+ .284 Cr3+ .284 Mn .295 Mn2+ .295 Mn3+ .295 Mn4+ .295 Fe .301 Fe2+ .301 Fe3+ .301 Co .299 Co2+ .299 Co3+ .299 Ni .285 Ni2+ .285 Ni3+ .285 Cu .263 Cu1+ .263 Cu2+ .263 Zn .222 Zn2+ .222 Ga 0.163 Ga3+ 0.163 Ge .081 Ge4+ .081 As -.03 Se -.178 Br -.374 Br1- -.374 Kr -.652 Rb -1.044 Rb1+ -1.044 Sr -1.657 Sr2+ -1.657 Y -2.951 Y3+ -2.951 Zr -2.965 Zr4+ -2.965 Nb -2.197 Nb3+ -2.197 Nb5+ -2.197 Mo -1.825 Mo3+ -1.825 Mo5+ -1.825 Mo6+ -1.825 Tc -1.59 Ru -1.42 Ru3+ -1.42 Ru4+ -1.42 Rh -1.287 Rh3+ -1.287 Rh4+ -1.287 Pd -1.177 Pd2+ -1.177 Pd4+ -1.177 Ag -1.085 Ag1+ -1.085 Ag2+ -1.085 Cd -1.005 Cd2+ -1.005 In -.936 In3+ -.936 Sn -.873 Sn2+ -.873 Sn4+ -.873 Sb -.816 Sb3+ -.816 Sb5+ -.816 Te -.772 I -.726 I1- -.726 Xe -.684 Cs -.644 Cs1+ -.644 Ba -.613 Ba2+ -.613 La -.588 La3+ -.588 Ce -.564 Ce3+ -.564 Ce4+ -.564 Pr -.53 Pr3+ -.53 Pr4+ -.53 Nd -.535 Nd3+ -.535 Pm -.53 Sm -.533 Sm3+ -.533 Eu -.542 Eu2+ -.542 Eu3+ -.542 Gd -.564 Gd3+ -.564 Tb -.591 Tb3+ -.591 Dy -.619 Dy3+ -.619 Ho -.666 Ho3+ -.666 Er -.723 Er3+ -.723 Tm -.795 Tm3+ -.795 Yb -.884 Yb2+ -.884 Yb3+ -.884 Lu -.988 Lu3+ -.988 Hf -1.118 Hf4+ -1.118 Ta -1.258 Ta5+ -1.258 W -1.421 W6+ -1.421 Re -1.598 Os -1.816 Os4+ -1.816 Ir -2.066 Ir3+ -2.066 Ir4+ -2.066 Pt -2.352 Pt2+ -2.352 Pt4+ -2.352 Au -2.688 Au1+ -2.688 Au3+ -2.688 Hg -3.084 Hg1+ -3.084 Hg2+ -3.084 Tl -3.556 TL1+ -3.556 Tl3+ -3.556 Pb -4.133 Pb2+ -4.133 Pb4+ -4.133 Bi -4.861 Bi3+ -4.861 Bi5+ -4.861 Po -5.924 At -7.444 Rn -8.862 Fr -7.912 Ra -7.62 Ra2+ -7.62 Ac -7.725 Ac3+ -7.725 Th -8.127 Th4+ -8.127 Pa -8.96 U -10.673 U3+ -10.673 U4+ -10.673 U6+ -10.673 Np -11.158 Np3+ -11.158 Np4+ -11.158 Np6+ -11.158 Pu -9.725 Pu3+ -9.725 Pu4+ -9.725 Pu6+ -9.725 Am -8.926 
Cm -8.416 Bk -7.99 Cf -7.683 save_ save_dispersion_imag_mo loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He .0 Li .0 Li1+ .0 Be .0 Be2+ .0 B .001 C .002 N .003 O .006 O1- .006 F .01 F1- .01 Ne .016 Na .025 Na1+ .025 Mg .036 Mg2+ .036 Al .052 Al3+ .052 Si .071 Si4+ .071 P .095 S .124 Cl .159 Cl1- .159 Ar .201 K .25 K1+ .25 Ca .306 Ca2+ .306 Sc 0.372 Sc3+ 0.372 Ti .446 Ti2+ .446 Ti3+ .446 Ti4+ .446 V .53 V2+ .53 V3+ .53 V5+ .53 Cr .624 Cr2+ .624 Cr3+ .624 Mn .729 Mn2+ .729 Mn3+ .729 Mn4+ .729 Fe .845 Fe2+ .845 Fe3+ .845 Co .973 Co2+ .973 Co3+ .973 Ni 1.113 Ni2+ 1.113 Ni3+ 1.113 Cu 1.266 Cu1+ 1.266 Cu2+ 1.266 Zn 1.431 Zn2+ 1.431 Ga 1.609 Ga3+ 1.609 Ge 1.801 Ge4+ 1.801 As 2.007 Se 2.223 Br 2.456 Br1- 2.456 Kr 2.713 Rb 2.973 Rb1+ 2.973 Sr 3.264 Sr2+ 3.264 Y 3.542 Y3+ 3.542 Zr .56 Zr4+ .56 Nb 0.621 Nb3+ 0.621 Nb5+ 0.621 Mo .688 Mo3+ .688 Mo5+ .688 Mo6+ .688 Tc .759 Ru .836 Ru3+ .836 Ru4+ .836 Rh .919 Rh3+ .919 Rh4+ .919 Pd 1.007 Pd2+ 1.007 Pd4+ 1.007 Ag 1.101 Ag1+ 1.101 Ag2+ 1.101 Cd 1.202 Cd2+ 1.202 In 1.31 In3+ 1.31 Sn 1.424 Sn2+ 1.424 Sn4+ 1.424 Sb 1.546 Sb3+ 1.546 Sb5+ 1.546 Te 1.675 I 1.812 I1- 1.812 Xe 1.958 Cs 2.119 Cs1+ 2.119 Ba 2.282 Ba2+ 2.282 La 2.452 La3+ 2.452 Ce 2.632 Ce3+ 2.632 Ce4+ 2.632 Pr 2.845 Pr3+ 2.845 Pr4+ 2.845 Nd 3.018 Nd3+ 3.018 Pm 3.225 Sm 3.442 Sm3+ 3.442 Eu 3.669 Eu2+ 3.669 Eu3+ 3.669 Gd 3.904 Gd3+ 3.904 Tb 4.151 Tb3+ 4.151 Dy 4.41 Dy3+ 4.41 Ho 4.678 Ho3+ 4.678 Er 4.958 Er3+ 4.958 Tm 5.248 Tm3+ 5.248 Yb 5.548 Yb2+ 5.548 Yb3+ 5.548 Lu 5.858 Lu3+ 5.858 Hf 6.185 Hf4+ 6.185 Ta 6.523 Ta5+ 6.523 W 6.872 W6+ 6.872 Re 7.232 Os 7.605 Os4+ 7.605 Ir 7.99 Ir3+ 7.99 Ir4+ 7.99 Pt 8.388 Pt2+ 8.388 Pt4+ 8.388 Au 8.798 Au1+ 8.798 Au3+ 8.798 Hg 9.223 Hg1+ 9.223 Hg2+ 9.223 Tl 9.659 TL1+ 9.659 Tl3+ 9.659 Pb 10.102 Pb2+ 10.102 Pb4+ 10.102 Bi 10.559 Bi3+ 10.559 Bi5+ 10.559 Po 11.042 At 9.961 Rn 10.403 Fr 7.754 Ra 8.105 Ra2+ 8.105 Ac 8.472 Ac3+ 8.472 Th 8.87 Th4+ 8.87 Pa 9.284 U 9.654 U3+ 9.654 U4+ 9.654 U6+ 9.654 Np 4.148 Np3+ 4.148 Np4+ 4.148 Np6+ 4.148 Pu 4.33 Pu3+ 4.33 Pu4+ 4.33 Pu6+ 4.33 Am 4.511 Cm 4.697 Bk 4.908 Cf 5.107 save_ save_Cromer_Mann_a1 loop_ _enumeration_default.index _enumeration_default.value H .493002 D .493002 H1- .897661 He 0.8734 Li 1.1282 Li1+ .6968 Be 1.5919 Be2+ 6.2603 B 2.0545 C 2.31 N 12.2126 O 3.0485 O1- 4.1916 F 3.5392 F1- 3.6322 Ne 3.9553 Na 4.7626 Na1+ 3.2565 Mg 5.4204 Mg2+ 3.4988 Al 6.4202 Al3+ 4.17448 Si 6.2915 Si4+ 4.43918 P 6.4345 S 6.9053 Cl 11.4604 Cl1- 18.2915 Ar 7.4845 K 8.2186 K1+ 7.9578 Ca 8.6266 Ca2+ 15.6348 Sc 9.189 Sc3+ 13.4008 Ti 9.7595 Ti2+ 9.11423 Ti3+ 17.7344 Ti4+ 19.5114 V 10.2971 V2+ 10.106 V3+ 9.43141 V5+ 15.6887 Cr 10.6406 Cr2+ 9.54034 Cr3+ 9.6809 Mn 11.2819 Mn2+ 10.8061 Mn3+ 9.84521 Mn4+ 9.96253 Fe 11.7695 Fe2+ 11.0424 Fe3+ 11.1764 Co 12.2841 Co2+ 11.2296 Co3+ 10.338 Ni 12.8376 Ni2+ 11.4166 Ni3+ 10.7806 Cu 13.338 Cu1+ 11.9475 Cu2+ 11.8168 Zn 14.0743 Zn2+ 11.9719 Ga 15.2354 Ga3+ 12.692 Ge 16.0816 Ge4+ 12.9172 As 16.6723 Se 17.0006 Br 17.1789 Br1- 17.1718 Kr 17.3555 Rb 17.1784 Rb1+ 17.5816 Sr 17.5663 Sr2+ 18.0874 Y 17.776 Y3+ 17.9268 Zr 17.8765 Zr4+ 18.1668 Nb 17.6142 Nb3+ 19.8812 Nb5+ 17.9163 Mo 3.7025 Mo3+ 21.1664 Mo5+ 21.0149 Mo6+ 17.8871 Tc 19.1301 Ru 19.2674 Ru3+ 18.5638 Ru4+ 18.5003 Rh 19.2957 Rh3+ 18.8785 Rh4+ 18.8545 Pd 19.3319 Pd2+ 19.1701 Pd4+ 19.2493 Ag 19.2808 Ag1+ 19.1812 Ag2+ 19.1643 Cd 19.2214 Cd2+ 19.1514 In 19.1624 In3+ 19.1045 Sn 19.1889 Sn2+ 19.1094 Sn4+ 18.9333 Sb 19.6418 Sb3+ 18.9755 Sb5+ 19.8685 Te 19.9644 I 20.1472 I1- 20.2332 Xe 20.2933 Cs 20.3892 Cs1+ 20.3524 Ba 20.3361 Ba2+ 20.1807 
La 20.578 La3+ 20.2489 Ce 21.1671 Ce3+ 20.8036 Ce4+ 20.3235 Pr 22.044 Pr3+ 21.3727 Pr4+ 20.9413 Nd 22.6845 Nd3+ 21.961 Pm 23.3405 Sm 24.0042 Sm3+ 23.1504 Eu 24.6274 Eu2+ 24.0063 Eu3+ 23.7497 Gd 25.0709 Gd3+ 24.3466 Tb 25.8976 Tb3+ 24.9559 Dy 26.507 Dy3+ 25.5395 Ho 26.9049 Ho3+ 26.1296 Er 27.6563 Er3+ 26.722 Tm 28.1819 Tm3+ 27.3083 Yb 28.6641 Yb2+ 28.1209 Yb3+ 27.8917 Lu 28.9476 Lu3+ 28.4628 Hf 29.144 Hf4+ 28.8131 Ta 29.2024 Ta5+ 29.1587 W 29.0818 W6+ 29.4936 Re 28.7621 Os 28.1894 Os4+ 30.419 Ir 27.3049 Ir3+ 30.4156 Ir4+ 30.7058 Pt 27.0059 Pt2+ 29.8429 Pt4+ 30.9612 Au 16.8819 Au1+ 28.0109 Au3+ 30.6886 Hg 20.6809 Hg1+ 25.0853 Hg2+ 29.5641 Tl 27.5446 TL1+ 21.3985 Tl3+ 30.8695 Pb 31.0617 Pb2+ 21.7886 Pb4+ 32.1244 Bi 33.3689 Bi3+ 21.8053 Bi5+ 33.5364 Po 34.6726 At 35.3163 Rn 35.5631 Fr 35.9299 Ra 35.7630 Ra2+ 35.2150 Ac 35.6597 Ac3+ 35.1736 Th 35.5645 Th4+ 35.1007 Pa 35.8847 U 36.0228 U3+ 35.5747 U4+ 35.3715 U6+ 34.8509 Np 36.1874 Np3+ 35.7074 Np4+ 35.5103 Np6+ 35.0136 Pu 36.5254 Pu3+ 35.8400 Pu4+ 35.6493 Pu6+ 35.1736 Am 36.6706 Cm 36.6488 Bk 36.7881 Cf 36.9185 save_ save_Cromer_Mann_b1 loop_ _enumeration_default.index _enumeration_default.value H 10.5109 D 10.5109 H1- 53.1368 He 9.1037 Li 3.9546 Li1+ 4.6237 Be 43.6427 Be2+ .0027 B 23.2185 C 20.8439 N .0057 O 13.2771 O1- 12.8573 F 10.2825 F1- 5.27756 Ne 8.4042 Na 3.285 Na1+ 2.6671 Mg 2.8275 Mg2+ 2.1676 Al 3.0387 Al3+ 1.93816 Si 2.4386 Si4+ 1.64167 P 1.9067 S 1.4679 Cl .0104 Cl1- .0066 Ar 0.9072 K 12.7949 K1+ 12.6331 Ca 10.4421 Ca2+ -.0074 Sc 9.0213 Sc3+ .29854 Ti 7.8508 Ti2+ 7.5243 Ti3+ .22061 Ti4+ .178847 V 6.8657 V2+ 6.8818 V3+ 6.39535 V5+ .679003 Cr 6.1038 Cr2+ 5.66078 Cr3+ 5.59463 Mn 5.3409 Mn2+ 5.2796 Mn3+ 4.91797 Mn4+ 4.8485 Fe 4.7611 Fe2+ 4.6538 Fe3+ 4.6147 Co 4.2791 Co2+ 4.1231 Co3+ 3.90969 Ni 3.8785 Ni2+ 3.6766 Ni3+ 3.5477 Cu 3.5828 Cu1+ 3.3669 Cu2+ 3.37484 Zn 3.2655 Zn2+ 2.9946 Ga 3.0669 Ga3+ 2.81262 Ge 2.8509 Ge4+ 2.53718 As 2.6345 Se 2.4098 Br 2.1723 Br1- 2.2059 Kr 1.9384 Rb 1.7888 Rb1+ 1.7139 Sr 1.5564 Sr2+ 1.4907 Y 1.4029 Y3+ 1.35417 Zr 1.27618 Zr4+ 1.2148 Nb 1.18865 Nb3+ .019175 Nb5+ 1.12446 Mo .2772 Mo3+ .014734 Mo5+ .014345 Mo6+ 1.03649 Tc .864132 Ru .80852 Ru3+ .847329 Ru4+ .844582 Rh .751536 Rh3+ .764252 Rh4+ .760825 Pd .698655 Pd2+ .696219 Pd4+ .683839 Ag .6446 Ag1+ .646179 Ag2+ .645643 Cd .5946 Cd2+ .597922 In .5476 In3+ .551522 Sn 5.8303 Sn2+ .5036 Sn4+ 5.764 Sb 5.3034 Sb3+ .467196 Sb5+ 5.44853 Te 4.81742 I 4.347 I1- 4.3579 Xe 3.9282 Cs 3.569 Cs1+ 3.552 Ba 3.216 Ba2+ 3.21367 La 2.94817 La3+ 2.9207 Ce 2.81219 Ce3+ 2.77691 Ce4+ 2.65941 Pr 2.77393 Pr3+ 2.6452 Pr4+ 2.54467 Nd 2.66248 Nd3+ 2.52722 Pm 2.5627 Sm 2.47274 Sm3+ 2.31641 Eu 2.3879 Eu2+ 2.27783 Eu3+ 2.22258 Gd 2.25341 Gd3+ 2.13553 Tb 2.24256 Tb3+ 2.05601 Dy 2.1802 Dy3+ 1.9804 Ho 2.07051 Ho3+ 1.91072 Er 2.07356 Er3+ 1.84659 Tm 2.02859 Tm3+ 1.78711 Yb 1.9889 Yb2+ 1.78503 Yb3+ 1.73272 Lu 1.90182 Lu3+ 1.68216 Hf 1.83262 Hf4+ 1.59136 Ta 1.77333 Ta5+ 1.50711 W 1.72029 W6+ 1.42755 Re 1.67191 Os 1.62903 Os4+ 1.37113 Ir 1.59279 Ir3+ 1.34323 Ir4+ 1.30923 Pt 1.51293 Pt2+ 1.32927 Pt4+ 1.24813 Au .4611 Au1+ 1.35321 Au3+ 1.2199 Hg .545 Hg1+ 1.39507 Hg2+ 1.21152 Tl .65515 TL1+ 1.4711 Tl3+ 1.1008 Pb .6902 Pb2+ 1.3366 Pb4+ 1.00566 Bi .704 Bi3+ 1.2356 Bi5+ .91654 Po .700999 At .685870 Rn .6631 Fr .646453 Ra .616341 Ra2+ .604909 Ac .589092 Ac3+ .579689 Th .563359 Th4+ .555054 Pa .547751 U .5293 U3+ .520480 U4+ .516598 U6+ .507079 Np .511929 Np3+ .502322 Np4+ .498626 Np6+ .489810 Pu .499384 Pu3+ .484938 Pu4+ .481422 Pu6+ .473204 Am .483629 Cm .465154 Bk .451018 Cf .437533 save_ 
save_Cromer_Mann_a2 loop_ _enumeration_default.index _enumeration_default.value H .322912 D .322912 H1- .565616 He 0.6309 Li .7508 Li1+ .7888 Be 1.1278 Be2+ .8849 B 1.3326 C 1.02 N 3.1322 O 2.2868 O1- 1.63969 F 2.6412 F1- 3.51057 Ne 3.1125 Na 3.1736 Na1+ 3.9362 Mg 2.1735 Mg2+ 3.8378 Al 1.9002 Al3+ 3.3876 Si 3.0353 Si4+ 3.20345 P 4.1791 S 5.2034 Cl 7.1964 Cl1- 7.2084 Ar 6.6623 K 7.4398 K1+ 7.4917 Ca 7.3873 Ca2+ 7.9518 Sc 7.3679 Sc3+ 8.0273 Ti 7.3558 Ti2+ 7.62174 Ti3+ 8.73816 Ti4+ 8.23473 V 7.3511 V2+ 7.3541 V3+ 7.7419 V5+ 8.14208 Cr 7.3537 Cr2+ 7.7509 Cr3+ 7.81136 Mn 7.3573 Mn2+ 7.362 Mn3+ 7.87194 Mn4+ 7.97057 Fe 7.3573 Fe2+ 7.374 Fe3+ 7.3863 Co 7.3409 Co2+ 7.3883 Co3+ 7.88173 Ni 7.292 Ni2+ 7.4005 Ni3+ 7.75868 Cu 7.1676 Cu1+ 7.3573 Cu2+ 7.11181 Zn 7.0318 Zn2+ 7.3862 Ga 6.7006 Ga3+ 6.69883 Ge 6.3747 Ge4+ 6.70003 As 6.0701 Se 5.8196 Br 5.2358 Br1- 6.3338 Kr 6.7286 Rb 9.6435 Rb1+ 7.6598 Sr 9.8184 Sr2+ 8.1373 Y 10.2946 Y3+ 9.1531 Zr 10.948 Zr4+ 10.0562 Nb 12.0144 Nb3+ 18.0653 Nb5+ 13.3417 Mo 17.2356 Mo3+ 18.2017 Mo5+ 18.0992 Mo6+ 11.175 Tc 11.0948 Ru 12.9182 Ru3+ 13.2885 Ru4+ 13.1787 Rh 14.3501 Rh3+ 14.1259 Rh4+ 13.9806 Pd 15.5017 Pd2+ 15.2096 Pd4+ 14.79 Ag 16.6885 Ag1+ 15.9719 Ag2+ 16.2456 Cd 17.6444 Cd2+ 17.2535 In 18.5596 In3+ 18.1108 Sn 19.1005 Sn2+ 19.0548 Sn4+ 19.7131 Sb 19.0455 Sb3+ 18.933 Sb5+ 19.0302 Te 19.0138 I 18.9949 I1- 18.997 Xe 19.0298 Cs 19.1062 Cs1+ 19.1278 Ba 19.297 Ba2+ 19.1136 La 19.599 La3+ 19.3763 Ce 19.7695 Ce3+ 19.559 Ce4+ 19.8186 Pr 19.6697 Pr3+ 19.7491 Pr4+ 20.0539 Nd 19.6847 Nd3+ 19.9339 Pm 19.6095 Sm 19.4258 Sm3+ 20.2599 Eu 19.0886 Eu2+ 19.9504 Eu3+ 20.3745 Gd 19.0798 Gd3+ 20.4208 Tb 18.2185 Tb3+ 20.3271 Dy 17.6383 Dy3+ 20.2861 Ho 17.294 Ho3+ 20.0994 Er 16.4285 Er3+ 19.7748 Tm 15.8851 Tm3+ 19.332 Yb 15.4345 Yb2+ 17.6817 Yb3+ 18.7614 Lu 15.2208 Lu3+ 18.121 Hf 15.1726 Hf4+ 18.4601 Ta 15.2293 Ta5+ 18.8407 W 15.43 W6+ 19.3763 Re 15.7189 Os 16.155 Os4+ 15.2637 Ir 16.7296 Ir3+ 15.862 Ir4+ 15.5512 Pt 17.7639 Pt2+ 16.7224 Pt4+ 15.9829 Au 18.5913 Au1+ 17.8204 Au3+ 16.9029 Hg 19.0417 Hg1+ 18.4973 Hg2+ 18.06 Tl 19.1584 TL1+ 20.4723 Tl3+ 18.3841 Pb 13.0637 Pb2+ 19.5682 Pb4+ 18.8003 Bi 12.951 Bi3+ 19.5026 Bi5+ 25.0946 Po 15.4733 At 19.0211 Rn 21.2816 Fr 23.0547 Ra 22.9064 Ra2+ 21.6700 Ac 23.1032 Ac3+ 22.1112 Th 23.4219 Th4+ 22.4418 Pa 23.2948 U 23.4128 U3+ 22.5259 U4+ 22.5326 U6+ 22.7584 Np 23.5964 Np3+ 22.6130 Np4+ 22.5787 Np6+ 22.7286 Pu 23.8083 Pu3+ 22.7169 Pu4+ 22.6460 Pu6+ 22.7181 Am 24.0992 Cm 24.4096 Bk 24.7736 Cf 25.1995 save_ save_Cromer_Mann_b2 loop_ _enumeration_default.index _enumeration_default.value H 26.1257 D 26.1257 H1- 15.187 He 3.3568 Li 1.0524 Li1+ 1.9557 Be 1.8623 Be2+ .8313 B 1.021 C 10.2075 N 9.8933 O 5.7011 O1- 4.17236 F 4.2944 F1- 14.7353 Ne 3.4262 Na 8.8422 Na1+ 6.1153 Mg 79.2611 Mg2+ 4.7542 Al .7426 Al3+ 4.14553 Si 32.3337 Si4+ 3.43757 P 27.157 S 22.2151 Cl 1.1662 Cl1- 1.1717 Ar 14.8407 K .7748 K1+ .7674 Ca .6599 Ca2+ .6089 Sc .5729 Sc3+ 7.9629 Ti .5 Ti2+ .457585 Ti3+ 7.04716 Ti4+ 6.67018 V .4385 V2+ .4409 V3+ .383349 V5+ 5.40135 Cr .392 Cr2+ .344261 Cr3+ .334393 Mn .3432 Mn2+ .3435 Mn3+ .294393 Mn4+ .283303 Fe .3072 Fe2+ .3053 Fe3+ .3005 Co .2784 Co2+ .2726 Co3+ .238668 Ni .2565 Ni2+ .2449 Ni3+ .22314 Cu .247 Cu1+ .2274 Cu2+ .244078 Zn .2333 Zn2+ .2031 Ga .2412 Ga3+ .22789 Ge .2516 Ge4+ .205855 As .2647 Se .2726 Br 16.5796 Br1- 19.3345 Kr 16.5623 Rb 17.3151 Rb1+ 14.7957 Sr 14.0988 Sr2+ 12.6963 Y 12.8006 Y3+ 11.2145 Zr 11.916 Zr4+ 10.1483 Nb 11.766 Nb3+ 1.13305 Nb5+ .028781 Mo 1.0958 Mo3+ 1.03031 Mo5+ 1.02238 Mo6+ 8.48061 Tc 8.14487 Ru 8.43467 Ru3+ 
8.37164 Ru4+ 8.12534 Rh 8.21758 Rh3+ 7.84438 Rh4+ 7.62436 Pd 7.98929 Pd2+ 7.55573 Pd4+ 7.14833 Ag 7.4726 Ag1+ 7.19123 Ag2+ 7.18544 Cd 6.9089 Cd2+ 6.80639 In 6.3776 In3+ 6.3247 Sn .5031 Sn2+ 5.8378 Sn4+ .4655 Sb .4607 Sb3+ 5.22126 Sb5+ .467973 Te .420885 I .3814 I1- .3815 Xe 0.344 Cs .3107 Cs1+ .3086 Ba .2756 Ba2+ .28331 La .244475 La3+ .250698 Ce .226836 Ce3+ .23154 Ce4+ .21885 Pr .222087 Pr3+ .214299 Pr4+ .202481 Nd .210628 Nd3+ .199237 Pm 0.202088 Sm .196451 Sm3+ .174081 Eu .1942 Eu2+ .17353 Eu3+ .16394 Gd .181951 Gd3+ .155525 Tb .196143 Tb3+ .149525 Dy .202172 Dy3+ .143384 Ho .19794 Ho3+ .139358 Er .223545 Er3+ .13729 Tm .238849 Tm3+ .136974 Yb .25711 Yb2+ .15997 Yb3+ .13879 Lu 9.98519 Lu3+ .142292 Hf 9.5999 Hf4+ .128903 Ta 9.37046 Ta5+ .116741 W 9.2259 W6+ .104621 Re 9.09227 Os 8.97948 Os4+ 6.84706 Ir 8.86553 Ir3+ 7.10909 Ir4+ 6.71983 Pt 8.81174 Pt2+ 7.38979 Pt4+ 6.60834 Au 8.6216 Au1+ 7.7395 Au3+ 6.82872 Hg 8.4484 Hg1+ 7.65105 Hg2+ 7.05639 Tl 8.70751 TL1+ .517394 Tl3+ 6.53852 Pb 2.3576 Pb2+ .488383 Pb4+ 6.10926 Bi 2.9238 Bi3+ 6.24149 Bi5+ .039042 Po 3.55078 At 3.97458 Rn 4.0691 Fr 4.17619 Ra 3.87135 Ra2+ 3.57670 Ac 3.65155 Ac3+ 3.41437 Th 3.46204 Th4+ 3.24498 Pa 3.41519 U 3.3253 U3+ 3.12293 U4+ 3.05053 U6+ 2.89030 Np 3.25396 Np3+ 3.03807 Np4+ 2.96627 Np6+ 2.81099 Pu 3.26371 Pu3+ 2.96118 Pu4+ 2.89020 Pu6+ 2.73848 Am 3.20647 Cm 3.08997 Bk 3.04619 Cf 3.00775 save_ save_Cromer_Mann_a3 loop_ _enumeration_default.index _enumeration_default.value H .140191 D .140191 H1- .415815 He 0.3112 Li .6175 Li1+ .3414 Be .5391 Be2+ .7993 B 1.0979 C 1.5886 N 2.0125 O 1.5463 O1- 1.52673 F 1.517 F1- 1.26064 Ne 1.4546 Na 1.2674 Na1+ 1.3998 Mg 1.2269 Mg2+ 1.3284 Al 1.5936 Al3+ 1.20296 Si 1.9891 Si4+ 1.19453 P 1.78 S 1.4379 Cl 6.2556 Cl1- 6.5337 Ar 0.6539 K 1.0519 K1+ 6.359 Ca 1.5899 Ca2+ 8.4372 Sc 1.6409 Sc3+ 1.65943 Ti 1.6991 Ti2+ 2.2793 Ti3+ 5.25691 Ti4+ 2.01341 V 2.0703 V2+ 2.2884 V3+ 2.15343 V5+ 2.03081 Cr 3.324 Cr2+ 3.58274 Cr3+ 2.87603 Mn 3.0193 Mn2+ 3.5268 Mn3+ 3.56531 Mn4+ 2.76067 Fe 3.5222 Fe2+ 4.1346 Fe3+ 3.3948 Co 4.0034 Co2+ 4.7393 Co3+ 4.76795 Ni 4.4438 Ni2+ 5.3442 Ni3+ 5.22746 Cu 5.6158 Cu1+ 6.2455 Cu2+ 5.78135 Zn 5.1652 Zn2+ 6.4668 Ga 4.3591 Ga3+ 6.06692 Ge 3.7068 Ge4+ 6.06791 As 3.4313 Se 3.9731 Br 5.6377 Br1- 5.5754 Kr 5.5493 Rb 5.1399 Rb1+ 5.8981 Sr 5.422 Sr2+ 2.5654 Y 5.72629 Y3+ 1.76795 Zr 5.41732 Zr4+ 1.01118 Nb 4.04183 Nb3+ 11.0177 Nb5+ 10.799 Mo 12.8876 Mo3+ 11.7423 Mo5+ 11.4632 Mo6+ 6.57891 Tc 4.64901 Ru 4.86337 Ru3+ 9.32602 Ru4+ 4.71304 Rh 4.73425 Rh3+ 3.32515 Rh4+ 2.53464 Pd 5.29537 Pd2+ 4.32234 Pd4+ 2.89289 Ag 4.8045 Ag1+ 5.27475 Ag2+ 4.3709 Cd 4.461 Cd2+ 4.47128 In 4.2948 In3+ 3.78897 Sn 4.4585 Sn2+ 4.5648 Sn4+ 3.4182 Sb 5.0371 Sb3+ 5.10789 Sb5+ 2.41253 Te 6.14487 I 7.5138 I1- 7.8069 Xe 8.9767 Cs 10.662 Cs1+ 10.2821 Ba 10.888 Ba2+ 10.9054 La 11.3727 La3+ 11.6323 Ce 11.8513 Ce3+ 11.9369 Ce4+ 12.1233 Pr 12.3856 Pr3+ 12.1329 Pr4+ 12.4668 Nd 12.774 Nd3+ 12.12 Pm 13.1235 Sm 13.4396 Sm3+ 11.9202 Eu 13.7603 Eu2+ 11.8034 Eu3+ 11.8509 Gd 13.8518 Gd3+ 11.8708 Tb 14.3167 Tb3+ 12.2471 Dy 14.5596 Dy3+ 11.9812 Ho 14.5583 Ho3+ 11.9788 Er 14.9779 Er3+ 12.1506 Tm 15.1542 Tm3+ 12.3339 Yb 15.3087 Yb2+ 13.3335 Yb3+ 12.6072 Lu 15.1 Lu3+ 12.8429 Hf 14.7586 Hf4+ 12.7285 Ta 14.5135 Ta5+ 12.8268 W 14.4327 W6+ 13.0544 Re 14.5564 Os 14.9305 Os4+ 14.7458 Ir 15.6115 Ir3+ 13.6145 Ir4+ 14.2326 Pt 15.7131 Pt2+ 13.2153 Pt4+ 13.7348 Au 25.5582 Au1+ 14.3359 Au3+ 12.7801 Hg 21.6575 Hg1+ 16.8883 Hg2+ 12.8374 Tl 15.538 TL1+ 18.7478 Tl3+ 11.9328 Pb 18.442 Pb2+ 19.1406 Pb4+ 12.0175 Bi 16.5877 Bi3+ 19.1053 Bi5+ 19.2497 
Po 13.1138 At 9.49887 Rn 8.0037 Fr 12.1439 Ra 12.4739 Ra2+ 7.91342 Ac 12.5977 Ac3+ 8.19216 Th 12.7473 Th4+ 9.78554 Pa 14.1891 U 14.9491 U3+ 12.2165 U4+ 12.0291 U6+ 14.0099 Np 15.6402 Np3+ 12.9898 Np4+ 12.7766 Np6+ 14.3884 Pu 16.7707 Pu3+ 13.5807 Pu4+ 13.3595 Pu6+ 14.7635 Am 17.3415 Cm 17.3990 Bk 17.8919 Cf 18.3317 save_ save_Cromer_Mann_b3 loop_ _enumeration_default.index _enumeration_default.value H 3.14236 D 3.14236 H1- 186.576 He 22.9276 Li 85.3905 Li1+ .6316 Be 103.483 Be2+ 2.2758 B 60.3498 C .5687 N 28.9975 O .3239 O1- 47.0179 F .2615 F1- .442258 Ne 0.2306 Na .3136 Na1+ .2001 Mg .3808 Mg2+ .185 Al 31.5472 Al3+ .228753 Si .6785 Si4+ .2149 P .526 S .2536 Cl 18.5194 Cl1- 19.5424 Ar 43.8983 K 213.187 K1+ -.002 Ca 85.7484 Ca2+ 10.3116 Sc 136.108 Sc3+ -.28604 Ti 35.6338 Ti2+ 19.5361 Ti3+ -.15762 Ti4+ -.29263 V 26.8938 V2+ 20.3004 V3+ 15.1908 V5+ 9.97278 Cr 20.2626 Cr2+ 13.3075 Cr3+ 12.8288 Mn 17.8674 Mn2+ 14.343 Mn3+ 10.8171 Mn4+ 10.4852 Fe 15.3535 Fe2+ 12.0546 Fe3+ 11.6729 Co 13.5359 Co2+ 10.2443 Co3+ 8.35583 Ni 12.1763 Ni2+ 8.873 Ni3+ 7.64468 Cu 11.3966 Cu1+ 8.6625 Cu2+ 7.9876 Zn 10.3163 Zn2+ 7.0826 Ga 10.7805 Ga3+ 6.36441 Ge 11.4468 Ge4+ 5.47913 As 12.9479 Se 15.2372 Br .2609 Br1- .2871 Kr 0.2261 Rb .2748 Rb1+ .1603 Sr .1664 Sr2+ 24.5651 Y .125599 Y3+ 22.6599 Zr .117622 Zr4+ 21.6054 Nb .204785 Nb3+ 10.1621 Nb5+ 9.28206 Mo 11.004 Mo3+ 9.53659 Mo5+ 8.78809 Mo6+ .058881 Tc 21.5707 Ru 24.7997 Ru3+ .017662 Ru4+ .036495 Rh 25.8749 Rh3+ 21.2487 Rh4+ 19.3317 Pd 25.2052 Pd2+ 22.5057 Pd4+ 17.9144 Ag 24.6605 Ag1+ 21.7326 Ag2+ 21.4072 Cd 24.7008 Cd2+ 20.2521 In 25.8499 In3+ 17.3595 Sn 26.8909 Sn2+ 23.3752 Sn4+ 14.0049 Sb 27.9074 Sb3+ 19.5902 Sb5+ 14.1259 Te 28.5284 I 27.766 I1- 29.5259 Xe 26.4659 Cs 24.3879 Cs1+ 23.7128 Ba 20.2073 Ba2+ 20.0558 La 18.7726 La3+ 17.8211 Ce 17.6083 Ce3+ 16.5408 Ce4+ 15.7992 Pr 16.7669 Pr3+ 15.323 Pr4+ 14.8137 Nd 15.885 Nd3+ 14.1783 Pm 15.1009 Sm 14.3996 Sm3+ 12.1571 Eu 13.7546 Eu2+ 11.6096 Eu3+ 11.311 Gd 12.9331 Gd3+ 10.5782 Tb 12.6648 Tb3+ 10.0499 Dy 12.1899 Dy3+ 9.34972 Ho 11.4407 Ho3+ 8.80018 Er 11.3604 Er3+ 8.36225 Tm 10.9975 Tm3+ 7.96778 Yb 10.6647 Yb2+ 8.18304 Yb3+ 7.64412 Lu .261033 Lu3+ 7.33727 Hf .275116 Hf4+ 6.76232 Ta .295977 Ta5+ 6.31524 W .321703 W6+ 5.93667 Re .3505 Os .382661 Os4+ .165191 Ir .417916 Ir3+ .204633 Ir4+ .167252 Pt .424593 Pt2+ .263297 Pt4+ .16864 Au 1.4826 Au1+ .356752 Au3+ .212867 Hg 1.5729 Hg1+ .443378 Hg2+ .284738 Tl 1.96347 TL1+ 7.43463 Tl3+ .219074 Pb 8.618 Pb2+ 6.7727 Pb4+ .147041 Bi 8.7937 Bi3+ .469999 Bi5+ 5.71414 Po 9.55642 At 11.3824 Rn 14.0422 Fr 23.1052 Ra 19.9887 Ra2+ 12.6010 Ac 18.5990 Ac3+ 12.9187 Th 17.8309 Th4+ 13.4661 Pa 16.9235 U 16.0927 U3+ 12.7148 U4+ 12.5723 U6+ 13.1767 Np 15.3622 Np3+ 12.1449 Np4+ 11.9484 Np6+ 12.3300 Pu 14.9455 Pu3+ 11.5331 Pu4+ 11.3160 Pu6+ 11.5530 Am 14.3136 Cm 13.4346 Bk 12.8946 Cf 12.4044 save_ save_Cromer_Mann_a4 loop_ _enumeration_default.index _enumeration_default.value H .04081 D .04081 H1- .116973 He 0.178 Li .4653 Li1+ .1563 Be .7029 Be2+ .1647 B .7068 C .865 N 1.1663 O .867 O1- -20.307 F 1.0243 F1- .940706 Ne 1.1251 Na 1.1128 Na1+ 1.0032 Mg 2.3073 Mg2+ .8497 Al 1.9646 Al3+ .528137 Si 1.541 Si4+ .41653 P 1.4908 S 1.5863 Cl 1.6455 Cl1- 2.3386 Ar 1.6442 K .8659 K1+ 1.1915 Ca 1.0211 Ca2+ .8537 Sc 1.468 Sc3+ 1.57936 Ti 1.9021 Ti2+ .087899 Ti3+ 1.92134 Ti4+ 1.5208 V 2.0571 V2+ .0223 V3+ .016865 V5+ -9.576 Cr 1.4922 Cr2+ .509107 Cr3+ .113575 Mn 2.2441 Mn2+ .2184 Mn3+ .323613 Mn4+ .054447 Fe 2.3045 Fe2+ .4399 Fe3+ .0724 Co 2.3488 Co2+ .7108 Co3+ .725591 Ni 2.38 Ni2+ .9773 Ni3+ .847114 Cu 1.6735 
Cu1+ 1.5578 Cu+ 1.14523 Zn 2.41 Zn2+ 1.394 Ga 2.9623 Ga3+ 1.0066 Ge 3.683 Ge4+ .859041 As 4.2779 Se 4.3543 Br 3.9851 Br1- 3.7272 Kr 3.5375 Rb 1.5292 Rb1+ 2.7817 Sr 2.6694 Sr2+ -34.193 Y 3.26588 Y3+ -33.108 Zr 3.65721 Zr4+ -2.6479 Nb 3.53346 Nb3+ 1.94715 Nb5+ .337905 Mo 3.7429 Mo3+ 2.30951 Mo5+ .740625 Mo6+ 0. Tc 2.71263 Ru 1.56756 Ru3+ 3.00964 Ru4+ 2.18535 Rh 1.28918 Rh3+ -6.1989 Rh4+ -5.6526 Pd .605844 Pd2+ 0. Pd4+ -7.9492 Ag 1.0463 Ag1+ .357534 Ag2+ 0. Cd 1.6029 Cd2+ 0. In 2.0396 In3+ 0. Sn 2.4663 Sn2+ .487 Sn4+ .0193 Sb 2.6827 Sb3+ .288753 Sb5+ 0. Te 2.5239 I 2.2735 I1- 2.8868 Xe 1.99 Cs 1.4953 Cs1+ .9615 Ba 2.6959 Ba2+ .773634 La 3.28719 La3+ .336048 Ce 3.33049 Ce3+ .612376 Ce4+ .144583 Pr 2.82428 Pr3+ .97518 Pr4+ .296689 Nd 2.85137 Nd3+ 1.51031 Pm 2.87516 Sm 2.89604 Sm3+ 2.71488 Eu 2.9227 Eu2+ 3.87243 Eu3+ 3.26503 Gd 3.54545 Gd3+ 3.7149 Tb 2.95354 Tb3+ 3.773 Dy 2.96577 Dy3+ 4.50073 Ho 3.63837 Ho3+ 4.93676 Er 2.98233 Er3+ 5.17379 Tm 2.98706 Tm3+ 5.38348 Yb 2.98963 Yb2+ 5.14657 Yb3+ 5.47647 Lu 3.71601 Lu3+ 5.59415 Hf 4.30013 Hf4+ 5.59927 Ta 4.76492 Ta5+ 5.38695 W 5.11982 W6+ 5.06412 Re 5.44174 Os 5.67589 Os4+ 5.06795 Ir 5.83377 Ir3+ 5.82008 Ir4+ 5.53672 Pt 5.7837 Pt2+ 6.35234 Pt4+ 5.92034 Au 5.86 Au1+ 6.58077 Au3+ 6.52354 Hg 5.9676 Hg1+ 6.48216 Hg2+ 6.89912 Tl 5.52593 TL1+ 6.82847 Tl3+ 7.00574 Pb 5.9696 Pb2+ 7.01107 Pb4+ 6.96886 Bi 6.4692 Bi3+ 7.10295 Bi5+ 6.91555 Po 7.02588 At 7.42518 Rn 7.4433 Fr 2.11253 Ra 3.21097 Ra2+ 7.65078 Ac 4.08655 Ac3+ 7.05545 Th 4.80703 Th4+ 5.29444 Pa 4.17287 U 4.1880 U3+ 5.37073 U4+ 4.79840 U6+ 1.21457 Np 4.18550 Np3+ 5.43227 Np4+ 4.92159 Np6+ 1.75669 Pu 3.47947 Pu3+ 5.66016 Pu4+ 5.18831 Pu6+ 2.28678 Am 3.49331 Cm 4.21665 Bk 4.23284 Cf 4.24391 save_ save_Cromer_Mann_b4 loop_ _enumeration_default.index _enumeration_default.value H 57.7997 D 57.7997 H1- 3.56709 He 0.9821 Li 168.261 Li1+ 10.0953 Be .542 Be2+ 5.1146 B .1403 C 51.6512 N .5826 O 32.9089 O1- -.01404 F 26.1476 F1- 47.3437 Ne 21.7814 Na 129.424 Na1+ 14.039 Mg 7.1937 Mg2+ 10.1411 Al 85.0886 Al3+ 8.28524 Si 81.6937 Si4+ 6.65365 P 68.1645 S 56.172 Cl 47.7784 Cl1- 60.4486 Ar 33.3929 K 41.6841 K1+ 31.9128 Ca 178.437 Ca2+ 25.9905 Sc 51.3531 Sc3+ 16.0662 Ti 116.105 Ti2+ 61.6558 Ti3+ 15.9768 Ti4+ 12.9464 V 102.478 V2+ 115.122 V3+ 63.969 V5+ .940464 Cr 98.7399 Cr2+ 32.4224 Cr3+ 32.8761 Mn 83.7543 Mn2+ 41.3235 Mn3+ 24.1281 Mn4+ 27.573 Fe 76.8805 Fe2+ 31.2809 Fe3+ 38.5566 Co 71.1692 Co2+ 25.6466 Co3+ 18.3491 Ni 66.3421 Ni2+ 22.1626 Ni3+ 16.9673 Cu 64.8126 Cu1+ 25.8487 Cu2+ 19.897 Zn 58.7097 Zn2+ 18.0995 Ga 61.4135 Ga3+ 14.4122 Ge 54.7625 Ge4+ 11.603 As 47.7972 Se 43.8163 Br 41.4328 Br1- 58.1535 Kr 39.3972 Rb 164.934 Rb1+ 31.2087 Sr 132.376 Sr2+ -.0138 Y 104.354 Y3+ -.01319 Zr 87.6627 Zr4+ -.10276 Nb 69.7957 Nb3+ 28.3389 Nb5+ 25.7228 Mo 61.6584 Mo3+ 26.6307 Mo5+ 23.3452 Mo6+ 0. Tc 86.8472 Ru 94.2928 Ru3+ 22.887 Ru4+ 20.8504 Rh 98.6062 Rh3+ -.01036 Rh4+ -.0102 Pd 76.8986 Pd2+ 0. Pd4+ .005127 Ag 99.8156 Ag1+ 66.1147 Ag2+ 0. Cd 87.4825 Cd2+ 0. In 92.8029 In3+ 0. Sn 83.9571 Sn2+ 62.2061 Sn4+ -.7583 Sb 75.2825 Sb3+ 55.5113 Sb5+ 0. 
Te 70.8403 I 66.8776 I1- 84.9304 Xe 64.2658 Cs 213.904 Cs1+ 59.4565 Ba 167.202 Ba2+ 51.746 La 133.124 La3+ 54.9453 Ce 127.113 Ce3+ 43.1692 Ce4+ 62.2355 Pr 143.644 Pr3+ 36.4065 Pr4+ 45.4643 Nd 137.903 Nd3+ 30.8717 Pm 132.721 Sm 128.007 Sm3+ 24.8242 Eu 123.174 Eu2+ 26.5156 Eu3+ 22.9966 Gd 101.398 Gd3+ 21.7029 Tb 115.362 Tb3+ 21.2773 Dy 111.874 Dy3+ 19.581 Ho 92.6566 Ho3+ 18.5908 Er 105.703 Er3+ 17.8974 Tm 102.961 Tm3+ 17.2922 Yb 100.417 Yb2+ 20.39 Yb3+ 16.8153 Lu 84.3298 Lu3+ 16.3535 Hf 72.029 Hf4+ 14.0366 Ta 63.3644 Ta5+ 12.4244 W 57.056 W6+ 11.1972 Re 52.0861 Os 48.1647 Os4+ 18.003 Ir 45.0011 Ir3+ 20.3254 Ir4+ 17.4911 Pt 38.6103 Pt2+ 22.9426 Pt4+ 16.9392 Au 36.3956 Au1+ 26.4043 Au3+ 18.659 Hg 38.3246 Hg1+ 28.2262 Hg2+ 20.7482 Tl 45.8149 TL1+ 28.8482 Tl3+ 17.2114 Pb 47.2579 Pb2+ 23.8132 Pb4+ 14.714 Bi 48.0093 Bi3+ 20.3185 Bi5+ 12.8285 Po 47.0045 At 45.4715 Rn 44.2473 Fr 150.645 Ra 142.325 Ra2+ 29.8436 Ac 117.020 Ac3+ 25.9443 Th 99.1722 Th4+ 23.9533 Pa 105.251 U 100.613 U3+ 26.3394 U4+ 23.4582 U6+ 25.2017 Np 97.4908 Np3+ 25.4928 Np4+ 22.7502 Np6+ 22.6581 Pu 105.980 Pu3+ 24.3992 Pu4+ 21.8301 Pu6+ 20.9303 Am 102.273 Cm 88.4834 Bk 86.0030 Cf 83.7881 save_ save_Cromer_Mann_c loop_ _enumeration_default.index _enumeration_default.value H .003038 D .003038 H1- .002389 He 0.0064 Li .0377 Li1+ .0167 Be .0385 Be2+ -6.1092 B -.1932 C .2156 N -11.529 O .2508 O1- 21.9412 F .2776 F1- .653396 Ne 0.3515 Na .6760 Na1+ .4040 Mg .8584 Mg2+ .4853 Al 1.1151 Al3+ .706786 Si 1.1407 Si4+ .746297 P 1.1149 S .8669 Cl -9.5574 Cl1- -16.378 Ar 1.44450 K 1.4228 K1+ -4.9978 Ca 1.3751 Ca2+ -14.875 Sc 1.3329 Sc3+ -6.6667 Ti 1.2807 Ti2+ .897155 Ti3+ -14.652 Ti4+ -13.280 V 1.2199 V2+ 1.2298 V3+ .656565 V5+ 1.71430 Cr 1.1832 Cr2+ .616898 Cr3+ .518275 Mn 1.0896 Mn2+ 1.0874 Mn3+ .393974 Mn4+ .251877 Fe 1.0369 Fe2+ 1.0097 Fe3+ .9707 Co 1.0118 Co2+ .9324 Co3+ .286667 Ni 1.0341 Ni2+ .8614 Ni3+ .386044 Cu 1.1910 Cu1+ .8900 Cu2+ 1.14431 Zn 1.3041 Zn2+ .7807 Ga 1.7189 Ga3+ 1.53545 Ge 2.1313 Ge4+ 1.45572 As 2.5310 Se 2.8409 Br 2.9557 Br1- 3.1776 Kr 2.825 Rb 3.4873 Rb1+ 2.0782 Sr 2.5064 Sr2+ 41.4025 Y 1.91213 Y3+ 40.2602 Zr 2.06929 Zr4+ 9.41454 Nb 3.75591 Nb3+ -12.912 Nb5+ -6.3934 Mo 4.3875 Mo3+ -14.421 Mo5+ -14.316 Mo6+ .344941 Tc 5.40428 Ru 5.37874 Ru3+ -3.1892 Ru4+ 1.42357 Rh 5.32800 Rh3+ 11.8678 Rh4+ 11.2835 Pd 5.26593 Pd2+ 5.29160 Pd4+ 13.0174 Ag 5.1790 Ag1+ 5.21572 Ag2+ 5.21404 Cd 5.0694 Cd2+ 5.11937 In 4.9391 In3+ 4.99635 Sn 4.7821 Sn2+ 4.7861 Sn4+ 3.9182 Sb 4.5909 Sb3+ 4.69626 Sb5+ 4.69263 Te 4.35200 I 4.0712 I1- 4.0714 Xe 3.7118 Cs 3.3352 Cs1+ 3.2791 Ba 2.7731 Ba2+ 3.02902 La 2.14678 La3+ 2.40860 Ce 1.86264 Ce3+ 2.09013 Ce4+ 1.59180 Pr 2.05830 Pr3+ 1.77132 Pr4+ 1.24285 Nd 1.98486 Nd3+ 1.47588 Pm 2.02876 Sm 2.20963 Sm3+ .954586 Eu 2.5745 Eu2+ 1.36389 Eu3+ .759344 Gd 2.41960 Gd3+ .645089 Tb 3.58324 Tb3+ .691967 Dy 4.29728 Dy3+ .689690 Ho 4.56796 Ho3+ .852795 Er 5.92046 Er3+ 1.17613 Tm 6.75621 Tm3+ 1.63929 Yb 7.56672 Yb2+ 3.70983 Yb3+ 2.26001 Lu 7.97628 Lu3+ 2.97573 Hf 8.58154 Hf4+ 2.39699 Ta 9.24354 Ta5+ 1.78555 W 9.88750 W6+ 1.01074 Re 10.4720 Os 11.0005 Os4+ 6.49804 Ir 11.4722 Ir3+ 8.27903 Ir4+ 6.96824 Pt 11.6883 Pt2+ 9.85329 Pt4+ 7.39534 Au 12.0658 Au1+ 11.2299 Au3+ 9.09680 Hg 12.6089 Hg1+ 12.0205 Hg2+ 10.6268 Tl 13.1746 TL1+ 12.5258 Tl3+ 9.80270 Pb 13.4118 Pb2+ 12.4734 Pb4+ 8.08428 Bi 13.5782 Bi3+ 12.4711 Bi5+ -6.7994 Po 13.6770 At 13.7108 Rn 13.6905 Fr 13.7247 Ra 13.6211 Ra2+ 13.5431 Ac 13.5266 Ac3+ 13.4637 Th 13.4314 Th4+ 13.3760 Pa 13.4287 U 13.3966 U3+ 13.3092 U4+ 13.2671 U6+ 13.1665 Np 13.3573 Np3+ 13.2544 Np4+ 13.2116 
Np6+ 13.1130 Pu 13.3812 Pu3+ 13.1991 Pu4+ 13.1555 Pu6+ 13.0582 Am 13.3592 Cm 13.2887 Bk 13.2754 Cf 13.2674 save_ save_hi_ang_Fox_c0 loop_ _enumeration_default.index _enumeration_default.value H -4.8 D -4.8 H1- -4.8 He 0.52543 Li 0.89463 Li1+ 0.89463 Be 1.2584 Be2+ 1.2584 B 1.6672 C 1.70560 N 1.54940 O 1.30530 O1- 1.30530 F 1.16710 F1- 1.16710 Ne 1.09310 Na 0.84558 Na1+ 0.84558 Mg 0.71877 Mg2+ 0.71877 Al 0.67975 Al3+ 0.67975 Si 0.70683 Si4+ 0.70683 P 0.85532 S 1.10400 Cl 1.42320 Cl1- 1.42320 Ar 1.82020 K 2.26550 K1+ 2.26550 Ca 2.71740 Ca2+ 2.71740 Sc 3.11730 Sc3+ 3.11730 Ti 3.45360 Ti2+ 3.45360 Ti3+ 3.45360 Ti4+ 3.45360 V 3.71270 V2+ 3.71270 V3+ 3.71270 V5+ 3.71270 Cr 3.87870 Cr2+ 3.87870 Cr3+ 3.87870 Mn 3.98550 Mn2+ 3.98550 Mn3+ 3.98550 Mn4+ 3.98550 Fe 3.99790 Fe2+ 3.99790 Fe3+ 3.99790 Co 3.95900 Co2+ 3.95900 Co3+ 3.95900 Ni 3.86070 Ni2+ 3.86070 Ni3+ 3.86070 Cu 3.72510 Cu1+ 3.72510 Cu2+ 3.72510 Zn 3.55950 Zn2+ 3.55950 Ga 3.37560 Ga3+ 3.37560 Ge 3.17800 Ge4+ 3.17800 As 2.97740 Se 2.78340 Br 2.60610 Br1- 2.60610 Kr 2.44280 Rb 2.30990 Rb1+ 2.30990 Sr 2.21070 Sr2+ 2.21070 Y 2.14220 Y3+ 2.14220 Zr 2.12690 Zr4+ 2.12690 Nb 2.12120 Nb3+ 2.12120 Nb5+ 2.12120 Mo 2.18870 Mo3+ 2.18870 Mo5+ 2.18870 Mo6+ 2.18870 Tc 2.25730 Ru 2.37300 Ru3+ 2.37300 Ru4+ 2.37300 Rh 2.50990 Rh3+ 2.50990 Rh4+ 2.50990 Pd 2.67520 Pd2+ 2.67520 Pd4+ 2.67520 Ag 2.88690 Ag1+ 2.88690 Ag2+ 2.88690 Cd 3.08430 Cd2+ 3.08430 In 3.31400 In3+ 3.31400 Sn 3.49840 Sn2+ 3.49840 Sn4+ 3.49840 Sb 3.70410 Sb3+ 3.70410 Sb5+ 3.70410 Te 3.88240 I 4.08010 I1- 4.08010 Xe 4.24610 Cs 4.38910 Cs1+ 4.38910 Ba 4.51070 Ba2+ 4.51070 La 4.60250 La3+ 4.60250 Ce 4.69060 Ce3+ 4.69060 Ce4+ 4.69060 Pr 4.72150 Pr3+ 4.72150 Pr4+ 4.72150 Nd 4.75090 Nd3+ 4.75090 Pm 4.74070 Sm 4.71700 Sm3+ 4.71700 Eu 4.66940 Eu2+ 4.66940 Eu3+ 4.66940 Gd 4.61010 Gd3+ 4.61010 Tb 4.52550 Tb3+ 4.52550 Dy 4.45230 Dy3+ 4.45230 Ho 4.37660 Ho3+ 4.37660 Er 4.29460 Er3+ 4.29460 Tm 4.21330 Tm3+ 4.21330 Yb 4.13430 Yb2+ 4.13430 Yb3+ 4.13430 Lu 4.04230 Lu3+ 4.04230 Hf 3.95160 Hf4+ 3.95160 Ta 3.85000 Ta5+ 3.85000 W 3.76510 W6+ 3.76510 Re 3.67600 Os 3.60530 Os4+ 3.60530 Ir 3.53130 Ir3+ 3.53130 Ir4+ 3.53130 Pt 3.47070 Pt2+ 3.47070 Pt4+ 3.47070 Au 3.41630 Au1+ 3.41630 Au3+ 3.41630 Hg 3.37350 Hg1+ 3.37350 Hg2+ 3.37350 Tl 3.34590 TL1+ 3.34590 Tl3+ 3.34590 Pb 3.32330 Pb2+ 3.32330 Pb4+ 3.32330 Bi 3.31880 Bi3+ 3.31880 Bi5+ 3.31880 Po 3.32030 At 3.34250 Rn 3.37780 Fr 3.41990 Ra 3.47530 Ra2+ 3.47530 Ac 3.49020 Ac3+ 3.49020 Th 3.61060 Th4+ 3.61060 Pa 3.68630 U 3.76650 U3+ 3.76650 U4+ 3.76650 U6+ 3.76650 Np 3.82870 Np3+ 3.82870 Np4+ 3.82870 Np6+ 3.82870 Pu 3.88970 Pu3+ 3.88970 Pu4+ 3.88970 Pu6+ 3.88970 Am 3.95060 Cm 4.01470 Bk 4.07780 Cf 4.14210 save_ save_hi_ang_Fox_c1 loop_ _enumeration_default.index _enumeration_default.value H -.5 D -.5 H1- -.5 He -3.433 Li -2.4366 Li1+ -2.4366 Be -1.9459 Be2+ -1.9459 B -1.8556 C -1.56760 N -1.20190 O -0.83742 O1- -0.83742 F -0.63203 F1- -0.63203 Ne -0.50221 Na -0.26294 Na1+ -0.26294 Mg -0.13144 Mg2+ -0.13144 Al -0.08756 Al3+ -0.08756 Si -0.09888 Si4+ -0.09888 P -0.21262 S -0.40325 Cl -0.63936 Cl1- -0.63936 Ar -0.92776 K -1.24530 K1+ -1.24530 Ca -1.55670 Ca2+ -1.55670 Sc -1.81380 Sc3+ -1.81380 Ti -2.01150 Ti2+ -2.01150 Ti3+ -2.01150 Ti4+ -2.01150 V -2.13920 V2+ -2.13920 V3+ -2.13920 V5+ -2.13920 Cr -2.19000 Cr2+ -2.19000 Cr3+ -2.19000 Mn -2.18850 Mn2+ -2.18850 Mn3+ -2.18850 Mn4+ -2.18850 Fe -2.11080 Fe2+ -2.11080 Fe3+ -2.11080 Co -1.99650 Co2+ -1.99650 Co3+ -1.99650 Ni -1.88690 Ni2+ -1.88690 Ni3+ -1.88690 Cu -1.65500 Cu1+ -1.65500 Cu2+ -1.65500 Zn -1.45100 Zn2+ -1.45100 Ga 
-1.23910 Ga3+ -1.23910 Ge -1.02230 Ge4+ -1.02230 As -0.81038 Se -0.61110 Br -0.43308 Br1- -0.43308 Kr -0.27244 Rb -0.14328 Rb1+ -0.14328 Sr -0.04770 Sr2+ -0.04770 Y 0.01935 Y3+ 0.01935 Zr 0.08618 Zr4+ 0.08618 Nb 0.05381 Nb3+ 0.05381 Nb5+ 0.05381 Mo -0.00655 Mo3+ -0.00655 Mo5+ -0.00655 Mo6+ -0.00655 Tc -0.05737 Ru -0.15040 Ru3+ -0.15040 Ru4+ -0.15040 Rh -0.25906 Rh3+ -0.25906 Rh4+ -0.25906 Pd -0.39137 Pd2+ -0.39137 Pd4+ -0.39137 Ag -0.56119 Ag1+ -0.56119 Ag2+ -0.56119 Cd -0.71450 Cd2+ -0.71450 In -0.89697 In3+ -0.89697 Sn -1.02990 Sn2+ -1.02990 Sn4+ -1.02990 Sb -1.18270 Sb3+ -1.18270 Sb5+ -1.18270 Te -1.30980 I -1.45080 I1- -1.45080 Xe -1.56330 Cs -1.65420 Cs1+ -1.65420 Ba -1.72570 Ba2+ -1.72570 La -1.77070 La3+ -1.77070 Ce -1.81790 Ce3+ -1.81790 Ce4+ -1.81790 Pr -1.81390 Pr3+ -1.81390 Pr4+ -1.81390 Nd -1.80800 Nd3+ -1.80800 Pm -1.76600 Sm -1.71410 Sm3+ -1.71410 Eu -1.64140 Eu2+ -1.64140 Eu3+ -1.64140 Gd -1.55750 Gd3+ -1.55750 Tb -1.45520 Tb3+ -1.45520 Dy -1.36440 Dy3+ -1.36440 Ho -1.27460 Ho3+ -1.27460 Er -1.18170 Er3+ -1.18170 Tm -1.09060 Tm3+ -1.09060 Yb -1.00310 Yb2+ -1.00310 Yb3+ -1.00310 Lu -0.90518 Lu3+ -0.90518 Hf -0.80978 Hf4+ -0.80978 Ta -0.70599 Ta5+ -0.70599 W -0.61807 W6+ -0.61807 Re -0.52688 Os -0.45420 Os4+ -0.45420 Ir -0.37856 Ir3+ -0.37856 Ir4+ -0.37856 Pt -0.31534 Pt2+ -0.31534 Pt4+ -0.31534 Au -0.25987 Au1+ -0.25987 Au3+ -0.25987 Hg -0.21428 Hg1+ -0.21428 Hg2+ -0.21428 Tl -0.18322 TL1+ -0.18322 Tl3+ -0.18322 Pb -0.15596 Pb2+ -0.15596 Pb4+ -0.15596 Bi -0.14554 Bi3+ -0.14554 Bi5+ -0.14554 Po -0.13999 At -0.15317 Rn -0.17800 Fr -0.20823 Ra -0.25005 Ra2+ -0.25005 Ac -0.25109 Ac3+ -0.25109 Th -0.35409 Th4+ -0.35409 Pa -0.41329 U -0.47542 U3+ -0.47542 U4+ -0.47542 U6+ -0.47542 Np -0.51955 Np3+ -0.51955 Np4+ -0.51955 Np6+ -0.51955 Pu -0.56296 Pu3+ -0.56296 Pu4+ -0.56296 Pu6+ -0.56296 Am -0.60554 Cm -0.65062 Bk -0.69476 Cf -0.73977 save_ save_hi_ang_Fox_c2 loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He 4.8007 Li 2.325 Li1+ 2.325 Be 1.3046 Be2+ 1.3046 B 1.6044 C 1.18930 N 0.51064 O -0.16738 O1- -0.16738 F -0.40207 F1- -0.40207 Ne -0.53648 Na -0.87884 Na1+ -0.87884 Mg -1.20900 Mg2+ -1.20900 Al -0.95431 Al3+ -0.95431 Si -0.98356 Si4+ -0.98356 P -0.37390 S 0.20094 Cl 0.84722 Cl1- 0.84722 Ar 1.59220 K 2.38330 K1+ 2.38330 Ca 3.13170 Ca2+ 3.13170 Sc 3.71390 Sc3+ 3.71390 Ti 4.13170 Ti2+ 4.13170 Ti3+ 4.13170 Ti4+ 4.13170 V 4.35610 V2+ 4.35610 V3+ 4.35610 V5+ 4.35610 Cr 4.38670 Cr2+ 4.38670 Cr3+ 4.38670 Mn 4.27960 Mn2+ 4.27960 Mn3+ 4.27960 Mn4+ 4.27960 Fe 3.98170 Fe2+ 3.98170 Fe3+ 3.98170 Co 3.60630 Co2+ 3.60630 Co3+ 3.60630 Ni 3.12390 Ni2+ 3.12390 Ni3+ 3.12390 Cu 2.60290 Cu1+ 2.60290 Cu2+ 2.60290 Zn 2.03390 Zn2+ 2.03390 Ga 1.46160 Ga3+ 1.46160 Ge 0.89119 Ge4+ 0.89119 As 0.34861 Se -0.14731 Br -0.57381 Br1- -0.57381 Kr -0.95570 Rb -1.22600 Rb1+ -1.22600 Sr -1.41100 Sr2+ -1.41100 Y -1.52240 Y3+ -1.52240 Zr -1.49190 Zr4+ -1.49190 Nb -1.50070 Nb3+ -1.50070 Nb5+ -1.50070 Mo -1.25340 Mo3+ -1.25340 Mo5+ -1.25340 Mo6+ -1.25340 Tc -1.07450 Ru -0.77694 Ru3+ -0.77694 Ru4+ -0.77694 Rh -0.44719 Rh3+ -0.44719 Rh4+ -0.44719 Pd -0.05894 Pd2+ -0.05894 Pd4+ -0.05894 Ag 0.42189 Ag1+ 0.42189 Ag2+ 0.42189 Cd 0.84482 Cd2+ 0.84482 In 1.35030 In3+ 1.35030 Sn 1.68990 Sn2+ 1.68990 Sn4+ 1.68990 Sb 2.08920 Sb3+ 2.08920 Sb5+ 2.08920 Te 2.41170 I 2.76730 I1- 2.76730 Xe 3.04200 Cs 3.25450 Cs1+ 3.25450 Ba 3.41320 Ba2+ 3.41320 La 3.49970 La3+ 3.49970 Ce 3.60280 Ce3+ 3.60280 Ce4+ 3.60280 Pr 3.56480 Pr3+ 3.56480 Pr4+ 3.56480 Nd 3.51970 Nd3+ 3.51970 Pm 3.37430 Sm 3.20800 Sm3+ 3.20800 Eu 
2.98580 Eu2+ 2.98580 Eu3+ 2.98580 Gd 2.73190 Gd3+ 2.73190 Tb 2.43770 Tb3+ 2.43770 Dy 2.17540 Dy3+ 2.17540 Ho 1.92540 Ho3+ 1.92540 Er 1.67060 Er3+ 1.67060 Tm 1.42390 Tm3+ 1.42390 Yb 1.18810 Yb2+ 1.18810 Yb3+ 1.18810 Lu 0.92889 Lu3+ 0.92889 Hf 0.67951 Hf4+ 0.67951 Ta 0.41103 Ta5+ 0.41103 W 0.18568 W6+ 0.18568 Re -0.04706 Os -0.22529 Os4+ -0.22529 Ir -0.41174 Ir3+ -0.41174 Ir4+ -0.41174 Pt -0.56487 Pt2+ -0.56487 Pt4+ -0.56487 Au -0.69030 Au1+ -0.69030 Au3+ -0.69030 Hg -0.79013 Hg1+ -0.79013 Hg2+ -0.79013 Tl -0.84911 TL1+ -0.84911 Tl3+ -0.84911 Pb -0.89878 Pb2+ -0.89878 Pb4+ -0.89878 Bi -0.90198 Bi3+ -0.90198 Bi5+ -0.90198 Po -0.89333 At -0.83350 Rn -0.74320 Fr -0.64000 Ra -0.50660 Ra2+ -0.50660 Ac -0.49651 Ac3+ -0.49651 Th -0.18926 Th4+ -0.18926 Pa -0.01192 U 0.16850 U3+ 0.16850 U4+ 0.16850 U6+ 0.16850 Np 0.29804 Np3+ 0.29804 Np4+ 0.29804 Np6+ 0.29804 Pu 0.42597 Pu3+ 0.42597 Pu4+ 0.42597 Pu6+ 0.42597 Am 0.54967 Cm 0.67922 Bk 0.80547 Cf 0.93342 save_ save_hi_ang_Fox_c3 loop_ _enumeration_default.index _enumeration_default.value H .0 D .0 H1- .0 He -2.5476 Li -.71949 Li1+ -.71949 Be -0.04297 Be2+ -0.04297 B -0.65981 C -0.42715 N 0.02472 O 0.47500 O1- 0.47500 F 0.54352 F1- 0.54352 Ne 0.60957 Na 0.76974 Na1+ 0.76974 Mg 0.82738 Mg2+ 0.82738 Al 0.72294 Al3+ 0.72294 Si 0.55631 Si4+ 0.55631 P 0.20731 S -0.26058 Cl -0.76135 Cl1- -0.76135 Ar -1.32510 K -1.91290 K1+ -1.91290 Ca -2.45670 Ca2+ -2.45670 Sc -2.85330 Sc3+ -2.85330 Ti -3.11710 Ti2+ -3.11710 Ti3+ -3.11710 Ti4+ -3.11710 V -3.22040 V2+ -3.22040 V3+ -3.22040 V5+ -3.22040 Cr -3.17520 Cr2+ -3.17520 Cr3+ -3.17520 Mn -3.02150 Mn2+ -3.02150 Mn3+ -3.02150 Mn4+ -3.02150 Fe -2.71990 Fe2+ -2.71990 Fe3+ -2.71990 Co -2.37050 Co2+ -2.37050 Co3+ -2.37050 Ni -1.94290 Ni2+ -1.94290 Ni3+ -1.94290 Cu -1.49760 Cu1+ -1.49760 Cu2+ -1.49760 Zn -1.02160 Zn2+ -1.02160 Ga -0.55471 Ga3+ -0.55471 Ge -0.09984 Ge4+ -0.09984 As 0.32231 Se 0.69837 Br 1.00950 Br1- 1.00950 Kr 1.27070 Rb 1.45320 Rb1+ 1.45320 Sr 1.55410 Sr2+ 1.55410 Y 1.59630 Y3+ 1.59630 Zr 1.51820 Zr4+ 1.51820 Nb 1.50150 Nb3+ 1.50150 Nb5+ 1.50150 Mo 1.24010 Mo3+ 1.24010 Mo5+ 1.24010 Mo6+ 1.24010 Tc 1.06630 Ru 0.79060 Ru3+ 0.79060 Ru4+ 0.79060 Rh 0.49443 Rh3+ 0.49443 Rh4+ 0.49443 Pd 0.15404 Pd2+ 0.15404 Pd4+ 0.15404 Ag -0.25659 Ag1+ -0.25659 Ag2+ -0.25659 Cd -0.60990 Cd2+ -0.60990 In -1.03910 In3+ -1.03910 Sn -1.29860 Sn2+ -1.29860 Sn4+ -1.29860 Sb -1.61640 Sb3+ -1.61640 Sb5+ -1.61640 Te -1.86420 I -2.13920 I1- -2.13920 Xe -2.34290 Cs -2.49220 Cs1+ -2.49220 Ba -2.59590 Ba2+ -2.59590 La -2.64050 La3+ -2.64050 Ce -2.70670 Ce3+ -2.70670 Ce4+ -2.70670 Pr -2.65180 Pr3+ -2.65180 Pr4+ -2.65180 Nd -2.59010 Nd3+ -2.59010 Pm -2.44210 Sm -2.28170 Sm3+ -2.28170 Eu -2.07460 Eu2+ -2.07460 Eu3+ -2.07460 Gd -1.84040 Gd3+ -1.84040 Tb -1.57950 Tb3+ -1.57950 Dy -1.34550 Dy3+ -1.34550 Ho -1.13090 Ho3+ -1.13090 Er -0.91467 Er3+ -0.91467 Tm -0.70804 Tm3+ -0.70804 Yb -0.51120 Yb2+ -0.51120 Yb3+ -0.51120 Lu -0.29820 Lu3+ -0.29820 Hf -0.09620 Hf4+ -0.09620 Ta 0.11842 Ta5+ 0.11842 W 0.29787 W6+ 0.29787 Re 0.48180 Os 0.61700 Os4+ 0.61700 Ir 0.75967 Ir3+ 0.75967 Ir4+ 0.75967 Pt 0.87492 Pt2+ 0.87492 Pt4+ 0.87492 Au 0.96224 Au1+ 0.96224 Au3+ 0.96224 Hg 1.02850 Hg1+ 1.02850 Hg2+ 1.02850 Tl 1.05970 TL1+ 1.05970 Tl3+ 1.05970 Pb 1.08380 Pb2+ 1.08380 Pb4+ 1.08380 Bi 1.06850 Bi3+ 1.06850 Bi5+ 1.06850 Po 1.04380 At 0.97641 Rn 0.88510 Fr 0.78354 Ra 0.65836 Ra2+ 0.65836 Ac 0.64340 Ac3+ 0.64340 Th 0.36849 Th4+ 0.36849 Pa 0.20878 U 0.05060 U3+ 0.05060 U4+ 0.05060 U6+ 0.05060 Np -0.06566 Np3+ -0.06566 Np4+ -0.06566 Np6+ -0.06566 Pu -0.18080 Pu3+ -0.18080 
Pu4+ -0.18080 Pu6+ -0.18080 Am -0.29112 Cm -0.40588 Bk -0.51729 Cf -0.62981 save_ save_colour_hue loop_ _enumeration_default.index _enumeration_default.value H white D blue_light H1- white He unknown Li unknown Li1+ unknown Be unknown Be2+ unknown B unknown C steel_grey N blue O red O1- red F green F1- green Ne unknown Na magenta Na1+ magenta Mg magenta Mg2+ magenta Al magenta Al3+ magenta Si unknown Si4+ unknown P magenta S yellow Cl green Cl1- green Ar unknown K magenta K1+ magenta Ca magenta Ca2+ magenta Sc unknown Sc3+ unknown Ti magenta Ti2+ magenta Ti3+ magenta Ti4+ magenta V magenta V2+ magenta V3+ magenta V5+ magenta Cr magenta Cr2+ magenta Cr3+ magenta Mn magenta Mn2+ magenta Mn3+ magenta Mn4+ magenta Fe magenta Fe2+ magenta Fe3+ magenta Co magenta Co2+ magenta Co3+ magenta Ni magenta Ni2+ magenta Ni3+ magenta Cu magenta Cu1+ magenta Cu2+ magenta Zn magenta Zn2+ magenta Ga magenta Ga3+ magenta Ge magenta Ge4+ magenta As magenta Se yellow Br green Br1- green Kr unknown Rb unknown Rb1+ unknown Sr unknown Sr2+ unknown Y unknown Y3+ unknown Zr unknown Zr4+ unknown Nb unknown Nb3+ unknown Nb5+ unknown Mo unknown Mo3+ magenta Mo5+ magenta Mo6+ magenta Tc unknown Ru unknown Ru3+ unknown Ru4+ unknown Rh unknown Rh3+ unknown Rh4+ unknown Pd unknown Pd2+ unknown Pd4+ unknown Ag magenta Ag1+ magenta Ag2+ magenta Cd magenta Cd2+ magenta In unknown In3+ unknown Sn magenta Sn2+ magenta Sn4+ magenta Sb magenta Sb3+ magenta Sb5+ magenta Te unknown I green I1- green Xe unknown Cs unknown Cs1+ unknown Ba unknown Ba2+ unknown La unknown La3+ unknown Ce unknown Ce3+ unknown Ce4+ unknown Pr unknown Pr3+ unknown Pr4+ unknown Nd unknown Nd3+ unknown Pm unknown Sm unknown Sm3+ unknown Eu unknown Eu2+ unknown Eu3+ unknown Gd unknown Gd3+ unknown Tb unknown Tb3+ unknown Dy unknown Dy3+ unknown Ho unknown Ho3+ unknown Er unknown Er3+ unknown Tm unknown Tm3+ unknown Yb unknown Yb2+ unknown Yb3+ unknown Lu unknown Lu3+ unknown Hf unknown Hf4+ unknown Ta unknown Ta5+ unknown W unknown W6+ unknown Re unknown Os unknown Os4+ unknown Ir unknown Ir3+ unknown Ir4+ unknown Pt magenta Pt2+ magenta Pt4+ magenta Au magenta Au1+ magenta Au3+ magenta Hg magenta Hg1+ magenta Hg2+ magenta Tl unknown TL1+ unknown Tl3+ unknown Pb magenta Pb2+ magenta Pb4+ magenta Bi magenta Bi3+ magenta Bi5+ magenta Po unknown At unknown Rn unknown Fr unknown Ra unknown Ra2+ unknown Ac unknown Ac3+ unknown Th unknown Th4+ unknown Pa unknown U unknown U3+ unknown U4+ unknown U6+ unknown Np unknown Np3+ unknown Np4+ unknown Np6+ unknown Pu unknown Pu3+ unknown Pu4+ unknown Pu6+ unknown Am unknown Cm unknown Bk unknown Cf unknown save_ #============================================================================= # The dictionary's creation history. #============================================================================ loop_ _dictionary_audit.version _dictionary_audit.date _dictionary_audit.revision 1.0.01 2005-12-12 ; Initial version of the TEMPLATES dictionary created from the definitions used in CORE_3 dictionary version 3.5.02 ; 1.0.1 2006-02-12 ; Remove dictionary attributes from a save frame. Change category core_templates to template ; 1.2.01 2006-02-21 ; File structure to conform with prototype version dictionaries. ; 1.2.02 2006-03-07 ; Added the template _template.relational_id for the ddl3 dictionary. ; 1.2.03 2006-06-20 ; Apply DDL 3.6.04 attributes. ; 1.2.04 2006-06-27 ; Change filename to com_val.dic. apply DDL 3.6.05 changes. 
add 'context' and 'method' enumerated lists add 'enumeration_default' blocks to this file ; 1.2.05 2006-08-30 ; In type.contents change constrction of Otag to 'ANchar [_]' ; 1.2.06 2006-11-13 ; Remove method and context frames ; 1.2.07 2006-12-14 ; Apply DDL3 3.7.01 attributes. ; 1.2.08 2007-10-11 ; Correct definitions of Ctag and Otag in _type.contents ; 1.2.09 2011-03-25 ; Change the syntax of "Filename" in type_contents enumeration set. ; 1.3.01 2011-08-03 ; Remove definition.id lines in keeping with nested imports. ; 1.3.02 2011-12-01 ; Update the DDL version. No Matrix types present. ; 1.3.03 2012-05-07 ; Update the DDL version. Change dictionary class to Template ; 1.3.04 2012-07-08 ; Remove type.contents enumeration list ; 1.3.05 2012-10-16 ; Change all element symbols in the ion-to-elemnt default list to Upper and lower case characters (from all upper). ; 1.4.01 2013-03-08 ; Changes arising from alerts issued by ALIGN. ; 1.4.02 2013-04-16 ; Changed type.source 'Measured' to 'Recorded' ; 1.4.03 2014-06-09 ; Inserted dummy line at top of each frame; this is skipped on import ; 1.4.04 2016-04-01 ; Added Bohr magnetons and radians to the units list (James Hester) ; 1.4.05 2016-05-13 ; Added Schoenflies group list (James Hester) ; 1.4.06 2017-12-12 ; Updated atom symbol list for newly-named elements (James Hester) ; pycifrw-4.4/src/Programs/type_test.cif000066400000000000000000000013571345362224200201410ustar00rootroot00000000000000#CIF1.1 # A CIF file for validity testing. One block contains a series of # invalidly-typed items as per the supplied DDL1 dictionary file # The other block contains a report which any validity checking # program should be able to produce on request. data_invalid_type_block _test_reference_block_id invalid_type_block _test_construct_character 'there is nothing wrong with this' # this data item should be a number _test_construct_number 453K # there is no time type in DDL1 or 2 _test_construct_time 15:34 data_invalid_type_block_report _test_result_block_id invalid_type_block _test_result_validity invalid _test_result_fail_item_name '_test_number' _test_result_fail_type 'Value construction' _test_result_fail_item_value 453K pycifrw-4.4/src/Programs/updated.cif000066400000000000000000000005171345362224200175440ustar00rootroot00000000000000#\#CIF_2.0 # This file has been updated data_changed loop_ _atom_site_label _atom_site_type_symbol _atom_site_fract_x _atom_site_fract_y _atom_site_fract_z Ge1 Ge 0.44769(7) 0.92488(4) 0.20378(2) Ge2 Ge 0.55025(7) 1.22809(4) 0.17280(2)pycifrw-4.4/src/Programs/validate_cif.py000066400000000000000000000135321345362224200204200ustar00rootroot00000000000000# A program to check CIFs against dictionaries. 
# # Usage: validate_cif [-d dictionary_dir] -f dictionary file cifname # # We need option parsing: from optparse import OptionParser # We need our cif library: import CifFile import os import urllib # # return a CifFile object from an FTP location def cif_by_ftp(ftp_ptr,store=True,directory="."): # print "Opening %s" % ftp_ptr if store: new_fn = os.path.split(urllib.url2pathname(ftp_ptr))[1] target = os.path.abspath(os.path.join(directory,new_fn)) if target != ftp_ptr: urllib.urlretrieve(ftp_ptr,target) print "Stored %s as %s" % (ftp_ptr,target) print 'Reading ' + target ret_cif = CifFile.CifFile(target) else: ret_cif = CifFile.ReadCif(ftp_ptr) return ret_cif # get a canonical CIF dictionary given name and version # we use the IUCr repository file, perhaps stored locally def locate_dic(dicname,dicversion,regloc="cifdic.register",store_dir = "."): register = cif_by_ftp(regloc,directory=store_dir) good_gen = register["validation_dictionaries"] dataloop = good_gen.GetLoop("_cifdic_dictionary.version") matches = [a for a in dataloop if getattr(a,"_cifdic_dictionary.name")==dicname and \ getattr(a,"_cifdic_dictionary.version")==dicversion] if len(matches)==0: print "Unable to find any matches for %s version %s" % (dicname,dicversion) return "" elif len(matches)>1: print "Warning: found more than one candidate, choosing first." print map(str,matches) return getattr(matches[0],"_cifdic_dictionary.URL") # the location def parse_options(): # define our options op = OptionParser(usage="%prog [options] ciffile", version="%prog 0.7") op.add_option("-d","--dict_dir", dest = "dirname", default = ".", help = "Directory where locally stored dictionaries are located") op.add_option("-f","--dict_file", dest = "dictnames", action="append", help = "A dictionary name stored locally") op.add_option("-u","--dict-version", dest = "versions", action="append", help = "A dictionary version") op.add_option("-n","--name", dest = "iucr_names",action="append", help = "Dictionary name as registered by IUCr") op.add_option("-s","--store", dest = "store_flag",action="store_true", help = "Store this dictionary locally", default=True) op.add_option("-c","--canon-reg", dest = "registry",action="store_const", const = "ftp://ftp.iucr.org/pub/cifdics/cifdic.register", help = "Fetch and use canonical dictionary registry from IUCr") op.add_option("-m","--markup", dest = "use_html",action="store_true", help = "Output result in HTML",default=False) op.add_option("-t","--is_dict", dest = "dict_flag", action="store_true",default=False, help = "CIF file should be validated as a CIF dictionary") op.add_option("-r","--registry-loc", dest = "registry", default = "file:cifdic.register", help = "Location of global dictionary registry (see also -c option)") (options,args) = op.parse_args() # our logic: if we are given a dictionary file using -f, the dictionaries # are all located locally; otherwise, they are all located externally, and # we use the IUCr register to locate them. 
# create the dictionary file names import sys if len(sys.argv) <= 1: print "No arguments given: use option --help to get a help message\n" exit return options,args def execute_with_options(options,args): if options.dictnames: diclist = map(lambda a:os.path.join(options.dirname,a),options.dictnames) print "Using following local dictionaries to validate:" for dic in diclist: print "%s" % dic fulldic = CifFile.merge_dic(diclist,mergemode='overlay') else: # print "Locating dictionaries using registry at %s" % options.registry dics = map(None,options.iucr_names,options.versions) dicurls = map(lambda a:locate_dic(a[0],a[1],regloc=options.registry,store_dir=options.dirname),dics) diccifs = map(lambda a:cif_by_ftp(a,options.store_flag,options.dirname),dicurls) fulldic = CifFile.merge_dic(diccifs) diclist = dicurls # for use in reporting later # open the cif file cf = CifFile.CifFile(args[0],grammar="auto") output_header(options.use_html,args[0],diclist) print CifFile.validate_report(CifFile.Validate(cf,dic= fulldic,isdic=options.dict_flag),use_html=options.use_html) output_footer(options.use_html) # # Headers and footers for HTML/ASCII output # def output_header(use_html,filename,dictionaries): prog_info = "Validate_cif version 0.7, Copyright ASRP 2005-\n" if use_html: print "PyCIFRW validation report" print '" print "
<h2>Validation results for %s</h2>" % filename print "<p>Validation performed by %s</p>" % prog_info print "<p>Dictionaries used:<ul>" for one_dic in dictionaries: print "<li>%s</li>" % one_dic print "</ul>
" else: print "Validation results for %s\n" % filename print "Validation performed by %s" % prog_info print "File validated against following dictionaries:" for one_dic in dictionaries: print " %s" % one_dic def output_footer(use_html): if use_html: print "" def main (): apply(execute_with_options,parse_options()) if __name__ == "__main__": main() pycifrw-4.4/src/StarFile.html000066400000000000000000005675411345362224200162570ustar00rootroot00000000000000 StarFile.nw
<Copyright statement>= (U->)
__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""

Introduction

This file implements a general STAR reading/writing utility. The basic objects (StarFile/StarBlock) read and write syntactically correct STAR files including save frames.

The StarFile class is initialised with either no arguments (a new STAR file) or with the name of an already existing STAR file. Data items are accessed, changed and added using the Python mapping interface, i.e. to get dataitem you would type value = cf[blockname][dataitem].

Note also that a StarFile object can be accessed as a mapping type, i.e. using square brackets. Most mapping operations have been implemented (see below).

We define a generic BlockCollection class that both CifFiles and StarFiles are subclasses of. It is also used when the user requests a collection of blocks from a StarFile.

The LoopBlock class used to be the root class of StarBlocks and all loop blocks, allowing recursive handling of nested loops. With the removal of nested loop support, it is simpler to model a StarBlock as a collection of data items with additional information specifying which datanames are grouped together. LoopBlocks are still used to provide packet-based access to loops.
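
As a quick orientation, a typical session might look like the following sketch (the file name, block name and data names are invented for illustration, and the import path assumes a standard PyCIFRW installation):

import CifFile.StarFile as StarFile          # import path may vary
sf = StarFile.StarFile("my_data.star")       # parse an existing STAR file
value = sf["my_block"]["_my_item"]           # mapping-style read access
sf["my_block"]["_new_item"] = "a value"      # mapping-style assignment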

<*>=
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

<Copyright statement>

import sys

# Python 2,3 compatibility
try:
    from urllib import urlopen         # for arbitrary opening
    from urlparse import urlparse, urlunparse
except:
    from urllib.request import urlopen
    from urllib.parse import urlparse,urlunparse
import re,os
import textwrap

try:
    from StringIO import StringIO #not cStringIO as we cannot subclass
except ImportError:
    from io import StringIO

if isinstance(u"abc",str):   #Python 3
    unicode = str

try:
    import numpy
    have_numpy = True
except ImportError:
    have_numpy = False

<Define a collection datatype>
<LoopBlock class>
<StarBlock class>
<Star packet class>
<BlockCollection class>
<StarFile class>
<Subclass StringIO>
<Define an error class>
<Read in a STAR file>
<Get data dimension>
<Utility functions>
<API documentation flags>

BlockCollection

StarFiles and CifFiles are both collections of blocks. We abstract this into the BlockCollection class, and then inherit from it to make a StarFile object. The philosophy is that the treatment of the constituent blocks is managed by the enclosing block collection, based on how the block collection was initialised.

<BlockCollection class>= (<-U)
class BlockCollection(object):
    """A container for StarBlock objects. The constructor takes
    one non-keyword argument `datasource` to set the initial data.  If
    `datasource` is a Python dictionary, the values must be `StarBlock`
    objects and the keys will be blocknames in the new object. Keyword
    arguments:

    standard:
        `CIF` or `Dic`.  `CIF` enforces 75-character blocknames, and will
        print block contents before that block's save frame.

    blocktype:
        The type of blocks held in this container. Normally `StarBlock`
        or `CifBlock`.

    characterset:
        `ascii` or `unicode`.  Blocknames and datanames appearing within
        blocks are restricted to the appropriate characterset. Note that
        only characters in the basic multilingual plane are accepted. This
        restriction will be lifted when PyCIFRW is ported to Python3.

    scoping:
        `instance` or `dictionary`: `instance` implies that save frames are
        hidden from save frames lower in the hierarchy or in sibling
        hierarchies. `dictionary` makes all save frames visible everywhere
        within a data block.  This setting is only relevant for STAR2 dictionaries and
        STAR2 data files, as save frames are currently not used in plain CIF data
        files.

"""
    <Initialise BC data structures>
    <Block collection locking>
    <BC emulation of mapping type>
    <Add a new data section>
    <Re-identify a data block>
    <Make a BC from a name list>
    <Merge with another block collection>
    <Conformance checks>
    <Collect all values of a single key in all blocks>
    <Switch save frame scoping rules>
    <Parent child utilities>
    <Set output template>
<Write out to string representation>

With the advent of CIF2, the allowed character set has expanded to encompass most of Unicode. Our object needs to know about this different characterset in order to check incoming values and datanames for conformance. This is done via the 'characterset' keyword.

DDLm dictionaries assume that all definitions in nested save frames are equally accessible from other nested save frames, whereas in instance files save frames are logically insulated from other save frames at the same or lower levels. Block names may be duplicated if they are in different enclosing frames, although all save frame names have to be unique within a DDLm dictionary (as importation is allowed to refer to save frame names without qualification). We deal with potential duplication by appending a '+' to the access key of legitimate save frames with duplicate names. Our child_table dictionary links the internal block key to its parent and to the mixed-case name used when outputting the block.

If scoping is 'instance', nested datablocks are invisible and only accessible through the 'saves' attribute, which produces a view onto the same block collection.
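
The effect of the two scoping modes on block visibility can be sketched as follows (block names invented):

bc = BlockCollection()                         # default scoping is 'instance'
bc.NewBlock('top_block')
bc.NewBlock('inner_frame',parent='top_block')
bc.keys()                                      # ['top_block']: the frame is hidden
bc.scoping = 'dictionary'
bc.keys()                                      # now also contains 'inner_frame'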

To take account of dictionaries with tens of thousands of entries (e.g. the PDB), we optimise block merging for speed. Most of the information in the separate structures below could be derived from child_table, but we take the space hit for the sake of speed. The canonical reference to a block is the lowercase version of its name. We use these lowercase keys to index into a table that contains the actual block name and the parent block name.

<Initialise BC data structures>= (<-U)
def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
             characterset='ascii',scoping='instance',**kwargs):
    import collections
    self.dictionary = {}
    self.standard = standard
    self.lower_keys = set()           # short_cuts
    self.renamed = {}
    self.PC = collections.namedtuple('PC',['block_id','parent'])
    self.child_table = {}
    self.visible_keys = []            # for efficiency
    self.block_input_order = []       # to output in same order
    self.scoping = scoping  #will trigger setting of child table
    self.blocktype = blocktype
    self.master_template = {}   #for outputting
    self.set_grammar('2.0')
    self.set_characterset(characterset)
    if isinstance(datasource,BlockCollection):
        self.merge_fast(datasource)
        self.scoping = scoping   #reset visibility
    elif isinstance(datasource,dict):
        for key,value in datasource.items():
             self[key]= value
    self.header_comment = ''

def set_grammar(self,new_grammar):
    """Set the syntax and grammar for output to `new_grammar`"""
    if new_grammar not in ['1.1','1.0','2.0','STAR2']:
        raise StarError('Unrecognised output grammar %s' % new_grammar)
    self.grammar = new_grammar

def set_characterset(self,characterset):
    """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. If datanames
    have already been added to any datablocks, they are not checked."""
    self.characterset = characterset
    for one_block in self.lower_keys:
        self[one_block].set_characterset(characterset)
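
For example, the output syntax and the allowed character repertoire can be adjusted at any time (a sketch, reusing the collection bc from the scoping example above):

bc.set_grammar('STAR2')             # output using STAR2 syntax
bc.set_characterset('unicode')      # permit non-ASCII datanames and blocknames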

Unlocking. When editing dictionaries with many datablocks, we would rather just unlock all datablocks at once.

<Block collection locking>= (<-U)
def unlock(self):
    """Allow overwriting of all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite=True

def lock(self):
    """Disallow overwriting for all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite = False

Checking block name lengths. This is not needed for a STAR block, but is useful for CIF.

<Check block name lengths>=
def checklengths(self,maxlength):
    toolong = [a.block_id for a in self.child_table.values() if len(a.block_id)>maxlength]
    if toolong:
        errorstring = ""
        for bn in toolong:
            errorstring += "\n" + bn
        raise StarError( 'Following block name(s) too long: \n' + errorstring)

Switch scoping. We interpose some code in the normal __setattr__ method so as to detect a scoping switch. In some cases we want to hide save frames from our accesses; in other cases we wish to make all frames visible. Setting the scoping attribute allows this to be swapped around. Note that we recompute the visible keys even if the new value is the same as the old.

<Switch save frame scoping rules>= (<-U)
def __setattr__(self,attr_name,newval):
    if attr_name == 'scoping':
        if newval not in ('dictionary','instance'):
            raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
        if newval == 'dictionary':
            self.visible_keys = [a for a in self.lower_keys]
        else:
            #only top-level datablocks visible
            self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None]
    object.__setattr__(self,attr_name,newval)

Emulation of a mapping type. We also put odd little useful utilities in this section.

<BC emulation of mapping type>= (<-U)
def __str__(self):
    return self.WriteOut()

def __setitem__(self,key,value):
    self.NewBlock(key,value,parent=None)

def __getitem__(self,key):
    if isinstance(key,(unicode,str)):
       lowerkey = key.lower()
       if lowerkey in self.lower_keys:
           return self.dictionary[lowerkey]
       #print 'Visible keys:' + `self.visible_keys`
       #print 'All keys' + `self.lower_keys`
       #print 'Child table' + `self.child_table`
       raise KeyError('No such item %s' % key)

# we have to get an ordered list of the current keys,
# as we'll have to delete one of them anyway.
# Deletion will delete any key regardless of visibility

def __delitem__(self,key):
    dummy = self[key]   #raise error if not present
    lowerkey = key.lower()
    # get rid of all children recursively as well
    children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey]
    for child in children:
        del self[child]   #recursive call
    del self.dictionary[lowerkey]
    del self.child_table[lowerkey]
    try:
        self.visible_keys.remove(lowerkey)
    except ValueError:     #visible_keys is a list, so remove() raises ValueError
        pass
    self.lower_keys.remove(lowerkey)
    self.block_input_order.remove(lowerkey)

def __len__(self):
    return len(self.visible_keys)

def __contains__(self,item):
    """Support the 'in' operator"""
    if not isinstance(item,(unicode,str)): return False
    if item.lower() in self.visible_keys:
        return True
    return False

# We iterate over all visible
def __iter__(self):
    for one_block in self.keys():
        yield self[one_block]

# TODO: handle different case
def keys(self):
    return self.visible_keys

# Note that has_key does not exist in 3.5
def has_key(self,key):
    return key in self

def get(self,key,default=None):
    if key in self:     # take account of case
        return self.__getitem__(key)
    else:
        return default

def clear(self):
    self.dictionary.clear()
    self.lower_keys = set()
    self.child_table = {}
    self.visible_keys = []
    self.block_input_order = []

def copy(self):
    newcopy = self.dictionary.copy()  #all blocks
    for k,v in self.dictionary.items():
        newcopy[k] = v.copy()
    newcopy = BlockCollection(newcopy)
    newcopy.child_table = self.child_table.copy()
    newcopy.lower_keys = self.lower_keys.copy()
    newcopy.block_input_order = self.block_input_order[:]   #slice copy works in Python 2 and 3
    newcopy.characterset = self.characterset
    newcopy.SetTemplate(self.master_template.copy())
    newcopy.scoping = self.scoping  #this sets visible keys
    return newcopy

def update(self,adict):
    for key in adict.keys():
        self[key] = adict[key]

def items(self):
    return [(a,self[a]) for a in self.keys()]

def first_block(self):
    """Return the 'first' block.  This is not necessarily the first block in the file."""
    if self.keys():
        return self[self.keys()[0]]
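
The net effect is that a BlockCollection behaves much like a dictionary of blocks, as this sketch shows (names invented):

'TOP_BLOCK' in bc                 # True: membership testing is case-insensitive
len(bc)                           # the number of visible blocks
for block in bc: print(block)     # iterate over the visible StarBlock objects
bc.get('no_such_block',None)      # dictionary-style retrieval with a default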

Parent-child utilities. As we are now emulating parent-child relationships using self.child_table, we provide some useful methods.

<Parent child utilities>= (<-U)
def get_parent(self,blockname):
    """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
    possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
    try:
        first = next(possibles)   #get first one
    except:
        raise StarError('no parent for %s' % blockname)
    try:
       second = next(possibles)
    except StopIteration:
       return first[1].parent
    raise StarError('More than one parent for %s' % blockname)

def get_roots(self):
    """Get the top-level blocks"""
    return [a for a in self.child_table.items() if a[1].parent==None]

def get_children(self,blockname,include_parent=False,scoping='dictionary'):
    """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
    True, the parent block will also be included in the block collection as the root."""
    newbc = BlockCollection()
    block_lower = blockname.lower()
    proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
    newbc.child_table = dict(proto_child_table)
    if not include_parent:
       newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
    newbc.lower_keys = set([a[0] for a in proto_child_table])
    newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
    if include_parent:
        newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
        newbc.lower_keys.add(block_lower)
        newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
    newbc.scoping = scoping
    return newbc

def get_immediate_children(self,parentname):
    """Get the next level of children of the given block as a list, without nested levels"""
    child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
    return child_handles

# This takes time
def get_child_list(self,parentname):
    """Get a list of all child categories in alphabetical order"""
    child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
    child_handles.sort()
    return child_handles

def is_child_of_parent(self,parentname,blockname):
    """Return `True` if `blockname` is a child of `parentname`"""
    checkname = parentname.lower()
    more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
    if blockname.lower() in more_children:
       return True
    else:
       for one_child in more_children:
           if self.is_child_of_parent(one_child,blockname): return True
    return False

def set_parent(self,parentname,childname):
    """Set the parent block"""
    # first check that both blocks exist
    if parentname.lower() not in self.lower_keys:
        raise KeyError('Parent block %s does not exist' % parentname)
    if childname.lower() not in self.lower_keys:
        raise KeyError('Child block %s does not exist' % childname)
    old_entry = self.child_table[childname.lower()]
    self.child_table[childname.lower()]=self.PC(old_entry.block_id,
           parentname.lower())
    self.scoping = self.scoping #reset visibility
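
Continuing the earlier sketch, these utilities can be exercised as follows (block names invented):

bc.get_parent('inner_frame')                      # -> 'top_block'
bc.get_roots()                                    # the top-level (key,PC) entries
bc.is_child_of_parent('top_block','inner_frame')  # True
children = bc.get_children('top_block')           # a new BlockCollection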

Making a Block Collection from a set of our own block names. This is used in merging, where we must merge with a Block Collection. Any pointers to parent blocks that are not in the list become None, i.e. those blocks become top-level blocks. We use our own child table to find links between the supplied block names and ourselves.

<Make a BC from a name list>= (<-U)
def makebc(self,namelist,scoping='dictionary'):
    """Make a block collection from a list of block names"""
    newbc = BlockCollection()
    block_lower = [n.lower() for n in namelist]
    proto_child_table = [a for a in self.child_table.items() if a[0] in block_lower]
    newbc.child_table = dict(proto_child_table)
    new_top_level = [(a[0],self.PC(a[1].block_id,None)) for a in newbc.child_table.items() if a[1].parent not in block_lower]
    newbc.child_table.update(dict(new_top_level))
    newbc.lower_keys = set([a[0] for a in proto_child_table])
    newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
    newbc.scoping = scoping
    newbc.block_input_order = block_lower
    return newbc
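
In sketch form (block names invented), a block whose parent is omitted from the list becomes a top-level block of the new collection:

subset = bc.makebc(['inner_frame'])    # parent 'top_block' is not included
subset.get_roots()                     # 'inner_frame' is now a root block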


Adding a new block. A new block is just a new item in our dictionary, so we add a new entry. We return the new block name in case we have changed it, so that the calling routine can refer to it later. Also, there is a limit of 75 characters on the block name length, which we enforce here. By setting fix to True, illegal whitespace in blocknames will be changed to underscores.

self.standard is used to enforce differences in the treatment of block names. If self.standard is set at all, blocks will not replace a previous block with the same name. DDLm dictionaries may not contain identically-named save frames, but a save frame name may be identical to that of the enclosing datablock. If an identically-named save frame is introduced anywhere in the file, we rename the access key by appending a '+'. These renames are stored in the renamed dictionary. The name appearing in the output file is not changed, only the access key. If self.standard is 'Dic', then we put block contents before save frames, in accordance with stylistic conventions when printing out.

Note that we must take account of upper/lower case differences being irrelevant for STAR/CIF, but that we want to preserve the original case.

To allow for nested blocks, we can specify a parent block. When the file is printed, the new block will appear inside the parent block if nested frames have been requested or if the parent block is a top-level block.

blockcontents cannot be given StarBlock() as a default argument, because Python evaluates default arguments once, at function definition time, and every new block would then be assigned the same object.
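
This is the standard Python mutable default argument pitfall; a minimal demonstration of why the default object must instead be created inside the function:

def bad_new_block(blockname,blockcontents=StarBlock()):  #evaluated only once!
    return blockcontents

bad_new_block('block1') is bad_new_block('block2')       # True: the same object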

<Add a new data section>= (<-U)
def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    """Add a new block named `blockname` with contents `blockcontents`. If `fix`
    is True, `blockname` will have spaces and tabs replaced by underscores. `parent`
    allows a parent block to be set so that block hierarchies can be created.  Depending on
    the output standard, these blocks will be printed out as nested save frames or
    ignored."""
    if blockcontents is None:
        blockcontents = self.blocktype()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
        raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:   #already there
        if self.standard is not None:
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    self.block_input_order.append(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname))
    self[new_lowerbn].set_grammar(self.grammar)
    self[new_lowerbn].set_characterset(self.characterset)
    self[new_lowerbn].formatting_hints = self.master_template
    return new_lowerbn  #in case calling routine wants to know

Renaming a block. This is a slightly intricate operation, as we also have to make sure that the original children point to the new blockname. We assume that both oldname and newname are already lower case. We can simply change the key used to identify the block using _rekey, or we can change the block name that is printed using rename. In the latter case, there must be no name collisions or the operation will fail.

<Re-identify a data block>= (<-U)
def _rekey(self,oldname,newname,block_id=''):
    """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
       does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version."""
    move_block = self[oldname]    #old block
    is_visible = oldname in self.visible_keys
    move_block_info = self.child_table[oldname]    #old info
    move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
    # now rewrite the necessary bits
    self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
    oldpos = self.block_input_order.index(oldname)
    del self[oldname]   #do this after updating child table so we don't delete children
    self.dictionary.update({newname:move_block})
    self.lower_keys.add(newname)
    #print 'Block input order was: ' + `self.block_input_order`
    self.block_input_order[oldpos:oldpos]=[newname]
    if block_id == '':
       self.child_table.update({newname:move_block_info})
    else:
       self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
    if is_visible: self.visible_keys += [newname]

def rename(self,oldname,newname):
    """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
       conformance checks are conducted."""
    realoldname = oldname.lower()
    realnewname = newname.lower()
    if realnewname in self.lower_keys:
        raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname))
    if realoldname not in self.lower_keys:
        raise KeyError('Cannot find old block %s' % realoldname)
    self._rekey(realoldname,realnewname,block_id=newname)
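
In sketch form (names invented), the difference between the two operations is:

bc.rename('old_name','new_name')    # access key and printed name both change
bc._rekey('new_name','other_key')   # only the access key changes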

Merging. Originally, this package envisaged CIF and STAR files as collections of either StarBlocks or CifBlocks, which differed only in their capacity to hold save frames and nested loops. From version 4.05, we envisage CIF and STAR files as collections of StarBlocks, none of which hold any nested save frames. Instead, save frame relationships are held in a separate table, which we look up when outputting.

This was originally implemented for dictionary merging support, which is now deprecated with the new DDLm way of combining dictionaries. We cannot merge CifDic objects, because the internal data structures for DDL2 and DDL1 are different (parent-child in particular), so any merge operation would have to first recreate the original Cif structure before proceeding.

Merging can be strict, overlay or replace. In all cases, if the block name is different, we simply add it in. If it is the same, in strict mode we flag an error, in replace mode we replace it, and in overlay mode we actually add/replace individual data items. The default mode will be determined from the setting of 'standard': if no standard has been specified, the mode is 'replace', otherwise the mode is 'strict'.

If the single_block list is non-empty, we assume that we should merge on the block level, using the given block names as the particular blocks to merge. This is essentially what we have to do for DDL2 dictionaries, where all the definitions are stored in save frames inside a single block.

Note also the related situation where we are in 'strict' mode, and the DDL1 dictionaries both have an "on_this_dictionary" block. So we have an extra keyword argument "idblock" which contains a blockname to ignore during merging, i.e. it will remain the same as before merging.

The suggested overlay method involves adding to loops, rather than replacing them completely. Identical rows must be removed, and any duplicated key values that remain after this must be flagged as an error. We do not read in the DDL specifications themselves, to avoid messing around with hard-coded filenames, so we require the calling function to provide us with this file (not yet implemented).

The match_att keyword allows us to match blocks/save frames on a particular attribute, rather than the block name itself. This means we can do the right thing and compare _name entries rather than block names (the default behaviour).

Note also a problem with the overlay protocol as written up in Vol. G: if we try matching on item.name, we will run into trouble where _item.name is looped in DDL2-style dictionaries. We cannot match on a complete match against all item names in the list, because we would like to be able to add item names in overlay mode. So we have to deduce the 'main' item name from any parent-child information that we have using a helper function which is passed to us.

Nested save frames are emulated through child table lookups, so we must also merge this table when merging block collections. If parent is empty, we put all new blocks on the same level. Otherwise, any top-level blocks in the incoming block collection (those whose parent is None) are given the parent specified in parent. In previous versions parent was the block name as text, but because there was no way to tell callers that the name had been changed, parent is now itself a datablock.

As for NewBlock, we allow duplicate save frame names in the precise situation where one of the blocks is a top-level block.

The drop_att attribute allows a particular datablock attribute to be used to determine if datablocks are semantically identical.
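
A sketch of the three modes, assuming two block collections bc1 and bc2 that contain overlapping block names:

bc1.merge(bc2,mode='strict')     # raises StarError on a duplicate block name
bc1.merge(bc2,mode='replace')    # duplicate blocks in bc2 replace those in bc1
bc1.merge(bc2,mode='overlay')    # duplicate blocks are merged item by item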

<Merge with another block collection>= (<-U)
def merge_fast(self,new_bc,parent=None):
    """Do a fast merge. WARNING: this may change one or more of its frame headers in order to
    remove duplicate frames.  Please keep a handle to the block object instead of the text of
    the header."""
    if self.standard is None:
        mode = 'replace'
    else:
        mode = 'strict'
    overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
    if parent is not None:
        parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent]
        if len(parent_name)==0 or len(parent_name)>1:
            raise StarError("Unable to find unique parent block name: have %s" % str(parent_name))
        parent_name = parent_name[0]
    else:
        parent_name = None  #an error will be thrown if we treat as a string
    if overlap_flag and mode != 'replace':
        double_keys = self.lower_keys.intersection(new_bc.lower_keys)
        for dup_key in double_keys:
              our_parent = self.child_table[dup_key].parent
              their_parent = new_bc.child_table[dup_key].parent
              if (our_parent is None and their_parent is not None and parent is None) or\
                  parent is not None:  #rename our block
                start_key = dup_key
                while start_key in self.lower_keys: start_key = start_key+'+'
                self._rekey(dup_key,start_key)
                if parent_name.lower() == dup_key:  #we just renamed the prospective parent!
                    parent_name = start_key
              elif our_parent is not None and their_parent is None and parent is None:
                start_key = dup_key
                while start_key in new_bc.lower_keys: start_key = start_key+'+'
                new_bc._rekey(dup_key,start_key)
              else:
                raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
    self.dictionary.update(new_bc.dictionary)
    self.lower_keys.update(new_bc.lower_keys)
    self.visible_keys += (list(new_bc.lower_keys))
    self.block_input_order += new_bc.block_input_order
    #print('Block input order now:' + repr(self.block_input_order))
    self.child_table.update(new_bc.child_table)
    if parent_name is not None:     #redo the child_table entries
          reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent is None]
          reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
          self.child_table.update(dict(reparent_dict))

def merge(self,new_bc,mode=None,parent=None,single_block=[],
               idblock="",match_att=[],match_function=None):
    if mode is None:
        if self.standard is None:
           mode = 'replace'
        else:
           mode = 'strict'
    if single_block:
        self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                               match_att=match_att,
                                               match_function=match_function)
        return None
    base_keys = [a[1].block_id for a in self.child_table.items()]
    block_to_item = base_keys   #default
    new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
    if match_att:
        #make a blockname -> item name map
        if match_function:
            block_to_item = [match_function(self[a]) for a in self.keys()]
        else:
            block_to_item = [self[a].get(match_att[0],None) for a in self.keys()]
        #print `block_to_item`
    for key in new_keys:        #run over incoming blocknames
        if key == idblock: continue    #skip dictionary id
        basekey = key           #default value
        if len(match_att)>0:
           attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
        else:
           attval = 0
        for ii in range(len(block_to_item)):  #do this way to get looped names
            thisatt = block_to_item[ii]       #keyname in old block
            #print "Looking for %s in %s" % (attval,thisatt)
            if attval == thisatt or \
               (isinstance(thisatt,list) and attval in thisatt):
                  basekey = base_keys.pop(ii)
                  block_to_item.remove(thisatt)
                  break
        if not basekey in self or mode=="replace":
            new_parent = new_bc.get_parent(key)
            if parent is not None and new_parent is None:
               new_parent = parent
            self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
        else:
            if mode=="strict":
                raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
            elif mode=="overlay":
                # print "Merging block %s with %s" % (basekey,key)
                self[basekey].merge(new_bc[key],mode,match_att=match_att)
            else:
                raise StarError( "Merge called with unknown mode %s" % mode)

Checking conformance. The CIF and STAR standards differ in whether nested loops are allowed and in the maximum data name length. Although the CIF 1.1 standard allows long lines (up to 2048 characters), data names are restricted to at most 75 characters.

<Conformance checks>= (<-U)
def checknamelengths(self,target_block,maxlength=-1):
    if maxlength < 0:
        return
    else:
        toolong = [a for a in target_block.keys() if len(a)>maxlength]
    outstring = ""
    if toolong:
       outstring = "\n".join(toolong)
       raise StarError( 'Following data names too long:' + outstring)

When validating DDL2-type dictionaries against the DDL spec file, we have to be able to see all values of parent data items across all save frames in order to validate parent-child relations (I have inferred this, but if I ever find a standard document this may turn out to be wrong). So this method is provided to return a list of all values taken by the given attribute within all of the blocks inside a block collection.

A flat list is returned, even if looped values happen to occur in a data block. This is because the one routine that calls this method is interested in whether or not a given value occurs, rather than how it occurs or what it occurs with. We also remove duplicate values.

<Collect all values of a single key in all blocks>= (<-U)
def get_all(self,item_name):
    raw_values = [self[a].get(item_name) for a in self.keys()]
    raw_values = [a for a in raw_values if a is not None]
    ret_vals = []
    for rv in raw_values:
        if isinstance(rv,list):
            for rvv in rv:
                if rvv not in ret_vals: ret_vals.append(rvv)
        else:
            if rv not in ret_vals: ret_vals.append(rv)
    return ret_vals
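
For example, a sketch of collecting every value of _item.name across all save frames of a DDL2 dictionary held in a block collection (the name ddl2_dic is assumed for illustration):

    parent_names = ddl2_dic.get_all('_item.name')   # flat list, duplicates removed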

Writing all this stuff out to a string. We loop over each of the individual sections, getting their string representation. We implement this using a StringIO object for speed. Note that the default output comment specifies a CIF 1.1 standard file.

Note that child blocks must be save frames, so we hard-code 'save'.

If self.grammar is '2.0', save frames are not nested and table/list delimiters are spaces; if 'STAR2', save frames are nested. We allow the maximum line length to be overridden here although preferably the output length is set when initialising the file.

<Write out to string representation>= (<-U)
    def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
        """Return the contents of this file as a string, wrapping if possible at `wraplength`
        characters and restricting maximum line length to `maxoutlength`.  Delimiters and
        save frame nesting are controlled by `self.grammar`. If `blockorder` is
        provided, blocks are output in this order unless nested save frames have been
        requested (STAR2). The default block order is the order in which blocks were input.
        `saves_after` inserts all save frames after the given dataname,
        which allows less important items to appear later.  Useful in conjunction with a
        template for dictionary files."""
        if maxoutlength != 0:
            self.SetOutputLength(maxoutlength)
        if not comment:
            comment = self.header_comment
        outstring = StringIO()
        if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
            outstring.write(r"#\#CIF_2.0" + "\n")
        outstring.write(comment)
        # prepare all blocks
        for b in self.dictionary.values():
            b.set_grammar(self.grammar)
            b.formatting_hints = self.master_template
            b.SetOutputLength(wraplength,self.maxoutlength)
        # loop over top-level
        # monitor output
        all_names = list(self.child_table.keys())   #i.e. lower case
        if blockorder is None:
            blockorder = self.block_input_order
        top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None]
        for blockref,blockname in top_block_names:
            print('Writing %s, ' % blockname + repr(self[blockref]))
            outstring.write('\n' + 'data_' +blockname+'\n')
            all_names.remove(blockref)
            if self.standard == 'Dic':              #put contents before save frames
                outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application'))
            if self.grammar == 'STAR2':  #nested save frames
                child_refs = self.get_immediate_children(blockref)
                for child_ref,child_info in child_refs:
                    child_name = child_info.block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    self.block_to_string_nested(child_ref,child_name,outstring,4)
                    outstring.write('\n' + 'save_'+ '\n')
            elif self.grammar in ('1.0','1.1','2.0'):                   #non-nested save frames
                child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)]
                for child_ref in child_refs:
                    child_name = self.child_table[child_ref].block_id
                    outstring.write('\n\n' + 'save_' + child_name + '\n')
                    outstring.write(str(self[child_ref]))
                    outstring.write('\n\n' + 'save_' + '\n')
                    all_names.remove(child_ref.lower())
            else:
                raise StarError('Grammar %s is not recognised for output' % self.grammar)
            if self.standard != 'Dic':              #put contents after save frames
                outstring.write(str(self[blockref]))
            else:
                outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application'))
        returnstring =  outstring.getvalue()
        outstring.close()
        if len(all_names)>0:
            print('WARNING: following blocks not output: %s' % repr(all_names))
        else:
            print('All blocks output.')
        return returnstring

    def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0):
        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children,
           and syntactically nesting save frames"""
        child_refs = self.get_immediate_children(block_ref)
        self[block_ref].set_grammar(self.grammar)
        if self.standard == 'Dic':
            outstring.write(str(self[block_ref]))
        for child_ref,child_info in child_refs:
            child_name = child_info.block_id
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string_nested(child_ref,child_name,outstring,indentlevel)
            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
        if self.standard != 'Dic':
            outstring.write(str(self[block_ref]))
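
A rough round-trip sketch (file names are hypothetical); the output grammar is chosen with set_grammar before calling WriteOut:

    sf = StarFile('input.cif', grammar='auto')
    sf.set_grammar('2.0')                  # grammar used on output
    with open('output.cif', 'w') as f:
        f.write(sf.WriteOut(wraplength=80))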

Output template. We process the template file and immediately apply it to all existing blocks. New blocks would not otherwise see this template, so we also store it for application after such blocks are created.

<Set output template>= (<-U)
def SetTemplate(self,template_file):
        """Use `template_file` as a template for all block output"""
        self.master_template = process_template(template_file)
        for b in self.dictionary.values():
            b.formatting_hints = self.master_template

StarFile

If we are passed a filename, we open it and read it in, assuming that it is a conformant STAR file. A StarFile object is a dictionary of StarBlock objects, accessed by block name. Parameter maxoutlength sets the maximum line size for output; if it is not specified, it defaults to 2048 characters.

<StarFile class>= (<-U)
class StarFile(BlockCollection):
<Initialise data structures>
<Set URI>

When initialising, we add those parts that are unique to the StarFile as opposed to a simple collection of blocks - i.e. reading in from a file, and some line length restrictions. We do not indent this section in the noweb file, so that our comment characters output at the beginning of the line.

<Initialise data structures>= (<-U)
    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
                scoping='instance',grammar='1.1',scantype='standard',
                 permissive=False,**kwargs):
        super(StarFile,self).__init__(datasource=datasource,**kwargs)
        self.my_uri = getattr(datasource,'my_uri','')
        if maxoutlength == 0:
            self.maxoutlength = 2048
        else:
            self.maxoutlength = maxoutlength
        self.scoping = scoping
        if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"):
            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
                     maxlength = maxinlength,permissive=permissive)
        self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file
#               Produced by PySTARRW module
#
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""

A function to make sure we have the correct file location.

<Set URI>= (<-U)
    def set_uri(self,my_uri): self.my_uri = my_uri

Reading in a file. We use the Yapps3-generated YappsStarParser module to provide grammar services. The structure returned from parsing is a StarFile, with possible grammar violations due to duplicate block names.

We allow fast reads using the compiled StarScan module by passing the option 'flex' to this routine. We also permit an already-opened stream to be passed to us (thanks to Boris Dusek for this contribution). There are 3 possible syntax variations: very old CIF files allowed unquoted data values to begin with open square brackets, version 1.1 disallowed this, and DDLm-conformant files interpret these as actual bracket expressions. The different grammars are selected by the 'grammar' argument.

We allow reading CBF files, which can contain binary sections, by removing all characters found between paired occurrences of the string '-BINARY-FORMAT-SECTION-'. This is not a robust approach, as this string could theoretically be found in a comment or datavalue.

We save our URL for possible later use in finding files relative to the location of this file e.g. with DDLm dictionary imports.

<Read in a STAR file>= (<-U)
def ReadStar(filename,prepared = None, maxlength=-1,
             scantype='standard',grammar='STAR2',CBF=False, permissive=False):

    """ Read in a STAR file, returning the contents in the `prepared` object.

    * `filename` may be a URL, a file
    path on the local system, or any object with a `read` method.

    * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename`
    will be added to.

    * `maxlength` is the maximum allowable line length in the input file. This has been set at
    2048 characters for CIF but is unlimited (-1) for STAR files.

    * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1`
    is identical except for the exclusion of square brackets as the first characters in
    undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will
    read files according to the STAR2 publication.  If grammar is `None` or `auto`, autodetection
    will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files.
    Note that (nested) save frames are read in all grammar variations and then flagged afterwards if
    they do not match the requested grammar.

    * `scantype` can be `standard` or `flex`.  `standard` provides pure Python parsing at the
    cost of a factor of 10 or so in speed.  `flex` will tokenise the input CIF file using
    fast C routines.  Note that running PyCIFRW in Jython uses native Java regular expressions
    to provide a speedup regardless of this argument.

    * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is
    excised from the input data stream before parsing and is not available in the returned object.

    * `permissive` allows non UTF8 encodings (currently only latin1) in the input file. These are a 
    violation of the standard.

    """

    if prepared is None:
        prepared = StarFile()
    # save desired scoping (prepared is guaranteed to exist by now)
    save_scoping = prepared.scoping
    from . import YappsStarParser_1_1 as Y11
    from . import YappsStarParser_1_0 as Y10
    from . import YappsStarParser_2_0 as Y20
    from . import YappsStarParser_STAR2 as YST
    if grammar == "auto" or grammar is None:
        try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)]
    elif grammar == '1.0':
        try_list = [('1.0',Y10)]
    elif grammar == '1.1':
        try_list = [('1.1',Y11)]
    elif grammar == '2.0':
        try_list = [('2.0',Y20)]
    elif grammar == 'STAR2':
        try_list = [('STAR2',YST)]
    else:
        raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar ))
    if isinstance(filename,(unicode,str)):
        # create an absolute URL
        relpath = urlparse(filename)
        if relpath.scheme == "":
            if not os.path.isabs(filename):
                fullpath = os.path.join(os.getcwd(),filename)
            else:
                fullpath = filename
            newrel = list(relpath)
            newrel[0] = "file"
            newrel[2] = fullpath
            my_uri = urlunparse(newrel)
        else:
            my_uri = urlunparse(relpath)
        # print("Full URL is: " + my_uri)
        filestream = urlopen(my_uri)
        try:
            text = filestream.read().decode('utf-8-sig')
        except UnicodeDecodeError:
            if permissive:
                text = filestream.read().decode('latin1')
                print("WARNING: %s violates standard (latin1 encoding instead of UTF8)." % filename)
            else:
                raise StarError("%s: bad encoding (must be utf8 or ascii)" % filename)
        filestream.close()
    else:
        filestream = filename   #already opened for us
        text = filestream.read()
        if not isinstance(text,unicode):
            try:
                text = text.decode('utf-8-sig')  #CIF is always ascii/utf8
            except UnicodeDecodeError:
                if permissive:
                    text = text.decode('latin1')   #decode the bytes already read
                    print("WARNING: text violates CIF standard (latin1 encoding instead of UTF8)")
                else:
                    raise StarError("Bad input encoding (must be utf8 or ascii)")
        my_uri = ""
    if not text:      # empty file, return empty block
        prepared.set_uri(my_uri)
        return prepared
    # filter out non-ASCII characters in CBF files if required.  We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
    if CBF:
       text_bits  = text.split("-BINARY-FORMAT-SECTION-")
       text = text_bits[0]
       for section in range(2,len(text_bits),2):
           text = text+" (binary omitted)"+text_bits[section]
    # we recognise ctrl-Z as end of file
    endoffile = text.find(chr(26))
    if endoffile >= 0:
        text = text[:endoffile]
    split = text.split('\n')
    if maxlength > 0:
        toolong = [a for a in split if len(a)>maxlength]
        if toolong:
            pos = split.index(toolong[0])
            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    # honour the header string
    if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list:
        try_list.remove(('2.0',Y20))
        if not try_list:
            raise StarError('File %s missing CIF2.0 header' % (filename))
    for grammar_name,Y in try_list:
       if scantype == 'standard' or grammar_name in ['2.0','STAR2']:
            parser = Y.StarParser(Y.StarParserScanner(text))
       else:
            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
       # handle encoding switch
       if grammar_name in ['2.0','STAR2']:
           prepared.set_characterset('unicode')
       else:
           prepared.set_characterset('ascii')
       proto_star = None
       try:
           proto_star = getattr(parser,"input")(prepared)
       except Y.yappsrt.YappsSyntaxError as e:
           input = parser._scanner.input
           Y.yappsrt.print_error(input, e, parser._scanner)
       except Y.yappsrt.NoMoreTokens:
           print('Could not complete parsing; stopped around here:',file=sys.stderr)
           print(parser._scanner,file=sys.stderr)
       except ValueError:
           print('Unexpected error:')
           import traceback
           traceback.print_exc()
       if proto_star is not None:
           proto_star.set_grammar(grammar_name)   #remember for output
           break
    if proto_star is None:
        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
        errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner )
        raise StarError( errorstring)
    # set visibility correctly
    proto_star.scoping = 'dictionary'
    proto_star.set_uri(my_uri)
    proto_star.scoping = save_scoping
    return proto_star
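
A minimal usage sketch (the file name is illustrative only):

    sf = ReadStar('example.star', grammar='auto')   # a new StarFile is created when prepared is None
    print(list(sf.keys()))                          # names of the blocks that were read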

Dictionaries

If a dictionary is attached to a StarBlock, we can use it to provide automatic value conversion whenever a value is retrieved.

<Working with dictionaries>= (U->)
<Assigning a dictionary>

In DDL1 and DDL2, there is not a whole lot of point associating a DDL dictionary with a CIF file in an ongoing way. However, with DDLm the dictionary can be used when searching for attributes, so is no longer simply a checking mechanism but is now also a generative mechanism. So there are advantages to making this assignment for DDLm.

If we are passed a non-DDLm dictionary, we ignore the request as there is nothing we can do with it outside the normal validity checking, for which a different routine is in place.

Having a dictionary in place also implies that values that are returned are automatically converted to the type given in the dictionary.

<Assigning a dictionary>= (<-U)
def assign_dictionary(self,dic):
    if not dic.diclang=="DDLm":
        print("Warning: ignoring dictionary %s" % dic.my_uri)
        return
    self.dictionary = dic

def unassign_dictionary(self):
    """Remove dictionary-dependent behaviour"""
    self.dictionary = None
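
A short sketch of dictionary-aided access (ddlm_dic stands for a DDLm dictionary object, i.e. one whose diclang is "DDLm"; the dataname is illustrative):

    block.assign_dictionary(ddlm_dic)
    vol = block['_cell.volume']      # may be derived via dREL if not present
    block.unassign_dictionary()      # restore plain lookup behaviour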

Collection datatype

DDLm introduced data values which could be lists, tuples or hash tables. We define a distinct StarList class to distinguish them from loop lists, and take the opportunity to expand the getitem method to allow multiple arguments.

<Define a collection datatype>= (<-U)
class StarList(list):
    def __getitem__(self,args):
        if isinstance(args,(int,slice)):
            return super(StarList,self).__getitem__(args)
        elif isinstance(args,tuple) and len(args)>1:   #extended comma notation
            return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:])
        else:
            return super(StarList,self).__getitem__(args[0])

    def __str__(self):
        return "SL("+super(StarList,self).__str__() + ")"

class StarDict(dict):
    pass
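
For example, the extended comma notation indexes into nested StarLists directly:

    m = StarList([StarList([1,2]), StarList([3,4])])
    print(m[1,0])    # -> 3, equivalent to m[1][0]
    print(str(m))    # -> SL([[1, 2], [3, 4]])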


Loop Block class

A LoopBlock is provided as a row-based interface to a collection of columns, so that iteration over packets is possible. It is initialised with a StarBlock object and a dataname, and returns an object that accesses the loop containing that dataname. The data values are not copied, meaning that changes to the data (e.g. appending a packet) will be apparent in the StarBlock parent (i.e. the LoopBlock is like a view onto the parent).

<LoopBlock class>= (<-U)
class LoopBlock(object):
    <Initialise Loop Block>
    <Add emulation of a mapping type>
    <Selection of iterators>
    <Remove a data item>
    <Get complete looped data>
    <Packet handling methods>
    <Change data item order>
    <Return position of data item>
    <Get co-looped names>
    <Add to looped data>

Initialising: we do not check conformance to standards here: it is assumed that this has been done by the creating routine.

<Initialise Loop Block>= (<-U)
def __init__(self,parent_block,dataname):
    self.loop_no = parent_block.FindLoop(dataname)
    if self.loop_no < 0:
        raise KeyError('%s is not in a loop structure' % dataname)
    self.parent_block = parent_block

<Add emulation of a mapping type>= (<-U)
def keys(self):
    return self.parent_block.loops[self.loop_no]

def values(self):
    return [self.parent_block[a] for a in self.keys()]

#Avoid iterator even though that is Python3-esque
def items(self):
    return list(zip(self.keys(),self.values()))

def __getitem__(self,dataname):
    if isinstance(dataname,int):   #a packet request
        return self.GetPacket(dataname)
    if dataname in self.keys():
        return self.parent_block[dataname]
    else:
        raise KeyError('%s not in loop block' % dataname)

def __setitem__(self,dataname,value):
    self.parent_block[dataname] = value
    self.parent_block.AddLoopName(self.keys()[0],dataname)

def __contains__(self,key):
    return key in self.parent_block.loops[self.loop_no]

def has_key(self,key):
    return key in self

def __iter__(self):
    packet_list = zip(*self.values())
    names = self.keys()
    for p in packet_list:
        r = StarPacket(p)
        for n in range(len(names)):
            setattr(r,names[n].lower(),r[n])
        yield r

# for compatibility
def __getattr__(self,attname):
    return getattr(self.parent_block,attname)
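
A sketch of row-wise iteration (assuming sb is a StarBlock containing a loop over the hypothetical datanames below):

    lb = LoopBlock(sb, '_atom_site_label')
    for packet in lb:
        print(packet._atom_site_label, packet._atom_site_occupancy)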

Packets. We store columns, so extracting packets is a much slower task.

<Packet handling methods>= (<-U)
<Get nth loop packet>
<Add a packet>
<Get item order>

A StarPacket object looks very much like a list, in order to support the DDLm semantics of allowing a particular value to be accessed by attribute. DDLm also allows merged categories, which means that a packet can contain datanames from the appropriate sub-categories.

Furthermore, a StarPacket can derive missing values by calling the appropriate dREL function. To do this, we store the key name used to create the packet.

Note that all attributes must be lower case in order to meet the caseless matching required by the STAR/CIF standards.

<Star packet class>= (<-U)
class StarPacket(list):
    def merge_packet(self,incoming):
        """Merge contents of incoming packet with this packet"""
        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
        self.extend(incoming)
        for na in new_attrs:
            setattr(self,na,getattr(incoming,na))

    def __getattr__(self,att_name):
        """Derive a missing attribute"""
        if att_name.lower() in self.__dict__:
            return getattr(self,att_name.lower())
        if att_name in ('cif_dictionary','fulldata','key'):
            raise AttributeError('Programming error: can only assign value of %s' % att_name)
        d = self.cif_dictionary
        c = self.fulldata
        k = self.key
        assert isinstance(k,list)
        d.derive_item(att_name,c,store_value=True)
        #
        # now pick out the new value
        # self.key is a list of the key values
        keydict = dict([(v,(getattr(self,v),True)) for v in k])
        full_pack = c.GetCompoundKeyedPacket(keydict)
        return getattr(full_pack,att_name)

Get nth looped packet. This returns a packet of data.

<Get nth loop packet>= (<-U)
def GetPacket(self,index):
    thispack = StarPacket([])
    for myitem in self.parent_block.loops[self.loop_no]:
        thispack.append(self[myitem][index])
        setattr(thispack,myitem,thispack[-1])
    return thispack

Adding a packet. We are passed a StarPacket object, which is just a list which is accessible by attribute. As I have not yet produced a proper __init__ or __new__ method to allow creation of a new StarPacket, it is advisable to create a new packet by copying an old packet.

<Add a packet>= (<-U)
def AddPacket(self,packet):
    for myitem in self.parent_block.loops[self.loop_no]:
        old_values = self.parent_block[myitem]
        old_values.append(packet.__getattribute__(myitem))
        self.parent_block[myitem] = old_values
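
Following the advice above, a sketch of adding a row by copying an existing packet (datanames hypothetical, lb a LoopBlock over the loop of interest):

    new_row = lb.GetPacket(0)           # copy the first packet
    new_row._atom_site_label = 'C2'     # adjust the values of interest
    lb.AddPacket(new_row)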

Return order of items - this is just a copy of the list of datanames making up this loop.

<Get item order>= (<-U)
def GetItemOrder(self):
    """Return a list of datanames in this `LoopBlock` in the order that they will be
    printed"""
    return self.parent_block.loops[self.loop_no][:]

Move an item to a different position in the loop. This only affects the printout order. We allow different capitalisation, and have to allow for nested loops in the order list as well as for a loop reference being passed in the itemname argument.

<Change data item order>= (<-U)
def ChangeItemOrder(self,itemname,newpos):
    """Change the position at which `itemname` appears when printing out to `newpos`."""
    self.parent_block.loops[self.loop_no].remove(itemname.lower())
    self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

Get co-looped names. Sometimes we just want names, and will get the values ourselves on a need-to-know basis.

<Get co-looped names>= (<-U U->) [D->]
def GetLoopNames(self,keyname):
    if keyname in self:
        return self.keys()
    for aloop in self.loops:
        try:
            return aloop.GetLoopNames(keyname)
        except KeyError:
            pass
    raise KeyError('Item does not exist')

Adding to a loop. We find the loop containing the dataname that we have been passed, and then add all of the (key, value) pairs that we are passed in data, which is a dictionary. We expect that the data have been sorted out for us, unlike in AddLoopItem, where both unlooped and looped data may occur in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance.

<Add to looped data>= (<-U U->) [D->]
def AddToLoop(self,dataname,loopdata):
    thisloop = self.GetLoop(dataname)
    for itemname,itemvalue in loopdata.items():
        thisloop[itemname] = itemvalue

Star Block class

A Star Block is no longer simply a LoopBlock. Historically it was distinguished by holding save frames, but this has been removed. Development note: in the original implementation, a StarBlock was just a special type of LoopBlock. In our new implementation, a LoopBlock is a simple structure that is created to access loops in a certain way.

The other difference between LoopBlocks and StarBlocks is that the latter can have a dictionary attached, whereas inner LoopBlocks should not.

<StarBlock class>= (<-U)
class StarBlock(object):
    <Initialise a StarBlock>
    <Add StarBlock emulation of mapping type>
    <Return position of data item>
    <Change order of data item>
    <Return order of all data items>
    <Add a data item>
    <Old multi-item add routine>
    <Check data name for STAR conformance>
    <Check data item for STAR conformance>
    <Regularise data values>
    <Remove a data item>
    <Return value of item>
    <Dealing with loops>
    <Functions for printing out>
    <Merge with another block>
    <Working with dictionaries>

Initialising a StarBlock. If given non-empty data to initialise the block with, we either copy it (if it is another StarBlock) or else initialise each key-value pair separately (if given a list or tuple of items). Apart from this we make no check of the actual conformance of the supplied items.

To maximise efficiency, we store all keys as lower case, and keep a table of key vs the actual supplied capitalisation for printout.

The overwrite argument allows values to be silently replaced, as per a normal python dictionary. However, when reading in from a file, we want to detect duplicated values, so we set this to false. As DDLm introduces the unicode character set, we need to indicate which character set we are prepared to accept.

We store the data in self.block. Each entry in this table is a two-element list pairing the string value with the corresponding calculated or actual value; both elements must always be changed together.

Formatting hints are used on output to suggest column positions for looped datanames and delimiters. In practice these are used only for dictionaries where fine-tuned layout is helpful for human readers.

We provide a simple function to change the maximum name length, so that we can read in a StarBlock and then enforce that the names are a maximum length as required by CIF. Values calculated with a dictionary are cached by setting self.cache_vals to True.

<Initialise a StarBlock>= (<-U)
def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
             characterset='ascii',maxnamelength=-1):
    self.block = {}    #the actual data storage (lower case keys)
    self.loops = {}    #each loop is indexed by a number and contains a list of datanames
    self.item_order = []  #lower case, loops referenced by integer
    self.formatting_hints = {}
    self.true_case = {} #transform lower case to supplied case
    self.provide_value = False  #prefer string version always
    self.dictionary = None      #DDLm dictionary
    self.popout = False         #used during load iteration
    self.curitem = -1           #used during iteration
    self.cache_vals = True      #store all calculated values
    self.maxoutlength = maxoutlength
    self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
    self.set_characterset(characterset)   #to check input names
    self.wraplength = wraplength
    self.overwrite = overwrite
    self.string_delimiters = ["'",'"',"\n;"]   #universal CIF set
    self.list_delimiter = "  "                 #CIF2 default
    self.wrapper = textwrap.TextWrapper()
    if isinstance(data,(tuple,list)):
        for item in data:
            self.AddLoopItem(item)
    elif isinstance(data,StarBlock):
        self.block = data.block.copy()
        self.item_order = data.item_order[:]
        self.true_case = data.true_case.copy()
        # loops as well
        self.loops = data.loops.copy()

def setmaxnamelength(self,maxlength):
    """Set the maximum allowable dataname length (-1 for no check)"""
    self.maxnamelength = maxlength
    if maxlength > 0:
        bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
        if len(bad_names)>0:
            raise StarError('Datanames too long: ' + repr( bad_names ))

def set_characterset(self,characterset):
    """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
    self.characterset = characterset
    if characterset == 'ascii':
        self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
    elif characterset == 'unicode':
        if sys.maxunicode < 1114111:
           self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
        else:
           self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)

Adding emulation of a mapping type. We add any of the other functions we would like to emulate. __len__ returns the number of datanames in this block, whether looped or not, so it is not simply the number of top-level entries.

A Star Block can hold save frames in the outermost loop. From version 4.05 we do not allow save frames to be set from within the block; rather, an enclosing block collection should be created (e.g. a Star File) and the save frame added to that block collection with the 'enclosing' StarBlock set as its parent. We catch the saves key and raise an error to flag the deprecation.

<Add StarBlock emulation of mapping type>= (<-U)
def __str__(self):
    return self.printsection()

def __setitem__(self,key,value):
    if key == "saves":
        raise StarError("""Setting the saves key is deprecated. Add the save block to
an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
    self.AddItem(key,value)

def __getitem__(self,key):
    if key == "saves":
        raise StarError("""The saves key is deprecated. Access the save block from
the enclosing block collection (e.g. CIF or STAR file object)""")
    try:
       rawitem,is_value = self.GetFullItemValue(key)
    except KeyError:
       if self.dictionary:
           # send the dictionary the required key and a pointer to us
           try:
               new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
           except StarDerivationFailure:   #try now with defaults included
               try:
                   new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
               except StarDerivationFailure as s:
                   print("In StarBlock.__getitem__, " + repr(s))
                   raise KeyError('No such item: %s' % key)
           print('Set %s to derived value %s' % (key, repr(new_value)))
           return new_value
       else:
           raise KeyError('No such item: %s' % key)
    # we now have an item, we can try to convert it to a number if that is appropriate
    # note numpy values are never stored but are converted to lists
    if not self.dictionary or not key in self.dictionary: return rawitem
    print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
    if is_value:
        if self.provide_value: return rawitem
        else:
           print('Turning %s into string' % repr( rawitem ))
           return self.convert_to_string(key)
    else:    # a string
        if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
                                  (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)):
            return self.dictionary.change_type(key,rawitem)
        elif self.provide_value: # catch the question marks
            do_calculate = False
            if isinstance(rawitem,(list,tuple)):
                known = [a for a in rawitem if a != '?']
                if len(known) == 0:   #all questions
                    do_calculate = True
            elif rawitem == '?':
                    do_calculate = True
            if do_calculate:
               # remove old value
               del self[key]
               try:
                   new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
               except StarDerivationFailure as s:
                   try:
                       new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                   except StarDerivationFailure as s:

                       print("Could not turn %s into a value:" + repr(s))
                       return rawitem
               else:
                   print('Set %s to derived value %s' % (key, repr( new_value )))
                   return new_value
        return rawitem   #can't do anything

def __delitem__(self,key):
    self.RemoveItem(key)

def __len__(self):
    blen = len(self.block)
    return blen

def __nonzero__(self):
    if self.__len__() > 0: return 1
    return 0

# keys returns all internal keys
def keys(self):
    return list(self.block.keys())    #always lower case

def values(self):
    return [self[a] for a in self.keys()]

def items(self):
    return list(zip(self.keys(),self.values()))

def __contains__(self,key):
    if isinstance(key,(unicode,str)) and key.lower() in self.keys():
        return True
    return False

def has_key(self,key):
    return key in self

def has_key_or_alias(self,key):
    """Check if a dataname or alias is available in the block"""
    initial_test = key in self
    if initial_test: return True
    elif self.dictionary:
        aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)]
        if len(aliases)>0:
           return True
    return False

def get(self,key,default=None):
    if key in self:
        retval = self.__getitem__(key)
    else:
        retval = default
    return retval

def clear(self):
    self.block = {}
    self.loops = {}
    self.item_order = []
    self.true_case = {}

# doesn't appear to work
def copy(self):
    newcopy = StarBlock()
    newcopy.block = self.block.copy()
    newcopy.item_order = self.item_order[:]
    newcopy.true_case = self.true_case.copy()
    newcopy.loops = self.loops.copy()
#    return self.copy.im_class(newcopy)   #catch inheritance
    return newcopy

def update(self,adict):
    for key in adict.keys():
        self.AddItem(key,adict[key])

This method is used when printing out, which is why it takes both names and numbers.

<Return position of data item>= (<-U <-U)
def GetItemPosition(self,itemname):
    """A utility function to get the numerical order in the printout
    of `itemname`.  An item has coordinate `(loop_no,pos)` with
    the top level having a `loop_no` of -1.  If an integer is passed to
    the routine then it will return the position of the loop
    referenced by that number."""
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not itemname in self:
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

This routine moves around the order of objects in the printout. We can only move an item within the loop in which it appears.

<Change order of data item>= (<-U)
def ChangeItemOrder(self,itemname,newpos):
    """Move the printout order of `itemname` to `newpos`. If `itemname` is
    in a loop, `newpos` refers to the order within the loop."""
    if isinstance(itemname,(unicode,str)):
        true_name = itemname.lower()
    else:
        true_name = itemname
    loopno = self.FindLoop(true_name)
    if loopno < 0:  #top level
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)
    else:
        self.loops[loopno].remove(true_name)
        self.loops[loopno].insert(newpos,true_name)

<Return order of all data items>= (<-U)
def GetItemOrder(self):
    """Return a list of datanames in the order in which they will be printed.  Loops are
    referred to by numerical index"""
    return self.item_order[:]

Adding a data item.

We check for consistency by making sure the new item is not already in the block. If it is, we replace it (consistent with the meaning of square brackets in Python), unless self.overwrite is False, in which case an error is raised.

We skip checking of data values if the precheck value is True: this is typically set when the item is being read from a file, and so has already been checked or will be checked in bulk at the end.

Note that all strings are stored internally as unicode.

<Add a data item>= (<-U)
def AddItem(self,key,value,precheck=False):
    """Add dataname `key` to block with value `value`.  `value` may be
    a single value, a list or a tuple. If `precheck` is False (the default),
    all values will be checked and converted to unicode strings as necessary. If
    `precheck` is True, this checking is bypassed.  No checking is necessary
    when values are read from a CIF file as they are already in correct form."""
    if not isinstance(key,(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( key ))
    key = unicode(key)    #everything is unicode internally
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters
    # check for overwriting
    if key in self:
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    if not precheck:   #need to sanitise
        regval,empty_val = self.regularise_data(value)
        pure_string = check_stringiness(regval)
        self.check_item_value(regval)
    else:
        regval,empty_val = value,None
        pure_string = True
    # update ancillary information first
    lower_key = key.lower()
    if not lower_key in self and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    try:
        del self.true_case[lower_key]
    except KeyError:
        pass
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})
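
A brief sketch of adding items (values illustrative):

    sb = StarBlock()
    sb.AddItem('_cell_length_a', '5.959')
    sb['_cell_length_b'] = '14.956'   # square-bracket assignment calls AddItem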

This is the original routine for adding a loop item, left in for consistency with old versions. Do not use.

<Old multi-item add routine>= (<-U)
def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    """*Deprecated*. Use `AddItem` followed by `CreateLoop` if
    necessary."""
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],(unicode,str)):
         raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
    else:
        self.AddItem(incomingdata[0],incomingdata[1])

Checking the data names. The CIF 1.1 standard restricts characters in a data name to ASCII 33-126, and a leading underscore is required. Data values are additionally allowed to contain the blank characters, i.e. ASCII 09, 10, 13 and 32. Data items may be lists, which we need to detect before checking. We assume that the item has been regularised before this check is called.

The CIF2 standard allows all of Unicode, with certain blocks disallowed. The removal of the disallowed characters takes place on file read.

We have the name length as a separate call as file reading will automatically produce datanames with the correct syntax, so during file reading we do not require any checking, but we do still need to check name length.

<Check data name for STAR conformance>= (<-U)
def check_data_name(self,dataname,maxlength=-1):
    if maxlength > 0:
        self.check_name_length(dataname,maxlength)
    if dataname[0]!='_':
        raise StarError( 'Dataname ' + dataname + ' does not begin with _')
    if self.characterset=='ascii':
        if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
    else:
        # print 'Checking %s for unicode characterset conformance' % dataname
        if len ([a for a in dataname if ord(a) < 33]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
        if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
        if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
        if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
        if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
        if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xFFFE) == 0xFFFE]) > 0:
            print('%s fails' % dataname)
            for a in dataname: print('%x' % ord(a),end="")
            print()
            raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')

def check_name_length(self,dataname,maxlength):
    if len(dataname)>maxlength:
        raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
    return

<Check data item for STAR conformance>= (<-U)
def check_item_value(self,item):
    test_item = item
    if not isinstance(item,(list,dict,tuple)):
       test_item = [item]         #single item list
    def check_one (it):
        if isinstance(it,unicode):
            if it=='': return
            me = self.char_check.match(it)
            if not me:
                print("Fail value check: %s" % it)
                raise StarError('Bad character in %s' % it)
            else:
                if me.span() != (0,len(it)):
                    print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                    raise StarError('Data item "' + repr( it ) +  u'"... contains forbidden characters')
    [check_one(a) for a in test_item]

Regularising data. We want the copy.deepcopy operation to work, so we cannot have any arrays passed into the master dictionary. We make sure everything goes in either as a single item or as a dict/list/tuple. We provide an empty datavalue with the same structure as the returned value so that the value/string alternate is correctly initialised/reset.

Note that all string data should be Unicode. To maintain compatibility for Python 2 we apply Unicode to any string data.

<Regularise data values>= (<-U)
def regularise_data(self,dataitem):
    """Place dataitem into a list if necessary"""
    from numbers import Number
    if isinstance(dataitem,str):
        return unicode(dataitem),None
    if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
        return dataitem,None  #assume StarList/StarDict contain unicode if necessary
    if isinstance(dataitem,(tuple,list)):
        v,s = zip(*list([self.regularise_data(a) for a in dataitem]))
        return list(v),list(s)
        #return dataitem,[None]*len(dataitem)
    # so try to make into a list
    try:
        regval = list(dataitem)
    except TypeError as value:
        raise StarError( str(dataitem) + ' is wrong type for data value\n' )
    v,s = zip(*list([self.regularise_data(a) for a in regval]))
    return list(v),list(s)

Dimension of data. This would ordinarily be the number of nested levels, and if we have a naked string, we have to return zero. We recursively burrow down to the lowest level. If a list is of zero length, we cannot burrow any further, so simply return one more than the current level.

We return as well the length of the received packet. Note that we consider dataitems which are *not* tuples or lists to be primitive. This includes StarLists (which are a single data item) and numpy arrays. Unfortunately this means we have to use the ungainly check involving the __class__ property, as StarLists and Tuples are subclasses of list and tuple and will therefore count as instances of them. In the context of DDLm it is probably more elegant to define a special class for looped data rather than for primitive lists as data items.

This is a method of the module, rather than belonging to any particular class.

<Get data dimension>= (<-U)
def get_dim(dataitem,current=0,packlen=0):
    zerotypes = [int, float, str]
    if type(dataitem) in zerotypes:
        return current, packlen
    if not dataitem.__class__ == ().__class__ and \
       not dataitem.__class__ == [].__class__:
       return current, packlen
    elif len(dataitem)>0:
    #    print "Get_dim: %d: %s" % (current,`dataitem`)
        return get_dim(dataitem[0],current+1,len(dataitem))
    else: return current+1,0
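
For example:

    get_dim('5.99')              # -> (0, 0): a naked string
    get_dim(['a','b','c'])       # -> (1, 3)
    get_dim([[1,2],[3,4]])       # -> (2, 2)
    get_dim(StarList([1,2]))     # -> (0, 0): StarLists count as primitive values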

Numpy arrays are more difficult to check as they don't seem to implement automatic Python-style iteration (at least matrices don't). So we have to pick up this case while attempting to make dependence on Numpy optional.

<Check stringiness>= (U->)
def check_stringiness(data):
   """Check that the contents of data are all strings"""
   if not hasattr(data,'dtype'):   #so not Numpy
       from numbers import Number
       if isinstance(data,Number): return False
       elif isinstance(data,(unicode,str)): return True
       elif data is None:return False  #should be data are None :)
       else:
           for one_item in data:
               if not check_stringiness(one_item): return False
           return True   #all must be strings
   else:   #numerical python
       import numpy
       if data.ndim == 0:    #a bare value
           if data.dtype.kind in ['S','U']: return True
           else: return False
       else:
           for one_item in numpy.nditer(data):
               print('numpy data: ' + repr( one_item ))
               if not check_stringiness(one_item): return False
           return True
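
For example:

    check_stringiness(['a', ['b', 'c']])   # -> True: all items are strings
    check_stringiness(['a', 3])            # -> False: a number is present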

Removing a data item. We delete the item, and if it is looped, and nothing is left in the loop, we remove the loop. RemoveLoopItem is here for compatibility only.

<Remove a data item>= (<-U <-U)
def RemoveItem(self,itemname):
    """Remove `itemname` from the block."""
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if testkey in self:
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveLoopItem(self,itemname):
    """*Deprecated*. Use `RemoveItem` instead"""
    self.RemoveItem(itemname)

Returning an item value. Note that a looped block has little meaning without all the items in the loop; routine GetLoop is better in this case. This is a time-intensive lookup, so we initially assume that the key we have been passed is the right key (i.e. the case is the same) and only check for case if this fails.

We define an alternative call that returns both the stored value and whether or not it is a non-string value. This saves other routines performing the same check. But any StarLists are considered to be unready for use as values as they may in fact be Arrays or Matrices and therefore require their type to be changed.

Note that if the value is '?', or a list of '?', we could delete the dataitem altogether; however, that would lead to inconsistencies with previous calls to has_key, keys() etc.

<Return value of item>= (<-U)
def GetItemValue(self,itemname):
    """Return value of `itemname`.  If `itemname` is looped, a list
    of all values will be returned."""
    return self.GetFullItemValue(itemname)[0]

def GetFullItemValue(self,itemname):
    """Return the value associated with `itemname`, and a boolean flagging whether
    (True) or not (False) it is in a form suitable for calculation.  False is
    always returned for strings and `StarList` objects."""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError('Itemname %s not in datablock' % itemname)
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if not_none(s):
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif not_none(s):
        return s,False         #a list of string values
    else:
        if len(v)>0:
            return v,not isinstance(v[0],StarList)
        return v,True

A StarBlock allows dealing with loops on a columnar level. For row-based operations, a LoopBlock can be created with GetLoop and iterated over.

<Dealing with loops>= (<-U)
<Create a loop>
<Add name to loop>
<Find a loop>
<Get complete looped data>
<Get co-looped names>
<Add to looped data>
<Remove a packet>
<Get packet by key>
<Get packet by compound key>
<Get semantic packet by key>
<Get semantic packet by compound key>

Creating loops. In the latest version of PyCIFRW, a loop is simply a collection of datanames that together make up the loop. It is indexed by a number, which goes into the item_order array to produce the loop when printing out. No check of dataname existence is done, so that a loop can be created before the datanames are provided. In order to iterate over loop packets, a LoopBlock needs to be created subsequently.

When we create the loop, we remove the datanames from the item order list to prevent them being output twice, and we also remove them from any other loop. Thus, at any point in time, a dataname belongs to only one loop, but can be switched to another loop trivially.

<Create a loop>= (<-U)
def CreateLoop(self,datanames,order=-1,length_check=True):
       """Create a loop in the datablock. `datanames` is a list of datanames that
       together form a loop.  If length_check is True, they should have been initialised in the block
       to have the same number of elements (possibly 0). If `order` is given,
       the loop will appear at this position in the block when printing
       out. A loop counts as a single position."""

       if length_check:
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
           elif len(listed_values) != 0:
               raise ValueError('Request to loop datanames where some are single values and some are not')
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       # remove empty loops
       empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
       for a in empty_loops:
           self.item_order.remove(a)
           del self.loops[a]
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames]
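
For example, a loop can be created from previously initialised columns as follows (a sketch using hypothetical datanames):

sb['_atom_site_label'] = ['C1','O1']
sb['_atom_site_occupancy'] = [1.0,0.5]
sb.CreateLoop(['_atom_site_label','_atom_site_occupancy'])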

Removing a loop. The looped names are not removed, but will cause chaos on output unless they are placed into a different loop or deleted.

<Remove a loop>= (<-U)
def remove_loop(self,oldloop):
    """Remove loop referenced by [[oldloop]]. Datanames remain in the
    structure and should be removed separately if necessary"""
    # print "Removing %s: item_order %s" % (`oldloop`,self.item_order)
    # print "Length %d" % len(oldloop)
    self.item_order.remove(oldloop)
    self.loops.remove(oldloop)

Adding a dataname that has already been set to a loop. While relatively trivial, we still need to check that it does not exist in any other loops, and remove this dataname from the item order if it is present. We always use the canonical lower-case form. Also, the access to self[oldname] may trigger a round of evaluation, which we wish to avoid, so we make sure to switch off calculations in this case.

<Add name to loop>= (<-U)
def AddLoopName(self,oldname, newname):
    """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
    error is raised.  If `newname` is in a different loop, it is removed from that loop.
    The number of values associated with `newname` must match the number of values associated
    with all other columns of the new loop or a `ValueError` will be raised."""
    lower_newname = newname.lower()
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError('%s not in loop' % oldname)
    if lower_newname in self.loops[loop_no]:
        return
    # check length
    old_provides = self.provide_value
    self.provide_value = False
    loop_len = len(self[oldname])
    self.provide_value = old_provides
    if len(self[newname]) != loop_len:
        raise StarLengthError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
    # remove from any other loops
    [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(lower_newname)
    # remove from item_order if present
    try:
        self.item_order.remove(lower_newname)
    except ValueError:
        pass
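
Continuing the sketch above, a column of matching length can then be attached to the same loop:

sb['_atom_site_type_symbol'] = ['C','O']    # same length as the loop
sb.AddLoopName('_atom_site_label','_atom_site_type_symbol')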

Loops. We should distinguish two loop structures: the loop structures provided by the syntax, and the loop structures defined by the dictionary ('semantic' loops). The members of these loops do not coincide for 'joined' categories, where datanames may appear either in separate loops or within a single loop. Until we have a dictionary, we have no way to find the semantic loops.

The first function below returns the particular loop block containing the specified dataname, so that we can manipulate its contents directly, and therefore refers to a syntactic loop.

<Get complete looped data>= (<-U <-U)
def GetLoop(self,keyname):
    """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
    `keyname` is only significant as a way to specify the loop."""
    return LoopBlock(self,keyname)

<Find a loop>= (<-U)
def FindLoop(self,keyname):
    """Find the loop that contains `keyname` and return its numerical index or
    -1 if not present. The numerical index can be used to refer to the loop in
    other routines."""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1

Get co-looped names. Sometimes we just want names, and will get the values ourselves on a need-to-know basis.

<Get co-looped names>+= (<-U <-U) [<-D]
def GetLoopNames(self,keyname):
    """Return all datanames appearing together with `keyname`"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

Adding to a loop. We find the loop containing the dataname that we have been passed, and then append all of the (key,values) pairs that we are passed in data, which is a dictionary. We expect that the data have been sorted out for us, unlike when data are passed in AddLoopItem, when there can be both unlooped and looped data in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance.

<Add to looped data>+= (<-U <-U) [<-D]
def AddToLoop(self,dataname,loopdata):
    """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

    Add multiple columns to the loop containing `dataname`. `loopdata` is a
    collection of (key,value) pairs, where `key` is the new dataname and `value`
    is a list of values for that dataname"""
    self.update(loopdata)
    for one_name in loopdata:
        self.AddLoopName(dataname,one_name)

The draft DDLm specification uses square brackets next to a pre-specified identifier to mean "the packet of this category for which the key equals this item". We implement a function which fulfils this role for use in the pythonised dREL script. At this StarFile level we have no idea which dataname is the key, so that is passed to us from the dictionary processing layer. Note we assume a single key rather than multiple keys for this call, and let the calling layer handle multiple or missing packets.

We guarantee to return a single packet, or else raise a ValueError.

<Get packet by key>= (<-U)
def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet (a `StarPacket` object) where `keyname` has value
    `keyvalue`. Ignore case in `keyvalue` if `no_case` is True.  `ValueError`
    is raised if no packet is found or more than one packet is found."""
    my_loop = self.GetLoop(keyname)
    #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
    #print('Packet check on:' + keyname)
    #[print(repr(getattr(a,keyname))) for a in my_loop]
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
    print("Keyed packet: %s" % one_pack[0])
    return one_pack[0]
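
Continuing the hypothetical loop built earlier (packet attributes carry the dataname, as the getattr calls above assume):

pack = sb.GetKeyedPacket('_atom_site_label','O1')
print(pack._atom_site_occupancy)    # -> 0.5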

The current version of DDLm allows compound keys. We implement a routine to return a single packet corresponding to the values of the specified datanames.

<Get packet by compound key>= (<-U)
def GetCompoundKeyedPacket(self,keydict):
    """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
    in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
    True.  `ValueError` is raised if no packet is found or more than one packet is found."""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    keynames = list(keydict.keys())
    my_loop = self.GetLoop(keynames[0])
    for one_key in keynames:
        keyval,no_case = keydict[one_key]
        if no_case:
           my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
        else:
           my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
    if len(my_loop)!=1:
        raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
    print("Compound keyed packet: %s" % my_loop[0])
    return my_loop[0]
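
The keydict argument is a plain dictionary mapping each key dataname to a (value, caseless) tuple, e.g. (hypothetical names):

pack = sb.GetCompoundKeyedPacket({'_atom_site_label':('O1',True),
                                  '_atom_site_type_symbol':('O',True)})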

Semantic loops. These are loops defined by a dictionary, as opposed to the syntax. dREL requires us to be able to extract a packet by key, and then attributes of this packet are the individual objects that are found in that category, regardless of whether they co-occur in one loop or child loops.

We use the dictionary ``cat_key_table'' to give us a list of keys for each category. We find the corresponding loops, extract any packets meeting the key requirements, and merge these packets.

A packet for dREL use will need to be able to derive further values using the dictionary, e.g. when an attribute of that packet is requested. In order to do this derivation, we need to store the key names and values, so that the __getattr__ method of the packet can properly derive the needed non-key values.

With a deriving dictionary we run the danger of generating keys for a child category for which no other values are defined. Such keys are pointless: the only information we have is that they come from the parent category, so they can only be copies of the parent key, making the child category identical to the parent. We therefore do not generate keys for child categories; if child category items are present, the key should already be present as well.

On the other hand, if the child category keys are present but the parent keys are missing, then we know in principle that the child keys are a subset of the parent keys, but we cannot use the key to derive any values, as the keys are opaque.

The final DDLm specification allowed compound keys for categories. When combined with child categories, this means that a child key may be absent but its parent key may be present and is considered equivalent.

<Get semantic packet by compound key>= (<-U)
def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
    """Return a complete packet for category `cat_id` where the keyvalues are
    provided as a dictionary of key:(value,caseless) pairs
    This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    the requested category and any children."""
    #if len(keyvalues)==1:   #simplification
    #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
    target_keys = self.dictionary.cat_key_table[cat_id]
    # update the dictionary passed to us with all equivalents, for
    # simplicity.
    parallel_keys = list(zip(*target_keys))  #transpose
    #print('Parallel keys:' + repr(parallel_keys))
    #print('Keydict:' + repr(keydict))
    start_keys = list(keydict.keys())
    for one_name in start_keys:
        key_set = [a for a in parallel_keys if one_name in a]
        for one_key in key_set:
            keydict[one_key] = keydict[one_name]
    # target_keys is a list of lists, each of which is a compound key
    p = StarPacket()
    # a little function to return the dataname for a key
    def find_key(key):
        for one_key in self.dictionary.key_equivs.get(key,[])+[key]:
            if self.has_key(one_key):
                return one_key
        return None
    for one_set in target_keys: #loop down the categories
        true_keys = [find_key(k) for k in one_set]
        true_keys = [k for k in true_keys if k is not None]
        if len(true_keys)==len(one_set):
            truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)])
            try:
                extra_packet = self.GetCompoundKeyedPacket(truekeydict)
            except KeyError:     #one or more are missing
                continue         #should try harder?
            except ValueError:
                continue
        else:
            continue
        #print('Merging packet for keys ' + repr(one_set))
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    p.key = true_keys
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

Plain single key. This is the older routine where we assume that we only have a single key per category. We still have to put the single key into a list as the __getattr__ method of the StarPacket will assume that it has been passed a list of keys.

<Get semantic packet by key>= (<-U)
def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category `cat_id` where the
    category key for the category equals `keyvalue`.  This routine
    will understand any joined loops, so if separate loops in the
    datafile belong to the
    same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`),
    the returned `StarPacket` object will contain datanames from
    both categories."""
    target_keys = self.dictionary.cat_key_table[cat_id]
    target_keys = [k[0] for k in target_keys] #one only in each list
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #missing key
            try:
                test_key = self[cat_key]  #generate key if possible
                print('Test key is %s' % repr( test_key ))
                if test_key is not None and\
                not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)):
                    print('Getting packet for key %s' % repr( keyvalue ))
                    extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except:             #cannot be generated
                continue
        except ValueError:      #none/more than one, assume none
            continue
            #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue)
        p.merge_packet(extra_packet)
    # the following attributes used to calculate missing values
    for keyname in target_keys:
        if hasattr(p,keyname):
            p.key = [keyname]
            break
    if not hasattr(p,"key"):
        raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p)))
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

We might also want to remove a packet by key. We operate on the data in place, and need access to the low-level information as we have to remove both the string and value elements.

<Remove a packet>= (<-U)
def RemoveKeyedPacket(self,keyname,keyvalue):
    """Remove the packet for which dataname `keyname` takes
    value `keyvalue`.  Only the first such occurrence is
    removed."""
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]

Output

The philosophy of outputting strings is to create a StringIO object, and pass this between all the routines. As there are specific rules about when a new line can occur (especially concerning semicolon-delimited strings) we subclass StringIO and fiddle with the write method.

The grammar attribute is consulted to determine what output grammar to use.

<Functions for printing out>= (<-U)
<Set the output grammar>
<Set the output length>
<Print a complete block>
<Format loop names>
<Format loop packets>
<Format a single packet item>
<Format a string>
<Format a data value>
<Create a proper ordering>
<Convert value to string>
<Do wrapping>

We adjust the write method to intelligently output lines, taking care with CIF/STAR rules for output. We allow the caller to specify: (1) a line break prior to output (e.g. for a new dataname) (2) a tab stepsize, in which case we try to pad out to this value (3) that we can do a line break if we wish (4) moving to a nested indent level, starting from the current position (5) Whether or not to align the next item with the tab stops (6) The column that this item should start at. If we are past this column, it is ignored.

We never insert newlines inside supplied strings. Tabs are applied after any requested line breaks, and both are applied before the next item is output. If the character is flagged as a delimiter, it is only output if the previous character is not a delimiter or if the next character will be a line break.

After adding any line breaks and/or tab stops, we recognise the following situations: (1) The supplied string does not overflow the line: we output, and update the length of the current line (2) The supplied string does overflow the line. (i) If we are allowed to break, we output a linefeed, and then the string. (ii) Otherwise, we output the string (3) The supplied string contains linefeeds: we update the current line length according to the number of characters from the beginning of the line.

<Subclass StringIO>= (<-U)
class CIFStringIO(StringIO):
    def __init__(self,target_width=80,**kwargs):
        StringIO.__init__(self,**kwargs)
        self.currentpos = 0
        self.target_width = target_width
        self.tabwidth = -1
        self.indentlist = [0]
        self.last_char = ""

    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,
                             delimiter=False,startcol=-1):
        """Write a string with correct linebreak, tabs and indents"""
        # do we need to break?
        if delimiter:
            if len(outstring)>1:
                raise ValueError('Delimiter %s is longer than one character' % repr( outstring ))
            output_delimiter = True
        if mustbreak:    #insert a new line and indent
            temp_string = '\n' + ' ' * self.indentlist[-1]
            StringIO.write(self,temp_string)
            self.currentpos = self.indentlist[-1]
            self.last_char = temp_string[-1]
        if self.currentpos+len(outstring)>self.target_width: #try to break
            if not delimiter and outstring[0]!='\n':          #ie <cr>;
              if canbreak:
                temp_string = '\n' + ' ' * self.indentlist[-1]
                StringIO.write(self,temp_string)
                self.currentpos = self.indentlist[-1]
                self.last_char = temp_string[-1]
            else:        #assume a break will be forced on next value
                output_delimiter = False    #the line break becomes the delimiter
        #try to match requested column
        if startcol > 0:
            if self.currentpos < startcol:
                StringIO.write(self,(startcol - self.currentpos)* ' ')
                self.currentpos = startcol
                self.last_char = ' '
            else:
                print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos))
                startcol = -1   #so that tabbing works as a backup
        #handle tabs
        if self.tabwidth >0 and do_tab and startcol < 0:
            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
            #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
            if self.currentpos < next_stop:
                StringIO.write(self,(next_stop-self.currentpos)*' ')
                self.currentpos = next_stop
                self.last_char = ' '
        #calculate indentation after tabs and col setting applied
        if newindent:           #indent by current amount
            if self.indentlist[-1] == 0:    #first time
                self.indentlist.append(self.currentpos)
                # print 'Indentlist: ' + `self.indentlist`
            else:
                self.indentlist.append(self.indentlist[-1]+2)
        elif unindent:
            if len(self.indentlist)>1:
                self.indentlist.pop()
            else:
                print('Warning: cannot unindent any further')
        #check that we still need a delimiter
        if self.last_char in [' ','\n','\t']:
            output_delimiter = False
        #now output the string - every invocation comes through here
        if (delimiter and output_delimiter) or not delimiter:
            StringIO.write(self,outstring)
        last_line_break = outstring.rfind('\n')
        if last_line_break >=0:
            self.currentpos = len(outstring)-last_line_break
        else:
            self.currentpos = self.currentpos + len(outstring)
        #remember the last character
        if len(outstring)>0:
            self.last_char = outstring[-1]

    def set_tab(self,tabwidth):
        """Set the tab stop position"""
        self.tabwidth = tabwidth
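
A brief standalone sketch of the intended behaviour (an illustration, not part of the tangled code):

buf = CIFStringIO(target_width=20)
buf.set_tab(10)
buf.write('_short',do_tab=False)
buf.write('value')                   # padded out to the next tab stop
buf.write('a_very_long_value_here',canbreak=True)   # breaks onto a new line
print(buf.getvalue())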

For non-default output lengths, we include a function which will set the internal attribute that controls maximum line length. As this is a per-block value, this function is most likely called by the StarFile object rather than directly.

Two values control output line formatting: self.wraplength and self.maxoutlength. self.wraplength is the value at which the line will be wrapped normally, but long strings will not force an internal wrap inside the string; self.maxoutlength is the absolute maximum length.

<Set the output length>= (<-U)
def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    """Set the maximum output line length (`maxoutlength`) and the line length to
    wrap at (`wraplength`).  The wrap length is a target only and may not always be
    possible."""
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength

Setting up the output grammar. The output grammar determines the list delimiters for CIF2/STAR2, the available string delimiters for 1.0/1.1/2.0, and the allowed characters.

<Set the output grammar>= (<-U)
def set_grammar(self,new_grammar):
    self.string_delimiters = ["'",'"',"\n;",None]
    if new_grammar in ['STAR2','2.0']:
        self.string_delimiters += ['"""',"'''"]
    if new_grammar == '2.0':
        self.list_delimiter = "  "
    elif new_grammar == 'STAR2':
        self.list_delimiter = ", "
    elif new_grammar not in ['1.0','1.1']:
        raise StarError('Request to set unknown grammar %s' % new_grammar)

Printing a section. We allow an optional order list to be given, in case the caller wants to order things in some nice way. By default, we use the item_order attribute. Naturally, looped items are grouped together according to their order in the order list.

Note that we must be careful to add spaces between data items, especially when formatting string loop data, where our string addition could get quite hairy. As we are doing so much concatenation, we use a stringIO buffer to speed it up.

As an alternative, we may have formatting hints, perhaps from a template that we have input through 'process_template'. The formatting hints specify a desired column and delimiter, and an order of output. We can always satisfy the output order, but may have to fiddle with columns and delimiters depending on the datavalue contents. The finish_at and start_from arguments cause output to stop/start when one of the datanames in the arguments is found.

We attempt some nice formatting by printing non-packet items with an apparent tab stop at 40 characters. And of course, we switch off value provision so that the stored string representations are output.

<Print a complete block>= (<-U)
def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''):
    self.provide_value = False
    # first make an ordering
    self.create_ordering(finish_at,start_from)  #create self.output_order
    # now do it...
    if not instring:
        outstring = CIFStringIO(target_width=80)       # the returned string
    else:
        outstring = instring
    # print block delimiter
    outstring.write(blockstart,canbreak=True)
    while len(self.output_order)>0:
       #print "Remaining to output " + `self.output_order`
       itemname = self.output_order.pop(0)
       if not isinstance(itemname,int):  #no loop
               item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()]
               if len(item_spec)>0:
                   item_spec = item_spec[0]
                   col_pos = item_spec.get('column',-1)
                   name_pos = item_spec.get('name_pos',-1)
               else:
                   col_pos = -1
                   item_spec = {}
                   name_pos = -1
               if col_pos < 0: col_pos = 40
               outstring.set_tab(col_pos)
               itemvalue = self[itemname]
               outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos)
               outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)    #space after itemname
               self.format_value(itemvalue,outstring,hints=item_spec)
       else:# we are asked to print a loop block
                outstring.set_tab(10)       #guess this is OK?
                loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop']
                if loop_spec:
                    loop_indent = max(loop_spec[0],0)
                else:
                    loop_indent = indent
                outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent)
                self.format_names(outstring,indent+2,loop_no=itemname)
                self.format_packets(outstring,indent+2,loop_no=itemname)
    returnstring = outstring.getvalue()
    outstring.close()
    return returnstring

Formatting a data value. Data values may be stored as strings, numbers or compound values. We call this routine recursively to format data values. We use compound to flag that we are an embedded compound value, so that we do not insert a line break before the top-level compound delimiter. If hints is supplied, it is a dictionary containing an entry 'delimiter' that requests a particular delimiter.

<Format a data value>= (<-U)
def format_value(self,itemvalue,stringsink,compound=False,hints={}):
    """Format a Star data value"""
    global have_numpy
    delimiter = hints.get('delimiter',None)
    startcol = hints.get('column',-1)
    if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed
        raise StarError("Non-unicode value {0} found in block".format(itemvalue))
    if isinstance(itemvalue,unicode):  #need to sanitize
        stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol)
    elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy
       stringsink.set_tab(0)
       stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol)
       if len(itemvalue)>0:
           self.format_value(itemvalue[0],stringsink)
           for listval in itemvalue[1:]:
              # print 'Formatting %s' % `listval`
              stringsink.write(self.list_delimiter,do_tab=False)
              self.format_value(listval,stringsink,compound=True)
       stringsink.write(']',unindent=True)
    elif isinstance(itemvalue,dict):
       stringsink.set_tab(0)
       stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol)  #start a new line inside
       items = list(itemvalue.items())
       if len(items)>0:
           stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
           self.format_value(items[0][1],stringsink)
           for key,value in items[1:]:
               stringsink.write(self.list_delimiter)
               stringsink.write("'"+key+"'"+":",canbreak=True)
               self.format_value(value,stringsink)   #never break between key and value
       stringsink.write('}',unindent=True)
    elif isinstance(itemvalue,(float,int)) or \
         (have_numpy and isinstance(itemvalue,(numpy.number))):  #TODO - handle uncertainties
       stringsink.write(str(itemvalue),canbreak=True,startcol=startcol)   #numbers
    else:
       raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue ))

Formatting a loop section. We are passed an indent and destination string, and are expected to append a list of item names to the string indented by the indicated number of spaces. If we have loops, we add those in too.

<Format loop names>= (<-U)
def format_names(self,outstring,indent=0,loop_no=-1):
    """Print datanames from `loop_no` one per line"""
    temp_order = self.loops[loop_no][:]   #copy
    format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order])
    while len(temp_order)>0:
        itemname = temp_order.pop(0)
        req_indent = format_hints.get(itemname,{}).get('name_pos',indent)
        outstring.write(' ' * req_indent,do_tab=False)
        outstring.write(self.true_case[itemname],do_tab=False)
        outstring.write("\n",do_tab=False)

Formatting a loop packet. Our final packet will involve collecting the ith value of each item in our particular loop. Note that we have to be careful with indentation, as the <return>; digraph must be recognised.

<Format loop packets>= (<-U)
def format_packets(self,outstring,indent=0,loop_no=-1):
   alldata = [self[a] for a in self.loops[loop_no]]
   loopnames = self.loops[loop_no]
   #print 'Alldata: %s' % `alldata`
   packet_data = list(zip(*alldata))
   #print 'Packet data: %s' % `packet_data`
   #create a dictionary for quick lookup of formatting requirements
   format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames])
   for position in range(len(packet_data)):
       if position > 0:
           outstring.write("\n")    #new line each packet except first
       for point in range(len(packet_data[position])):
           datapoint = packet_data[position][point]
           format_hint = format_hints.get(loopnames[point],{})
           self.format_packet_item(datapoint,indent,outstring,format_hint)
           outstring.write(' ',canbreak=True,do_tab=False,delimiter=True)

Formatting a single packet item.

<Format a single packet item>= (<-U)
def format_packet_item(self,pack_item,indent,outstring,format_hint):
       # print 'Formatting %s' % `pack_item`
       # temporary check for any non-unicode items
       if isinstance(pack_item,str) and not isinstance(pack_item,unicode):
           raise StarError("Item {0!r} is not unicode".format(pack_item))
       if isinstance(pack_item,unicode):
           delimiter = format_hint.get('delimiter',None)
           startcol = format_hint.get('column',-1)
           outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol)
       else:
           self.format_value(pack_item,outstring,hints = format_hint)

Formatting a string. We make sure that the length of the item value is less than self.maxoutlength, splitting it over multiple lines if necessary. We check the value for terminators, impossible apostrophes and length before deciding whether to print the item and value on a single line. We try to respect carriage returns in the string, if the caller has tried to do the formatting for us. If we are not putting apostrophes around a string, we make the first character a space, to avoid problems if the first character of a line is a semicolon.

The STAR specification states that embedded quotes are allowed so long as they are not followed by a space. So if we find any quotes followed by spaces we output a semicolon-terminated string to avoid too much messing around. This routine is called very often and could be improved.

We have to catch empty strings as well, which are legal. Another gotcha concerns 'embedded' strings; if the datavalue begins with a quote, it will be output verbatim (and misunderstood) unless spaces elsewhere force quotation. Note that non-delimited strings may not start with a reserved word ('data','save','global').

The caller is allowed to request a particular delimiter, with 'None' corresponding to no delimiter and the choices being apostrophe, double quote, or semicolon. CIF2-style triple quotes are not currently supported. The 'indent' argument allows the routine to enforce indentation of multi-line strings by the specified amount. Note that this will technically change the datavalue contents by adding spaces, although for datavalues intended only for human consumption this is irrelevant.

Long lines are expressed using the line-folding protocol from CIF1.1, and the CIF2 text-prefix protocol is applied when the value itself contains a "\n;" digraph; both are applied automatically below.

<Format a string>= (<-U)
def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}):
    if hints.get("reformat",False) and "\n" in instring:
        instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"])
    allowed_delimiters = set(self.string_delimiters)
    if len(instring)==0: allowed_delimiters.difference_update([None])
    if len(instring) > (self.maxoutlength-2) or '\n' in instring:
            allowed_delimiters.intersection_update(["\n;","'''",'"""'])
    if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring:
            allowed_delimiters.difference_update([None])
    if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'):
            allowed_delimiters.difference_update([None])
    if len(instring)>5 and instring[:6].lower()=='global':
            allowed_delimiters.difference_update([None])
    if '"' in instring: allowed_delimiters.difference_update(['"',None])
    if "'" in instring: allowed_delimiters.difference_update(["'",None])
    out_delimiter = "\n;"  #default (most conservative)
    if delimiter in allowed_delimiters:
        out_delimiter = delimiter
    elif "'" in allowed_delimiters: out_delimiter = "'"
    elif '"' in allowed_delimiters: out_delimiter = '"'
    if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
    elif out_delimiter is None: return instring
    # we are left with semicolon strings
    # use our protocols:
    maxlinelength = max([len(a) for a in instring.split('\n')])
    if maxlinelength > self.maxoutlength:
        protocol_string = apply_line_folding(instring)
    else:
        protocol_string = instring
    # now check for embedded delimiters
    if "\n;" in protocol_string:
        prefix = "CIF:"
        while prefix in protocol_string: prefix = prefix + ":"
        protocol_string = apply_line_prefix(protocol_string,prefix+"> ")
    return "\n;" + protocol_string + "\n;"

Converting a value to a string.

The canonical version of a value is its string representation. This is different to its output format, which will have delimiters and various conventions applied (see below).

<Convert value to string>= (<-U)
def convert_to_string(self,dataname):
    """Convert values held in dataname value fork to string version"""
    v,is_value = self.GetFullItemValue(dataname)
    if not is_value:
        return v
    if check_stringiness(v): return v   #already strings
    # TODO...something else
    return v

Wrapping a string

If our formatting hints dictionary allows us to reformat a string, *and* the string does not contain at least three spaces in a row (implying that it is already formatted), we insert appropriate spaces and line feeds.

<Do wrapping>= (<-U)
def do_wrapping(self,instring,indent=3):
    """Wrap the provided string"""
    if "   " in instring:   #already formatted
        return instring
    self.wrapper.initial_indent = ' '*indent
    self.wrapper.subsequent_indent = ' '*indent
    # remove leading and trailing space
    instring = instring.strip()
    # split into paragraphs
    paras = instring.split("\n\n")
    wrapped_paras = [self.wrapper.fill(p) for p in paras]
    return "\n".join(wrapped_paras)

Line folding protocol

The line folding protocol allows lines to be broken by appending a backslash as the last character of a line. It is signalled by a backslash as the first character of the line following an opening semicolon. We use it to introduce line breaks where appropriate. We search for whitespace between minwraplength and maxwraplength, and if none is forthcoming we wrap at maxwraplength-1 (-1 to allow for the backslash).

<Apply line folding>= (U->)
def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
    """Insert line folding characters into instring between min/max wraplength"""
    # first check that we need to do this
    lines = instring.split('\n')
    line_len = [len(l) for l in lines]
    if max(line_len) < maxwraplength and re.match(r"\\[ \v\t\f]*\n",instring) is None:
        return instring
    outstring = "\\\n"   #header
    for l in lines:
        if len(l) < maxwraplength:
            outstring = outstring + l
            if len(l) > 0 and l[-1]=='\\': #who'da thunk it?  A line ending with a backslash
                    outstring = outstring + "\\\n"  #
            outstring = outstring + "\n"  #  put back the split character
        else:
            current_bit = l
            while len(current_bit) > maxwraplength:
                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
                if space_pos is not None and space_pos.start()<maxwraplength-1:
                    outstring = outstring + current_bit[:minwraplength+space_pos.start()] + "\\\n"
                    current_bit = current_bit[minwraplength+space_pos.start():]
                else:    #just blindly insert
                    outstring = outstring + current_bit[:maxwraplength-1] + "\\\n"
                    current_bit = current_bit[maxwraplength-1:]
            outstring = outstring + current_bit
            if current_bit[-1] == '\\':  #a backslash just happens to be here
                outstring = outstring + "\\\n"
            outstring = outstring + '\n'
    outstring = outstring[:-1]  #remove final newline
    return outstring
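
A round trip through the folding functions (sketch):

long_line = ' '.join(['token%02d' % i for i in range(20)])  # ~160 characters
folded = apply_line_folding(long_line)
assert folded.startswith('\\\n')            # header signals folding
assert remove_line_folding(folded) == long_line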

Believe it or not, a final backslash followed by whitespace is also considered to be part of the line folding protocol, even though the last line obviously cannot be folded together with the next line as there is no next line.

<Remove line folding>= (U->)
def remove_line_folding(instring):
    """Remove line folding from instring"""
    if re.match(r"\\[ \v\t\f]*" +"\n",instring) is not None:
        return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M)
    else:
        return instring

Line indenting

CIF2 introduces a text prefix protocol for embedding arbitrary text strings in a semicolon-delimited string. If the first line ends in one or two backslashes, the text before the first backslash defines a prefix that should appear at the beginning of all subsequent lines. For brevity, two backslashes are used to signal that the line folding protocol applies as well. Alternatively, the line folding signal will simply correspond to a second 'header' line in the prefixed text consisting of the prefix followed by a backslash, optional whitespace, and a line feed.

<Apply line indent>= (U->)
def apply_line_prefix(instring,prefix):
    """Prefix every line in instring with prefix"""
    if prefix[0] != ";" and "\\" not in prefix:
        header = re.match(r"(\\[ \v\t\f]*" +"\n)",instring)
        if header is not None:
            print('Found line folded string for prefixing...')
            not_header = instring[header.end():]
            outstring = prefix + "\\\\\n" + prefix
        else:
            print('No folding in input string...')
            not_header = instring
            outstring = prefix + "\\\n" + prefix
        outstring = outstring + not_header.replace("\n","\n"+prefix)
        return outstring
    raise StarError("Requested prefix starts with semicolon or contains a backslash: " + prefix)

A prefix is signalled by one or two backslashes at the end of the first line. If this is detected, the text before the backslash is removed from every line. We do not use regular expressions for the replacement in case the prefix contains significant characters.

<Remove line indent>= (U->)
def remove_line_prefix(instring):
    """Remove prefix from every line if present"""
    prefix_match = re.match("(?P<prefix>[^;\\\n][^\n\\\\]+)(?P<folding>\\\\{1,2}[ \t\v\f]*\n)",instring)
    if prefix_match is not None:
        prefix_text = prefix_match.group('prefix')
        print('Found prefix %s' % prefix_text)
        prefix_end = prefix_match.end('folding')
        # keep any line folding instructions
        if prefix_match.group('folding')[:2]=='\\\\':  #two backslashes
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return "\\" + outstring  #keep line folding first line
        else:
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return outstring[1:]   #drop first line ending, no longer necessary
    else:
        return instring
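
A round trip through the prefix functions (sketch):

text = 'first line\n; embedded semicolon line\nlast line'
prefixed = apply_line_prefix(text,'CIF:> ')
assert remove_line_prefix(prefixed) == text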


Templating

A ``template'' is a CifFile containing a single block, where the datanames are laid out in the way that the user desires. The layout elements that are picked up from the template CifFile are: (1) order (2) column position of datavalues (only the first row of a loop block counts) (3) delimiters (4) column position of datanames. Within loops all items will be indented as for the final name in the loop header. The information that is gleaned is converted to entries in the formatting_hints table which are then consulted when writing out. Note that the order from formatting_hints will override the item_order information.

Additionally, if a semicolon-delimited value has a tab or sequence of 2 or more spaces after a line ending, it is assumed to be free text and the text values will be neatly formatted with the same indentation as found after the first line ending in the value.

Constraints on the template: (1) There should only ever be one dataname on each line (2) loop_ and datablock tokens should appear as the only non-blank characters on their lines (3) Comments are flagged by a '#' as the first character (4) Blank lines are acceptable (5) Datavalues should use only alphanumeric characters (6) Semicolon-delimited strings are not recognised in loops

<Input a guide template>= (U->)
def process_template(template_file):
    """Process a template datafile to formatting instructions"""
    template_as_cif = StarFile(template_file,grammar="2.0").first_block()
    if isinstance(template_file,(unicode,str)):
        template_string = open(template_file).read()
    else:   #a StringIO object
        template_file.seek(0)   #reset
        template_string = template_file.read()
    #template_as_lines = template_string.split("\n")
    #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
    #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
    #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
    form_hints = []   #ordered array of hint dictionaries
    find_indent = "^ +"
    for item in template_as_cif.item_order:  #order of input
        if not isinstance(item,int):    #not nested
            hint_dict = {"dataname":item}
            # find the line in the file
            start_pos = re.search("(^[ \t]*(?P<name>" + item + ")[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M)
            if start_pos.group("spec") != None:
                spec_pos = start_pos.start("spec")-start_pos.start(0)
                spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3]
                if spec_char[0] in '\'";':
                    hint_dict.update({"delimiter":spec_char[0]})
                    if spec_char == '"""' or spec_char == "'''":
                        hint_dict.update({"delimiter":spec_char})
                if spec_char[0] != ";":   #so we need to work out the column number
                    hint_dict.update({"column":spec_pos})
                else:                  #need to put in the carriage return
                    hint_dict.update({"delimiter":"\n;"})
                    # can we format the text?
                    text_val = template_as_cif[item]
                    hint_dict["reformat"] = "\n\t" in text_val or "\n  " in text_val
                    if hint_dict["reformat"]:   #find the indentation
                        p = re.search(find_indent,text_val,re.M)
                        if p.group() is not None:
                            hint_dict["reformat_indent"]=p.end() - p.start()
                if start_pos.group('name') is not None:
                    name_pos = start_pos.start('name') - start_pos.start(0)
                    hint_dict.update({"name_pos":name_pos})
            #print '%s: %s' % (item,`hint_dict`)
            form_hints.append(hint_dict)
        else:           #loop block
            testnames = template_as_cif.loops[item]
            total_items = len(template_as_cif.loops[item])
            testname = testnames[0]
            #find the loop spec line in the file
            loop_regex = "(^[ \t]*(?P<loop>loop_)[ \t\n\r]+(?P<name>" + testname + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1)
            loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
            loop_so_far = loop_line.end()
            packet_text = loop_line.group('packet')
            loop_indent = loop_line.start('loop') - loop_line.start(0)
            form_hints.append({"dataname":'loop','name_pos':loop_indent})
            packet_regex = "[ \t]*(?P<all>(?P<sqqq>'''([^\n\r\f']*)''')|(?P<sq>'([^\n\r\f']*)'+)|(?P<dq>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))"
            packet_pos = re.finditer(packet_regex,packet_text)
            line_end_pos = re.finditer("^",packet_text,re.M)
            next_end = next(line_end_pos).end()
            last_end = next_end
            for loopname in testnames:
                #find the name in the file for name pos
                name_regex = "(^[ \t]*(?P<name>" + loopname + "))"
                name_match = re.search(name_regex,template_string,re.I|re.M|re.S)
                loop_name_indent = name_match.start('name')-name_match.start(0)
                hint_dict = {"dataname":loopname,"name_pos":loop_name_indent}
                #find the value
                thismatch = next(packet_pos)
                while thismatch.start('all') > next_end:
                    try:
                        last_end = next_end
                        next_end = next(line_end_pos).start()
                        print('next end %d' % next_end)
                    except StopIteration:
                        break
                print('Start %d, last_end %d' % (thismatch.start('all'),last_end))
                col_pos = thismatch.start('all') - last_end + 1
                if thismatch.group('none') is None:
                    if thismatch.group('sqqq') is not None:
                        hint_dict.update({'delimiter':"'''"})
                    else:
                        hint_dict.update({'delimiter':thismatch.groups()[0][0]})
                hint_dict.update({'column':col_pos})
                print('%s: %s' % (loopname,repr( hint_dict )))
                form_hints.append(hint_dict)
    return form_hints
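
A usage sketch (the template content and the direct assignment to formatting_hints are illustrative only; the printing routines above consult this attribute):

from io import StringIO
template = StringIO(u'''#\\#CIF_2.0
data_template
_cell_length_a                      12.345
loop_
  _atom_site_label
  _atom_site_occupancy
    C1      1.0
''')
sb.formatting_hints = process_template(template)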

Creating a proper ordering for output from the template information. When we output, we expect the ordering to consist of a sequence of datanames or loop references. Our templated ordering is essentially a list of datanames, so we now have to find which loop each dataname corresponds to and adjust each loop's ordering accordingly. For dictionary use we allow only a segment of the file to be output by specifying a finish_at/start_from dataname. For consistency, we default to outputting nothing if start_from is not found, and outputting everything if finish_at is not found.

<Create a proper ordering>= (<-U)
def create_ordering(self,finish_at,start_from):
    """Create a canonical ordering that includes loops using our formatting hints dictionary"""
    requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop'])
    new_order = []
    for item in requested_order:
       if isinstance(item,unicode) and item.lower() in self.item_order:
           new_order.append(item.lower())
       elif item in self:    #in a loop somewhere
           target_loop = self.FindLoop(item)
           if target_loop not in new_order:
               new_order.append(target_loop)
               # adjust loop name order
               loopnames = self.loops[target_loop]
               loop_order = [i for i in requested_order if i in loopnames]
               unordered = [i for i in loopnames if i not in loop_order]
               self.loops[target_loop] = loop_order + unordered
    extras = list([i for i in self.item_order if i not in new_order])
    self.output_order = new_order + extras
    # now handle partial output
    if start_from != '':
        if start_from in requested_order:
            sfi = requested_order.index(start_from)
            loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0]
            candidates = list([k for k in self.output_order if k in requested_order[sfi:]])
            cand_pos = len(new_order)
            if len(candidates)>0:
                cand_pos = self.output_order.index(candidates[0])
            if len(loop_order)>0:
                cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
            if cand_pos < len(self.output_order):
                print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from))
                self.output_order = self.output_order[cand_pos:]
            else:
                print('Start is beyond end of output list')
                self.output_order = []
        elif start_from in extras:
           self.output_order = self.output_order[self.output_order.index(start_from):]
        else:
           self.output_order = []
    if finish_at != '':
        if finish_at in requested_order:
            fai = requested_order.index(finish_at)
            loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0])
            candidates = list([k for k in self.output_order if k in requested_order[fai:]])
            cand_pos = len(new_order)
            if len(candidates)>0:
                cand_pos = self.output_order.index(candidates[0])
            if len(loop_order)>0:
                cand_pos = min(cand_pos,self.output_order.index(loop_order[0]))
            if cand_pos < len(self.output_order):
                print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at))
                self.output_order = self.output_order[:cand_pos]
            else:
                print('All of block output')
        elif finish_at in extras:
           self.output_order = self.output_order[:self.output_order.index(finish_at)]
    #print('Final order: ' + repr(self.output_order))

Merging. Normally merging of dictionaries is done at the data file level, i.e. a whole block is replaced or added. However, in 'overlay' mode, individual keys are added/replaced, which is a block level operation.

Looped item overlaps are tricky. We distinguish two cases: at least one key in common, and all keys in common. The latter implies addition of rows only. The former implies deletion of all co-occurring looped items (as they will otherwise have data of different lengths), and therefore either completely replacing the previous item, or adding the new data to the end together with the other co-looped items. But this would mean that we were passed a loop block with different data lengths in the new object, which is illegal, so we can only add to the end if the new dictionary contains a subset of the attributes in the current dictionary. Therefore we have the following rules:

(1) Identical attributes in new and old -> append (2) New contains subset of old -> append values for common items and delete extra looped items (3) Old contains subset of new -> new completely replaces old

The match_att keyword is used when old and new blocks have been matched based on an internal attribute (usually _name or _item.name). This attribute should not become looped in overlay mode, obviously, so we need to have a record of it just in case.

The rel_keys keyword contains a list of datanames which act as unique keys (in a database sense) inside loop structures. If any keys match in separate datablocks, the row will not be added, but simply replaced.

<Merge with another block>= (<-U)
def merge(self,new_block,mode="strict",match_att=[],match_function=None,
               rel_keys = []):
    if mode == 'strict':
       for key in new_block.keys():
           if key in self and key not in match_att:
              raise StarError( "Identical keys %s in strict merge mode" % key)
           elif key not in match_att:           #a new dataname
               self[key] = new_block[key]
       # we get here if there are no keys in common, so we can now copy
       # the loops and not worry about overlaps
       for one_loop in new_block.loops.values():
           self.CreateLoop(one_loop)
       # we have lost case information
       self.true_case.update(new_block.true_case)
    elif mode == 'replace':
       newkeys = list(new_block.keys())
       for ma in match_att:
          try:
               newkeys.remove(ma)        #don't touch the special ones
          except ValueError:
               pass
       for key in new_block.keys():
              if isinstance(key,unicode):
                  self[key] = new_block[key]
       # creating the loop will remove items from other loops
       for one_loop in new_block.loops.values():
           self.CreateLoop(one_loop)
       # we have lost case information
       self.true_case.update(new_block.true_case)
    elif mode == 'overlay':
       print('Overlay mode, current overwrite is %s' % self.overwrite)
       raise StarError('Overlay block merge mode not implemented')
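       # NOTE: the code below is unreachable while the StarError above is
       # raised; it is retained as a sketch of the intended overlay logic.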
       save_overwrite = self.overwrite
       self.overwrite = True
       for attribute in new_block.keys():
           if attribute in match_att: continue      #ignore this one
           new_value = new_block[attribute]
           #non-looped items
           if new_block.FindLoop(attribute)<0:     #not looped
              self[attribute] = new_value
       my_loops = self.loops.values()
       perfect_overlaps = [a for a in new_block.loops if a in my_loops]
       for po in perfect_overlaps:
          loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
          try:
              newkeypos = map(lambda a:newkeys.index(a),loop_keys)
              newkeypos = newkeypos[0]      #one key per loop for now
              loop_keys = loop_keys[0]
          except (ValueError,IndexError):
              newkeypos = []
              overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
              new_data = map(lambda a:new_block[a],overlaps) #new packet data
              packet_data = transpose(overlap_data)
              new_p_data = transpose(new_data)
              # remove any packets for which the keys match between old and new; we
              # make the arbitrary choice that the old data stays
              if newkeypos:
                  # get matching values in new list
                  print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos])))
                  key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
                  # filter out any new data with these key values
                  new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
                  if new_p_data:
                      new_data = transpose(new_p_data)
                  else: new_data = []
              # wipe out the old data and enter the new stuff
              byebyeloop = self.GetLoop(overlaps[0])
              # print("Removing '%r' with overlaps '%r'" % (byebyeloop, overlaps))
              # Note that if, in the original dictionary, overlaps are not
              # looped, GetLoop will return the block itself.  So we check
              # for this case...
              if byebyeloop != self:
                  self.remove_loop(byebyeloop)
              self.AddLoopItem((overlaps,overlap_data))  #adding old packets
              for pd in new_p_data:                             #adding new packets
                 if pd not in packet_data:
                    for i in range(len(overlaps)):
                        #don't do this at home; we are appending
                        #to something in place
                        self[overlaps[i]].append(pd[i])
       self.overwrite = save_overwrite
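
A usage sketch of the merge modes (illustrative only; the datanames are invented, and we assume the Python 3 reading of this file in which unicode is an alias of str):

    a = StarBlock(); b = StarBlock()
    a['_cell.length_a'] = '5.959'
    b['_cell.length_a'] = '6.021'
    b['_cell.length_b'] = '7.434'
    try:
        a.merge(b,mode='strict')     #common dataname: raises StarError
    except StarError as e:
        print(e)
    a.merge(b,mode='replace')        #incoming values win
    print(a['_cell.length_a'])       #6.021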

<Define an error class>= (<-U)
class StarError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar Format error: '+ self.value

class StarLengthError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar length error: ' + self.value

class StarDerivationError(Exception):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed, None returned" % self.fail_name

#
# This is subclassed from AttributeError in order to allow hasattr
# to work.
#
class StarDerivationFailure(AttributeError):
    def __init__(self,fail_name):
        self.fail_name = fail_name
    def __str__(self):
        return "Derivation of %s failed" % self.fail_name

Utility functions

These functions do not depend on knowing the internals of the various classes and are therefore kept outside of the class definitions to allow general use.

<Utility functions>= (<-U)
<Apply line folding>
<Remove line folding>
<Apply line indent>
<Remove line indent>
<List manipulations>
<Check stringiness>
<Input a guide template>

Listify wraps a bare dataname string in a list so that datanames can be treated uniformly as sequences; without it, sequence operations would act on the individual characters of the string instead of on a list of names.

<List manipulations>= (<-U)
def listify(item):
    if isinstance(item,unicode): return [item]
    else: return item

#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = []
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl

# This routine optimised to return as quickly as possible
# as it is called a lot.
def not_none(itemlist):
    """Return true only if no values of None are present"""
    if itemlist is None:
        return False
    if not isinstance(itemlist,(tuple,list)):
        return True
    for x in itemlist:
       if not not_none(x): return False
    return True
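
The behaviour of these helpers on small inputs (illustrative only; note that transpose assumes a non-empty list of equal-length rows):

    print(listify(u'_atom_site.label'))     #wrapped: [u'_atom_site.label']
    print(listify([u'_a',u'_b']))           #lists pass through unchanged
    print(transpose([[1,2,3],[4,5,6]]))     #[[1, 4], [2, 5], [3, 6]]
    print(not_none([1,[2,None]]))           #False: a nested None is present
    print(not_none([1,[2,3]]))              #True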


<Selection of iterators>= (<-U)
<A load iterator>
<A recursive iterator>
<A one-level iterator>

When loading values, we want to iterate over the items until a "stop_" token is found - this is communicated via the "popout" attribute changing to True. The __iter__ method is reserved for iterating over packets. Also, when a new packet is begun, all subloops should be extended correspondingly. We are in the special situation of not enforcing length matching, as we assume that values will be loaded in as we go.

Each yield returns a list to which a single item should be appended. So, as the number of packets increases, we need to make sure that the lowest-level lists are extended as needed with empty lists.

<A load iterator>= (<-U)
def load_iter(self,coords=[]):
    count = 0        #to create packet index
    while not self.popout:
        # ok, we have a new packet:  append a list to our subloops
        for aloop in self.loops:
            aloop.new_enclosing_packet()
        for iname in self.item_order:
            if isinstance(iname,LoopBlock):       #into a nested loop
                for subitems in iname.load_iter(coords=coords+[count]):
                    # print 'Yielding %s' % `subitems`
                    yield subitems
                # print 'End of internal loop'
            else:
                if self.dimension == 0:
                    # print 'Yielding %s' % `self[iname]`
                    yield self,self[iname]
                else:
                    backval = self.block[iname]
                    for i in range(len(coords)):
                       # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                       backval = backval[coords[i]]
                    yield self,backval
        count = count + 1      # count packets
    self.popout = False        # reinitialise
    # print 'Finished iterating'
    yield self,'###Blank###'     #this value should never be used

# an experimental fast iterator for level-1 loops (ie CIF)
def fast_load_iter(self):
    targets = map(lambda a:self.block[a],self.item_order)
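    # NB: this generator recycles the same targets indefinitely; the
    # consumer is expected to terminate the iteration itself.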
    while targets:
        for target in targets:
            yield self,target

# Add another list of the required shape to take into account a new outer packet
def new_enclosing_packet(self):
    if self.dimension > 1:      #otherwise have a top-level list
        for iname in self.keys():  #includes lower levels
            target_list = self[iname]
            for i in range(3,self.dimension): #dim 2 upwards are lists of lists of...
                target_list = target_list[-1]
            target_list.append([])
            # print '%s now %s' % (iname,`self[iname]`)

We recursively expand out all values in nested loops and return a simple dictionary type. Although it only makes sense to call this from a dimension 0 LoopBlock, when called on a deeper LoopBlock we drill down until we reach a simple value to return, and then begin looping.

We want to build up a return dictionary by adding keys from the deeper loops, but if we simply use the dictionary update method, we will find that we have stale keys from previous inner loops. Therefore, we keep our values as (key,value) tuples which we turn into a Star packet at the last moment.

This is now updated to return StarPackets, which are like lists except that they also have attributes set.

<A recursive iterator>= (<-U)
def recursive_iter(self,dict_so_far={},coord=[]):
    # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
    my_length = 0
    top_items = self.block.items()
    top_values = self.block.values()       #same order as items
    drill_values = self.block.values()
    for dimup in range(0,self.dimension):  #look higher in the tree
        if len(drill_values)>0:            #this block has values
            drill_values=drill_values[0]   #drill in
        else:
            raise StarError("Malformed loop packet %s" % repr( top_items[0] ))
    my_length = len(drill_values[0])       #length of 'string' entry
    if self.dimension == 0:                #top level
        for aloop in self.loops:
            for apacket in aloop.recursive_iter():
                # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) )
                prep_yield = StarPacket(top_values+apacket.values())  #straight list
                for name,value in top_items + apacket.items():
                    setattr(prep_yield,name,value)
                yield prep_yield
    else:                                  #in some loop
        for i in range(my_length):
            kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
            kvvals = map(lambda a:a[1],kvpairs)   #just values
            # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs ))
            if self.loops:
              for aloop in self.loops:
                for apacket in aloop.recursive_iter(coord=coord+[i]):
                    # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) )
                    prep_yield = StarPacket(kvvals+apacket.values())
                    for name,value in kvpairs + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
            else:           # we're at the bottom of the tree
                # print "Recursive yielding %s" % repr( dict(kvpairs) )
                prep_yield = StarPacket(kvvals)
                for name,value in kvpairs:
                    setattr(prep_yield,name,value)
                yield prep_yield

# small function to use the coordinates.
def coord_to_group(self,dataname,coords):
      if not isinstance(dataname,unicode):
         return dataname     # flag inner loop processing
      newm = self[dataname]          # newm must be a list or tuple
      for c in coords:
          # print "Coord_to_group: %s ->" % (repr( newm )),
          newm = newm[c]
          # print repr( newm )
      return newm
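
In use, each yielded StarPacket exposes every dataname from the enclosing loop levels as an attribute, so a fully expanded packet can be consumed directly. A sketch, assuming top_block is a dimension-0 LoopBlock and the datanames are invented:

    for packet in top_block.recursive_iter():
        print(packet._outer_loop_item, packet._inner_loop_item)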

Return a series of LoopBlocks with the appropriate packet chosen. This does not loop over interior blocks, so when called at the top level it simply returns the whole star block.

<A one-level iterator>= (<-U)
def flat_iterator(self):
        my_length = 0
        top_keys = self.block.keys()
        if len(top_keys)>0:
            my_length = len(self.block[top_keys[0]])
        for pack_no in range(my_length):
            yield(self.collapse(pack_no))
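
A sketch of the intended use, assuming collapse (defined elsewhere in this module) returns a copy of the block restricted to the chosen packet:

    for one_packet_block in a_loop_block.flat_iterator():
        print(one_packet_block.printsection())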

<API documentation flags>= (<-U)
#No documentation flags
pycifrw-4.4/src/StarFile.m.html000066400000000000000000013433061345362224200165020ustar00rootroot00000000000000 StarFile API documentation

StarFile module

__copyright = """
PYCIFRW License Agreement (Python License, Version 2)
-----------------------------------------------------

1. This LICENSE AGREEMENT is between the Australian Nuclear Science
and Technology Organisation ("ANSTO"), and the Individual or
Organization ("Licensee") accessing and otherwise using this software
("PyCIFRW") in source or binary form and its associated documentation.

2. Subject to the terms and conditions of this License Agreement,
ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use PyCIFRW alone
or in any derivative version, provided, however, that this License
Agreement and ANSTO's notice of copyright, i.e., "Copyright (c)
2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or
in any derivative version prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates PyCIFRW or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee
hereby agrees to include in any such work a brief summary of the
changes made to PyCIFRW.

4. ANSTO is making PyCIFRW available to Licensee on an "AS IS"
basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between ANSTO
and Licensee. This License Agreement does not grant permission to use
ANSTO trademarks or trade name in a trademark sense to endorse or
promote products or services of Licensee, or any third party.

8. By copying, installing or otherwise using PyCIFRW, Licensee agrees
to be bound by the terms and conditions of this License Agreement.

"""


from urllib import *         # for arbitrary opening
import re
import copy
# Fall back to the sets module on very old Pythons (pre-2.4) that lack the built-in set type
try: 
    set
except NameError:
    import sets
    set = sets.Set

class StarList(list):
    pass

class StarDict(dict):
    pass


class LoopBlock(object):
    def __init__(self,parent_block,dataname):
        self.loop_no = parent_block.FindLoop(dataname)
        if self.loop_no < 0:
            raise KeyError, '%s is not in a loop structure' % dataname
        self.parent_block = parent_block

    def keys(self):
        return self.parent_block.loops[self.loop_no]

    def values(self):
        return [self.parent_block[a] for a in self.keys()]

    def items(self):
        return zip(self.keys(),self.values())

    def __getitem__(self,dataname):
        if isinstance(dataname,int):   #a packet request
            return self.GetPacket(dataname)
        if dataname in self.keys():
            return self.parent_block[dataname]
        else:
            raise KeyError, '%s not in loop block' % dataname

    def __setitem__(self,dataname,value):
        self.parent_block[dataname] = value
        self.parent_block.AddLoopName(self.keys()[0],dataname)

    def has_key(self,key):
        return key in self.parent_block.loops[self.loop_no]

    def __iter__(self):
        packet_list = zip(*self.values())
        names = self.keys()
        for p in packet_list:
            r = StarPacket(p)
            for n in range(len(names)):
                setattr(r,names[n].lower(),r[n])
            yield r
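
    # Iteration sketch: each yielded StarPacket carries the loop's
    # datanames as lower-case attributes, e.g.
    #    for row in block.GetLoop('_atom_site_label'):
    #        print row._atom_site_label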

    # for compatibility
    def __getattr__(self,attname):
        return getattr(self.parent_block,attname)

    def load_iter(self,coords=[]):
        count = 0        #to create packet index 
        while not self.popout:
            # ok, we have a new packet:  append a list to our subloops
            for aloop in self.loops:
                aloop.new_enclosing_packet()
            for iname in self.item_order:
                if isinstance(iname,LoopBlock):       #into a nested loop
                    for subitems in iname.load_iter(coords=coords+[count]):
                        # print 'Yielding %s' % `subitems`
                        yield subitems
                    # print 'End of internal loop'
                else:
                    if self.dimension == 0:
                        # print 'Yielding %s' % `self[iname]`
                        yield self,self[iname]
                    else:
                        backval = self.block[iname]
                        for i in range(len(coords)):
                           # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                           backval = backval[coords[i]]
                        yield self,backval
            count = count + 1      # count packets
        self.popout = False        # reinitialise
        # print 'Finished iterating'
        yield self,'###Blank###'     #this value should never be used

    # an experimental fast iterator for level-1 loops (ie CIF)
    def fast_load_iter(self):
        targets = map(lambda a:self.block[a],self.item_order)
        while targets:
            for target in targets:
                yield self,target

    # Add another list of the required shape to take into account a new outer packet
    def new_enclosing_packet(self):
        if self.dimension > 1:      #otherwise have a top-level list
            for iname in self.keys():  #includes lower levels
                target_list = self[iname]
                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
                    target_list = target_list[-1]
                target_list.append([])
                # print '%s now %s' % (iname,`self[iname]`)

    def recursive_iter(self,dict_so_far={},coord=[]):
        # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
        my_length = 0
        top_items = self.block.items()
        top_values = self.block.values()       #same order as items
        drill_values = self.block.values()
        for dimup in range(0,self.dimension):  #look higher in the tree
            if len(drill_values)>0:            #this block has values
                drill_values=drill_values[0]   #drill in
            else:
                raise StarError("Malformed loop packet %s" % `top_items[0]`)
        my_length = len(drill_values[0])       #length of 'string' entry
        if self.dimension == 0:                #top level
            for aloop in self.loops:
                for apacket in aloop.recursive_iter():
                    # print "Recursive yielding %s" % `dict(top_items + apacket.items())`
                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
                    for name,value in top_items + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
        else:                                  #in some loop
            for i in range(my_length):
                kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
                kvvals = map(lambda a:a[1],kvpairs)   #just values
                # print "Recursive kvpairs at %d: %s" % (i,`kvpairs`)
                if self.loops:
                  for aloop in self.loops:
                    for apacket in aloop.recursive_iter(coord=coord+[i]):
                        # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`
                        prep_yield = StarPacket(kvvals+apacket.values())
                        for name,value in kvpairs + apacket.items():
                            setattr(prep_yield,name,value)
                        yield prep_yield
                else:           # we're at the bottom of the tree
                    # print "Recursive yielding %s" % `dict(kvpairs)`
                    prep_yield = StarPacket(kvvals)
                    for name,value in kvpairs:
                        setattr(prep_yield,name,value)
                    yield prep_yield

    # small function to use the coordinates. 
    def coord_to_group(self,dataname,coords):
          if not isinstance(dataname,basestring):
             return dataname     # flag inner loop processing
          newm = self[dataname]          # newm must be a list or tuple
          for c in coords:
              # print "Coord_to_group: %s ->" % (`newm`),
              newm = newm[c]
              # print `newm`
          return newm 

    def flat_iterator(self):
        if self.dimension == 0:   
            yield copy.copy(self)
        else:
            my_length = 0
            top_keys = self.block.keys()
            if len(top_keys)>0:
                my_length = len(self.block[top_keys[0]])
            for pack_no in range(my_length):
                yield(self.collapse(pack_no))
            

    def RemoveItem(self,itemname):
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if self.has_key(testkey):
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)
     
    def RemoveLoopItem(self,itemname):
        self.RemoveItem(itemname)

    def GetLoop(self,keyname):
        """Return a LoopBlock object containing keyname"""
        return LoopBlock(self,keyname)

    def GetPacket(self,index):
        thispack = StarPacket([])
        for myitem in self.parent_block.loops[self.loop_no]:
            thispack.append(self[myitem][index])
            setattr(thispack,myitem,thispack[-1])
        return thispack 

    def AddPacket(self,packet):
        for myitem in self.parent_block.loops[self.loop_no]:
            old_values = self.parent_block[myitem]
            old_values.append(packet.__getattribute__(myitem))
            self.parent_block[myitem] = old_values
        
    def GetItemOrder(self):
        return self.parent_block.loops[self.loop_no][:]

    def ChangeItemOrder(self,itemname,newpos):
        self.parent_block.loops[self.loop_no].remove(itemname.lower())
        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

    def GetItemPosition(self,itemname):
        import string
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not self.has_key(itemname):
            raise ValueError, 'No such dataname %s' % itemname
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

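    # NB: GetLoopNames and AddToLoop below are each defined twice; Python
    # keeps only the later definition of each name.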
    def GetLoopNames(self,keyname):
        if keyname in self:
            return self.keys()
        for aloop in self.loops:
            try: 
                return aloop.GetLoopNames(keyname)
            except KeyError:
                pass
        raise KeyError, 'Item does not exist'

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with [[keyname]]"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError, '%s is not in any loop' % keyname

    def AddToLoop(self,dataname,loopdata):
        thisloop = self.GetLoop(dataname)
        for itemname,itemvalue in loopdata.items():
            thisloop[itemname] = itemvalue 
        
    def AddToLoop(self,dataname,loopdata):
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError, "Number of values for looped datanames %s not equal to %d" \
               % (`bad_vals`,loop_len)
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()


class StarBlock(object):
    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
                 characterset='ascii',maxnamelength=-1):
        self.block = {}    #the actual data storage (lower case keys)
        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
        self.item_order = []  #lower case, loops referenced by integer
        self.formatting_hints = {}
        self.true_case = {} #transform lower case to supplied case
        self.provide_value = False   #prefer string version always
        self.dictionary = None      #DDLm dictionary
        self.popout = False         #used during load iteration
        self.curitem = -1           #used during iteration
        self.maxoutlength = maxoutlength
        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
        self.wraplength = wraplength
        self.overwrite = overwrite
        self.characterset = characterset
        if self.characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
        elif self.characterset == 'unicode':
            self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
        else:
            raise StarError("No character set specified")
        if isinstance(data,(tuple,list)):
            for item in data:
                self.AddLoopItem(item)
        elif isinstance(data,StarBlock):
            self.block = data.block.copy() 
            self.item_order = data.item_order[:]
            self.true_case = data.true_case.copy()
            # loops as well
            self.loops = data.loops.copy()

    def setmaxnamelength(self,maxlength):
        """Set the maximum allowable dataname length (-1 for no check)"""
        self.maxnamelength = maxlength
        if maxlength > 0:
            bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
            if len(bad_names)>0:
                raise StarError, 'Datanames too long: ' + `bad_names`

    def __str__(self):
        return self.printsection()

    def __setitem__(self,key,value):
        if key == "saves":
            raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
        self.AddItem(key,value)

    def __getitem__(self,key):
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""") 
        try:
           rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
           if self.dictionary:
               # send the dictionary the required key and a pointer to us
               new_value = self.dictionary.derive_item(key,self,store_value=True)
               print 'Set %s to derived value %s' % (key, `new_value`)
               return new_value
           else:
               raise KeyError, 'No such item: %s' % key
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not self.dictionary.has_key(key): return rawitem
        print '%s: is_value %s provide_value %s value %s' % (key,`is_value`,`self.provide_value`,`rawitem`)
        if is_value:
            if self.provide_value: return rawitem
            else:
               print 'Turning %s into string' % `rawitem`
               return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and rawitem != '?' and rawitem != ".":
                return self.dictionary.change_type(key,rawitem)
            return rawitem   #can't do anything
           
    def __delitem__(self,key):
        self.RemoveItem(key)

    def __len__(self):
        blen = len(self.block)
        return blen    

    def __nonzero__(self):
        if self.__len__() > 0: return 1
        return 0

    # keys returns all internal keys
    def keys(self):
        return self.block.keys()    #always lower case

    def values(self):
        return [self[a] for a in self.keys()]

    def items(self):
        return [a for a in zip(self.keys(),self.values())]

    def has_key(self,key):
        if isinstance(key,basestring) and key.lower() in self.keys():
            return 1
        return 0

    def get(self,key,default=None):
        if self.has_key(key):
            retval = self.__getitem__(key)
        else:
            retval = default
        return retval

    def clear(self):
        self.block = {}
        self.loops = {}
        self.item_order = []
        self.true_case = {}

    # doesn't appear to work
    def copy(self):
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.loops = []
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        newcopy.loops = self.loops.copy()
    #    return self.copy.im_class(newcopy)   #catch inheritance
        return newcopy
     
    def update(self,adict):
        for key in adict.keys():
            self.AddItem(key,adict[key])

    def GetItemPosition(self,itemname):
        import string
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not self.has_key(itemname):
            raise ValueError, 'No such dataname %s' % itemname
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def ChangeItemOrder(self,itemname,newpos):
        """Move itemname to newpos"""
        if isinstance(itemname,basestring):
            true_name = itemname.lower()
        else:
            true_name = itemname
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)

    def GetItemOrder(self):
        return self.item_order[:]

    def AddItem(self,key,value,precheck=False):
        if not isinstance(key,basestring):
             raise TypeError, 'Star datanames are strings only (got %s)' % `key`
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters   
        # check for overwriting
        if self.has_key(key):
             if not self.overwrite:
                 raise StarError( 'Attempt to insert duplicate item name %s' % key)
        # put the data in the right container
        regval,empty_val = self.regularise_data(value)
        # check for pure string data
        pure_string = check_stringiness(regval)
        if not precheck:
            self.check_item_value(regval)
        # update ancillary information first
        lower_key = key.lower()
        if not self.has_key(lower_key) and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # always remove from our case table in case the case is different
        else:
            del self.true_case[lower_key]
        self.true_case[lower_key] = key
        if pure_string:
            self.block.update({lower_key:[regval,empty_val]})
        else:
            self.block.update({lower_key:[empty_val,regval]})  
            
    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        # print "Received data %s" % `incomingdata`
        # we accept tuples, strings, lists and dicts!!
        # Direct insertion: we have a string-valued key, with an array
        # of values -> single-item into our loop
        if isinstance(incomingdata[0],(tuple,list)):
           # a whole loop
           keyvallist = zip(incomingdata[0],incomingdata[1])
           for key,value in keyvallist:
               self.AddItem(key,value)
           self.CreateLoop(incomingdata[0])
        elif not isinstance(incomingdata[0],basestring):
             raise TypeError, 'Star datanames are strings only (got %s)' % `incomingdata[0]`
        else:
            self.AddItem(incomingdata[0],incomingdata[1])

    def check_data_name(self,dataname,maxlength=-1): 
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print 'Checking %s for unicode characterset conformance' % dataname
            if len (filter (lambda a: ord(a) < 33, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len (filter (lambda a: ord(a) > 126 and ord(a) < 160, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len (filter (lambda a: ord(a) > 0xD7FF and ord(a) < 0xE000, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len (filter (lambda a: ord(a) > 0xFDCF and ord(a) < 0xFDF0, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len (filter (lambda a: ord(a) == 0xFFFE or ord(a) == 0xFFFF, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            if len (filter (lambda a: ord(a) > 0x10000 and (ord(a) & 0xE == 0xE) , dataname)) > 0:
                print '%s fails' % dataname
                for a in dataname: print '%x' % ord(a),
                print
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
           test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,basestring):
                if it=='': return
                me = self.char_check.match(it)            
                if not me:
                    print "Fail value check: %s" % it
                    raise StarError, u'Bad character in %s' % it
                else:
                    if me.span() != (0,len(it)):
                        print "Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],`it`)
                        raise StarError,u'Data item "' + `it` +  u'"... contains forbidden characters'
        map(check_one,test_item)

    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary"""
        from numbers import Number
        if isinstance(dataitem,(Number,basestring,StarList,StarDict)):
            return dataitem,None
        if isinstance(dataitem,(tuple,list)):
            return dataitem,[None]*len(dataitem)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError, value:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        return regval,[None]*len(regval)
        
    def RemoveItem(self,itemname):
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if self.has_key(testkey):
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)
     
    def RemoveLoopItem(self,itemname):
        self.RemoveItem(itemname)

    def GetItemValue(self,itemname):
        """Return value of itemname"""
        return self.GetFullItemValue(itemname)[0]

    def GetFullItemValue(self,itemname):
        """Return value of itemname and whether or not it is a native value"""
        try:
            s,v = self.block[itemname.lower()]
        except KeyError:
            raise KeyError, 'Itemname %s not in datablock' % itemname
        # prefer string value unless all are None
        # are we a looped value?
        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
            if s is not None or (isinstance(s,StarList) and None not in s): 
                return s,False    #a string value
            else:
                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
        elif None not in s: 
            return s,False     #a list of string values
        else: 
            if len(v)>0:   
                return v,not isinstance(v[0],StarList)
            return v,True

    def CreateLoop(self,datanames,order=-1):
           """[[datanames]] is a list of datanames that together form a loop.  They should
           all contain the same number of elements (possibly 0). If [[order]] is given, the loop will
           appear at this position in the block. A loop counts as a single position."""
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError, 'Request to loop datanames %s with different lengths: %s' % (`datanames`,`len_set`)
           elif len(listed_values) != 0:
               raise ValueError, 'Request to loop datanames where some are single values and some are not'
           if len(self.loops)>0:
               loopno = max(self.loops.keys()) + 1
           else:
               loopno = 1
           # store as lower case
           lc_datanames = [d.lower() for d in datanames]
           # remove these datanames from all other loops
           [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
           self.loops[loopno] = list(lc_datanames)
           if order >= 0:
               self.item_order.insert(order,loopno)
           else:
               self.item_order.append(loopno)
           # remove these datanames from item ordering
           self.item_order = [a for a in self.item_order if a not in lc_datanames] 
          
    def AddLoopName(self,oldname, newname):
        """Add [[newname]] to the loop containing [[oldname]]"""
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError, '%s not in loop' % oldname
        if newname in self.loops[loop_no]:
            return
        # check length
        loop_len = len(self[oldname])
        if len(self[newname]) != loop_len:
            raise ValueError, 'Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)
        # remove from any other loops
        [self.loops[a].remove(newname) for a in self.loops if newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(newname)
        
    def FindLoop(self,keyname):
        """Find the loop that contains keyname and return its numerical index,-1 if not present"""
        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
        if len(loop_no)>0:
            return loop_no[0]
        else:
            return -1

    def GetLoop(self,keyname):
        """Return a LoopBlock object containing keyname"""
        return LoopBlock(self,keyname)

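    # NB: GetLoopNames and AddToLoop are each defined twice below; only
    # the second definition of each survives in the class namespace.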
    def GetLoopNames(self,keyname):
        if keyname in self:
            return self.keys()
        for aloop in self.loops:
            try: 
                return aloop.GetLoopNames(keyname)
            except KeyError:
                pass
        raise KeyError, 'Item does not exist'

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with [[keyname]]"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError, '%s is not in any loop' % keyname

    def AddToLoop(self,dataname,loopdata):
        thisloop = self.GetLoop(dataname)
        for itemname,itemvalue in loopdata.items():
            thisloop[itemname] = itemvalue 
        
    def AddToLoop(self,dataname,loopdata):
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError, "Number of values for looped datanames %s not equal to %d" \
               % (`bad_vals`,loop_len)
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

    def Loopify(self,datanamelist):
        self.CreateLoop(datanamelist)

    def RemoveKeyedPacket(self,keyname,keyvalue):
        packet_coord = list(self[keyname]).index(keyvalue)
        loopnames = self.GetLoopNames(keyname)
        for dataname in loopnames:
            self.block[dataname][0] = list(self.block[dataname][0])
            del self.block[dataname][0][packet_coord]
            self.block[dataname][1] = list(self.block[dataname][1])
            del self.block[dataname][1][packet_coord]

    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet where [[keyname]] has value [[keyvalue]]. Ignore case if no_case is true"""
        #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
        my_loop = self.GetLoop(keyname)
        if no_case:
           one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
           one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
        if len(one_pack)!=1:
            raise ValueError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))
        #print "Keyed packet: %s" % one_pack[0]
        return one_pack[0]
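
    # Example (sketch): GetKeyedPacket('_atom_site_label','C1') returns
    # the single packet whose _atom_site_label is 'C1'; zero or multiple
    # matches raise ValueError.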

    def GetKeyedSemanticPacket(self,keyvalue,cat_id):
        """Return a complete packet for category cat_id"""
        target_keys = self.dictionary.cat_key_table[cat_id]
        p = StarPacket()
        # set case-sensitivity flag
        lcase = False
        if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
            lcase = True
        for cat_key in target_keys:
            try:
                extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except KeyError:        #try to create the key
                key_vals = self[cat_key]   #will create a key column
            p.merge_packet(self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase))
        # the following attributes used to calculate missing values
        p.key = target_keys[0]
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p


    def SetOutputLength(self,wraplength=80,maxoutlength=2048):
        if wraplength > maxoutlength:
            raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
        self.wraplength = wraplength
        self.maxoutlength = maxoutlength

    def printsection(self,instring='',ordering=[],blockstart="",blockend="",indent=0):
        import string
        # first make an ordering
        self.create_ordering(ordering)
        # now do it...
        if not instring:
            outstring = CIFStringIO(target_width=80)       # the returned string
        else:
            outstring = instring
        # print loop delimiter
        outstring.write(blockstart,canbreak=True)
        while len(self.output_order)>0:
           #print "Remaining to output " + `self.output_order`
           itemname = self.output_order.pop(0)
           item_spec = [i for i in ordering if i['dataname'].lower()==itemname.lower()]
           if len(item_spec)>0:
               col_pos = item_spec[0].get('column',-1)
           else:
               col_pos = -1
               item_spec = {}
               if not isinstance(itemname,int):  #no loop
                   if col_pos < 0: col_pos = 40
                   outstring.set_tab(col_pos)
                   itemvalue = self[itemname]
                   outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False)
                   outstring.write(' ',canbreak=True,do_tab=False)    #space after itemname
                   self.format_value(itemvalue,outstring,hints=item_spec)
               else:   # we are asked to print a loop block
                    #first make sure we have sensible coords.  Length should be one
                    #less than the current dimension
                    outstring.set_tab(10)       #guess this is OK?
                    outstring.write(' '*indent,mustbreak=True,do_tab=False); outstring.write('loop_\n',do_tab=False)
                    self.format_names(outstring,indent+2,loop_no=itemname)
                    self.format_packets(outstring,indent+2,loop_no=itemname)
        else:
            returnstring = outstring.getvalue()
        outstring.close()
        return returnstring

    def format_names(self,outstring,indent=0,loop_no=-1):
        """Print datanames from [[loop_no]] one per line"""
        temp_order = self.loops[loop_no][:]   #copy
        while len(temp_order)>0:
            itemname = temp_order.pop(0)
            outstring.write(' ' * indent,do_tab=False) 
            outstring.write(self.true_case[itemname],do_tab=False)
            outstring.write("\n",do_tab=False)

    def format_packets(self,outstring,indent=0,loop_no=-1):
       import cStringIO
       import string
       alldata = [self[a] for a in self.loops[loop_no]]
       print 'Alldata: %s' % `alldata`
       packet_data = apply(zip,alldata)
       print 'Packet data: %s' % `packet_data`
       for position in range(len(packet_data)):
           for point in range(len(packet_data[position])):
               datapoint = packet_data[position][point]
               packstring = self.format_packet_item(datapoint,indent,outstring)
           outstring.write("\n",do_tab=False)
               
    def format_packet_item(self,pack_item,indent,outstring):
           # print 'Formatting %s' % `pack_item`
           if isinstance(pack_item,basestring):
               outstring.write(self._formatstring(pack_item)) 
           else: 
               self.format_value(pack_item,outstring)
           outstring.write(' ',canbreak=True,do_tab=False)

    def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,
        lbprotocol=True,pref_protocol=True):
        import string
        if standard == 'CIF2':
            allowed_delimiters = set(['"',"'",";",None,'"""',"'''"])
        else:
            allowed_delimiters = set(['"',"'",";",None])
        if len(instring)==0: allowed_delimiters.difference_update([None])
        if len(instring) > (self.maxoutlength-2) or '\n' in instring:
                allowed_delimiters.intersection_update([";","'''",'"""'])
        if ' ' in instring or '\t' in instring or '\v' in instring or '_' in instring or ',' in instring:
                allowed_delimiters.difference_update([None])
        if '"' in instring: allowed_delimiters.difference_update(['"',None])
        if "'" in instring: allowed_delimiters.difference_update(["'",None])
        out_delimiter = ";"  #default (most conservative)
        if delimiter in allowed_delimiters:
            out_delimiter = delimiter
        elif "'" in allowed_delimiters: out_delimiter = "'"
        elif '"' in allowed_delimiters: out_delimiter = '"'
        if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
        elif out_delimiter is None: return instring
        # we are left with semicolon strings
        outstring = "\n;"
        # if there are returns in the string, try to work with them
        while 1:
            retin = string.find(instring,'\n')+1
            if retin < self.maxoutlength and retin > 0:      # honour this break
                outstring = outstring + instring[:retin]
                instring = instring[retin:]
            elif len(instring)<self.maxoutlength:    #remainder fits on one line
                outstring = outstring + instring
                break
            else:                                    #break at maximum line length
                outstring = outstring + instring[:self.maxoutlength-1] + '\n'
                instring = instring[self.maxoutlength-1:]
        return outstring + '\n;\n'

    def format_value(self,itemvalue,stringsink,compound=False,hints={}):
        """Format [[itemvalue]] to [[stringsink]], sanitizing strings and
        recursing into compound (list/dict) values"""
        if isinstance(itemvalue,basestring):   #sanitize and delimit
           stringsink.write(self._formatstring(itemvalue,delimiter=hints.get('delimiter',None)),canbreak=True)
        elif isinstance(itemvalue,(list,tuple)):
           stringsink.set_tab(0)
           stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound)
           if len(itemvalue)>0:
               self.format_value(itemvalue[0],stringsink)
               for listval in itemvalue[1:]:
                  print 'Formatting %s' % `listval`
                  stringsink.write(', ',do_tab=False)
                  self.format_value(listval,stringsink,compound=True)
           stringsink.write(']',unindent=True)
        elif isinstance(itemvalue,StarDict):
           stringsink.set_tab(0)
           stringsink.write('{',newindent=True,mustbreak=compound)  #start a new line inside
           items = itemvalue.items()
           if len(items)>0:
               stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
               self.format_value(items[0][1],stringsink)
               for key,value in items[1:]:
                   stringsink.write(', ')
                   stringsink.write("'"+key+"'"+":",canbreak=True)
                   self.format_value(value,stringsink)   #never break between key and value
           stringsink.write('}',unindent=True)
        else: 
           stringsink.write(str(itemvalue),canbreak=True)   #numbers

    def process_template(self,template_string):
        """Process a template datafile to formatting instructions"""
        template_as_cif = StarFile(StringIO(template_string),grammar="DDLm").first_block()
        #template_as_lines = template_string.split("\n")
        #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
        #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
        #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
        self.form_hints = []   #ordered array of hint dictionaries
        for item in template_as_cif.item_order:  #order of input
            if not isinstance(item,int):    #not nested
                hint_dict = {"dataname":item}
                # find the line in the file
                start_pos = re.search("(^[ \t]*" + item + "[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M)
                if start_pos.group("spec") != None:
                    spec_pos = start_pos.start("spec")-start_pos.start(0)
                    spec_char = template_string[start_pos.start("spec")]
                    if spec_char in '\'";':
                        hint_dict.update({"delimiter":spec_char})
                    if spec_char != ";":   #so we need to work out the column number
                        hint_dict.update({"column":spec_pos})
                print '%s: %s' % (item,`hint_dict`)
                self.form_hints.append(hint_dict)
            else:           #loop block
                testnames = template_as_cif.loops[item]
                total_items = len(template_as_cif.loops[item])
                testname = testnames[0]
                #find the loop spec line in the file
                loop_regex = "(^[ \t]*loop_[ \t\n\r]+" + testname + "([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1)
                loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
                loop_so_far = loop_line.end()
                packet_text = loop_line.group('packet')
                packet_regex = "[ \t]*(?P<all>(?P<sqstring>'([^\n\r\f']*)'+)|(?P<dqstring>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))"  #inner group names sqstring/dqstring assumed; only 'all' and 'none' are used below
                packet_pos = re.finditer(packet_regex,packet_text)
                line_end_pos = re.finditer("^",packet_text,re.M)
                next_end = line_end_pos.next().end()
                last_end = next_end
                for loopname in testnames:
                    hint_dict = {"dataname":loopname}
                    thismatch = packet_pos.next()
                    while thismatch.start('all') > next_end:
                        try: 
                            last_end = next_end
                            next_end = line_end_pos.next().start()
                            print 'next end %d' % next_end
                        except StopIteration:
                            pass 
                    print 'Start %d, last_end %d' % (thismatch.start('all'),last_end)
                    col_pos = thismatch.start('all') - last_end
                    if thismatch.group('none') is None:
                        hint_dict.update({'delimiter':thismatch.groups()[0][0]})
                    hint_dict.update({'column':col_pos})
                    print '%s: %s' % (loopname,`hint_dict`)
                    self.form_hints.append(hint_dict)
        return
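
    # Template sketch (illustrative): process_template harvests column
    # positions and delimiters from a dummy CIF such as
    #    _cell.length_a          5.959
    #    loop_
    #      _atom_site.label
    #      _atom_site.occupancy
    #         C1     1.0
    # and records them as hint dictionaries in self.form_hints.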

    def create_ordering(self,order_dict):
        """Create a canonical ordering that includes loops using our formatting hints dictionary"""
        requested_order = [i['dataname'] for i in order_dict]
        new_order = []
        for item in requested_order:
           if isinstance(item,basestring) and item.lower() in self.item_order:
               new_order.append(item.lower())
           elif self.has_key(item):    #in a loop somewhere
               target_loop = self.FindLoop(item)
               if target_loop not in new_order:
                   new_order.append(target_loop)
        extras = [i for i in self.item_order if i not in new_order]
        self.output_order = new_order + extras
        print 'Final order: ' + `self.output_order`

    def convert_to_string(self,dataname):
        """Convert values held in dataname value fork to string version"""
        v,is_value = self.GetFullItemValue(dataname)
        if not is_value:
            return v
        if check_stringiness(v): return v   #already strings
        # TODO...something else
        return v


    def merge(self,new_block,mode="strict",match_att=[],match_function=None,
                   rel_keys = []):
        if mode == 'strict':
           for key in new_block.keys(): 
               if self.has_key(key) and key not in match_att:
                  raise CifError( "Identical keys %s in strict merge mode" % key)
               elif key not in match_att:           #a new dataname
                   self[key] = new_block[key]
           # we get here if there are no keys in common, so we can now copy
           # the loops and not worry about overlaps
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'replace':
           newkeys = new_block.keys()
           for ma in match_att:
              try:
                   newkeys.remove(ma)        #don't touch the special ones
              except ValueError:
                   pass
           for key in new_block.keys(): 
                  if isinstance(key,basestring):
                      self[key] = new_block[key] 
           # creating the loop will remove items from other loops
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'overlay':
           print 'Overlay mode, current overwrite is %s' % self.overwrite
           raise StarError, 'Overlay block merge mode not implemented'
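           # NB: the code below is unreachable until overlay merging is
           # implemented; it still refers to names (newkeys, overlaps)
           # that are not defined on this path.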
           save_overwrite = self.overwrite
           self.overwrite = True
           for attribute in new_block.keys():
               if attribute in match_att: continue      #ignore this one
               new_value = new_block[attribute]
               #non-looped items
               if new_block.FindLoop(attribute)<0:     #not looped
                  self[attribute] = new_value 
           my_loops = self.loops.values()
           perfect_overlaps = [a for a in new_block.loops if a in my_loops]
           for po in perfect_overlaps:
              loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
              try:
                  newkeypos = map(lambda a:newkeys.index(a),loop_keys)
                  newkeypos = newkeypos[0]      #one key per loop for now
                  loop_keys = loop_keys[0] 
              except (ValueError,IndexError):
                  newkeypos = []
               overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
               new_data = map(lambda a:new_block[a],overlaps) #new packet data
               packet_data = transpose(overlap_data)
               new_p_data = transpose(new_data)
               # remove any packets for which the keys match between old and new; we
               # make the arbitrary choice that the old data stays
               if newkeypos:
                   # get matching values in new list
                   print "Old, new data:\n%s\n%s" % (`overlap_data[newkeypos]`,`new_data[newkeypos]`)
                   key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
                   # filter out any new data with these key values
                   new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
                   if new_p_data:
                       new_data = transpose(new_p_data)
                   else: new_data = []
               # wipe out the old data and enter the new stuff
               byebyeloop = self.GetLoop(overlaps[0])
               # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`)
               # Note that if, in the original dictionary, overlaps are not
               # looped, GetLoop will return the block itself.  So we check
               # for this case...
               if byebyeloop != self:
                   self.remove_loop(byebyeloop)
               self.AddLoopItem((overlaps,overlap_data))  #adding old packets
               for pd in new_p_data:                             #adding new packets
                   if pd not in packet_data:
                       for i in range(len(overlaps)):
                           #don't do this at home; we are appending
                           #to something in place
                           self[overlaps[i]].append(pd[i])
           self.overwrite = save_overwrite

    def assign_dictionary(self,dic):
        if not dic.diclang=="DDLm":
            print "Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri
            return
        self.dictionary = dic

    def unassign_dictionary(self):
        """Remove dictionary-dependent behaviour"""
        self.dictionary = None



class StarPacket(list):
    def merge_packet(self,incoming):
        """Merge contents of incoming packet with this packet"""
        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
        self.append(incoming)
        for na in new_attrs:
            setattr(self,na,getattr(incoming,na))
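
    # Usage sketch (the attribute name is illustrative): attributes beginning
    # with a single underscore are treated as dataname values and copied onto
    # the receiving packet.
    #
    #   p = StarPacket(['C1'])
    #   q = StarPacket(['0.5'])
    #   q._atom_site_occupancy = '0.5'
    #   p.merge_packet(q)
    #   print p._atom_site_occupancy    # -> '0.5'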

    def __getattr__(self,att_name):
        """Derive a missing attribute"""
        if att_name.lower() in self.__dict__:
            return getattr(self,att_name.lower())
        if att_name in ('cif_dictionary','fulldata','key'):
            raise AttributeError, 'Programming error: cannot compute value of %s' % att_name
        d = self.cif_dictionary
        c = self.fulldata
        k = self.key
        d.derive_item(att_name,c,store_value=True)
        # now pick out the new value
        keyval = getattr(self,k)
        full_pack = c.GetKeyedPacket(k,keyval)
        return getattr(full_pack,att_name)
        
class BlockCollection(object):
    def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
                 characterset='ascii',scoping='instance',parent_id=None,**kwargs):
        import collections
        self.dictionary = {}
        self.standard = standard
        self.lower_keys = set()           # short_cuts
        self.renamed = {}
        self.characterset = characterset
        self.PC = collections.namedtuple('PC',['block_id','parent'])
        self.child_table = {}
        self.visible_keys = []            # for efficiency
        self.parent_id = parent_id
        self.scoping = scoping  #will trigger setting of child table
        self.blocktype = blocktype
        if isinstance(datasource,BlockCollection):
            self.merge_fast(datasource)
            self.scoping = scoping   #reset visibility
        elif isinstance(datasource,dict):
            for key,value in datasource.items():
                 self[key]= value
        self.header_comment = ''
     
    def unlock(self):
        """Allow overwriting of all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite=True

    def lock(self):
        """Disallow overwriting for all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite = False

    def __str__(self):
        return self.WriteOut()

    def __setitem__(self,key,value):
        self.NewBlock(key,value,parent=None)

    def __getitem__(self,key):
        if isinstance(key,basestring):
           lowerkey = key.lower()
           if lowerkey in self.lower_keys:
               return self.dictionary[lowerkey]
           #print 'Visible keys:' + `self.visible_keys`
           #print 'All keys' + `self.lower_keys`
           #print 'Child table' + `self.child_table`
           raise KeyError,'No such item %s' % key

    # we have to get an ordered list of the current keys,
    # as we'll have to delete one of them anyway.
    # Deletion will delete any key regardless of visibility

    def __delitem__(self,key):
        dummy = self[key]   #raise error if not present
        lowerkey = key.lower()
        # get rid of all children recursively as well
        children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey]
        for child in children:
            del self[child]   #recursive call
        del self.dictionary[lowerkey]
        del self.child_table[lowerkey]
        try:
            self.visible_keys.remove(lowerkey)
        except ValueError:     # list.remove raises ValueError, not KeyError
            pass
        self.lower_keys.remove(lowerkey)

    def __len__(self):
        return len(self.visible_keys)

    def __contains__(self,item):
        """Support the 'in' operator"""
        return self.has_key(item)

    # We iterate over all visible
    def __iter__(self):
        for one_block in self.keys():
            yield self[one_block]

    # TODO: handle different case
    def keys(self):
        return self.visible_keys

    # changes to take case independence into account
    def has_key(self,key):
        if not isinstance(key,basestring): return 0
        if key.lower() in self.visible_keys:
            return 1
        return 0

    def get(self,key,default=None):
        if self.has_key(key):     # take account of case
            return self.__getitem__(key)
        else:
            return default

    def clear(self):
        self.dictionary.clear()
        self.lower_keys = set()
        self.child_table = {}
        self.visible_keys = []

    def copy(self):
        newcopy = self.dictionary.copy()  #all blocks
        newcopy = BlockCollection(datasource=newcopy,standard=self.standard,
                                  parent_id=self.parent_id)
        newcopy.child_table = self.child_table.copy()
        newcopy.lower_keys = self.lower_keys.copy()  #don't share the set with the original
        newcopy.characterset = self.characterset
        newcopy.scoping = self.scoping  #this sets visible keys
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self[key] = adict[key]

    def items(self):
        return [(a,self[a]) for a in self.keys()]

    def first_block(self):
        """Return the 'first' block.  This is not necessarily the first block in the file."""
        if self.keys():
            return self[self.keys()[0]]

    def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
        if blockcontents is None:
            blockcontents = StarBlock()
        if self.standard == "CIF":
            blockcontents.setmaxnamelength(75)
        if len(blockname)>75:
                 raise StarError , 'Blockname %s is longer than 75 characters' % blockname
        if fix:
            newblockname = re.sub('[  \t]','_',blockname)
        else: newblockname = blockname
        new_lowerbn = newblockname.lower()
        if new_lowerbn in self.lower_keys:
            if self.standard is not None:    #already there
               toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
               if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
                  while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
               elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
                  replace_name = new_lowerbn            
                  while replace_name in self.lower_keys: replace_name = replace_name + '+'
                  self._rekey(new_lowerbn,replace_name)
                  # now continue on to add in the new block
                  if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                      parent = replace_name
               else:
                  raise StarError( "Attempt to replace existing block " + blockname)
            else:
               del self[new_lowerbn]
        self.dictionary.update({new_lowerbn:blockcontents})
        self.lower_keys.add(new_lowerbn)
        if parent is None:
           self.child_table[new_lowerbn]=self.PC(newblockname,None)
           self.visible_keys.append(new_lowerbn)
        else:
           if parent.lower() in self.lower_keys:
              if self.scoping == 'instance':
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
              else:
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
                 self.visible_keys.append(new_lowerbn)
           else:
               print 'Warning: parent block %s does not exist for child %s' % (parent,newblockname)
        return new_lowerbn  #in case calling routine wants to know
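
    # Usage sketch (block names are illustrative):
    #
    #   bc = BlockCollection()
    #   bc.NewBlock('expt')                    # a top-level data block
    #   bc.NewBlock('scan1', parent='expt')    # a nested (save frame) block
    #   bc.scoping = 'dictionary'              # make nested blocks visible
    #   print sorted(bc.keys())                # -> ['expt', 'scan1']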

    def _rekey(self,oldname,newname,block_id=''):
        """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
           does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version."""
        move_block = self[oldname]    #old block
        is_visible = oldname in self.visible_keys
        move_block_info = self.child_table[oldname]    #old info
        move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
        # now rewrite the necessary bits
        self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
        del self[oldname]   #do this after updating child table so we don't delete children
        self.dictionary.update({newname:move_block})
        self.lower_keys.add(newname)
        if block_id == '':
           self.child_table.update({newname:move_block_info})
        else:
           self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
        if is_visible: self.visible_keys += [newname]

    def rename(self,oldname,newname):
        """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
           conformance checks are conducted."""
        realoldname = oldname.lower()
        realnewname = newname.lower()
        if realnewname in self.lower_keys:
            raise StarError,'Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname)
        if realoldname not in self.lower_keys:
            raise KeyError,'Cannot find old block %s' % realoldname
        self._rekey(realoldname,realnewname,block_id=newname)
        
    def merge_fast(self,new_bc,parent=None):
        """Do a fast merge"""
        if self.standard is None:
            mode = 'replace' 
        else:
            mode = 'strict'
        overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
        if overlap_flag and mode != 'replace':
            double_keys = self.lower_keys.intersection(new_bc.lower_keys)
            for dup_key in double_keys:
                  our_parent = self.child_table[dup_key].parent
                  their_parent = new_bc.child_table[dup_key].parent
                  if (our_parent is None and their_parent is not None and parent is None) or\
                      parent is not None:  #rename our block
                    start_key = dup_key
                    while start_key in self.lower_keys: start_key = start_key+'+'
                    self._rekey(dup_key,start_key)
                    if parent is not None and parent.lower() == dup_key:  #we just renamed the prospective parent!
                        parent = start_key
                  elif our_parent is not None and their_parent is None and parent is None:
                    start_key = dup_key
                    while start_key in new_bc.lower_keys: start_key = start_key+'+'
                    new_bc._rekey(dup_key,start_key)
                  else: 
                    raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
        self.dictionary.update(new_bc.dictionary) 
        self.lower_keys.update(new_bc.lower_keys)
        self.visible_keys += (list(new_bc.lower_keys))
        self.child_table.update(new_bc.child_table)
        if parent is not None:     #redo the child_table entries
              reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
              reparent_dict = [(a[0],self.PC(a[1],parent.lower())) for a in reparent_list]
              self.child_table.update(dict(reparent_dict))
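
    # Usage sketch: collections with disjoint block names are simply unioned,
    # while a duplicated top-level name in strict mode raises StarError.
    #
    #   b1 = BlockCollection(); b1.NewBlock('alpha')
    #   b2 = BlockCollection(); b2.NewBlock('beta')
    #   b1.merge_fast(b2)
    #   print 'beta' in b1     # -> True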

    def merge(self,new_bc,mode=None,parent=None,single_block=[],
                   idblock="",match_att=[],match_function=None):
        if mode is None:
            if self.standard is None:
               mode = 'replace'
            else:
               mode = 'strict'
        if single_block:
            self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                                   match_att=match_att,
                                                   match_function=match_function)
            return None
        base_keys = [a[1].block_id for a in self.child_table.items()]
        block_to_item = base_keys   #default
        new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
        if match_att:
            #make a blockname -> item name map
            if match_function:
                block_to_item = map(lambda a:match_function(self[a]),self.keys())
            else:
                block_to_item = map(lambda a:self[a].get(match_att[0],None),self.keys())
            #print `block_to_item`
        for key in new_keys:        #run over incoming blocknames
            if key == idblock: continue    #skip dictionary id
            basekey = key           #default value
            if len(match_att)>0:
               attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
            else:
               attval = 0
            for ii in range(len(block_to_item)):  #do this way to get looped names
                thisatt = block_to_item[ii]       #keyname in old block
                #print "Looking for %s in %s" % (attval,thisatt)
                if attval == thisatt or \
                   (isinstance(thisatt,list) and attval in thisatt):
                      basekey = base_keys.pop(ii)
                      block_to_item.remove(thisatt)
                      break
            if not self.has_key(basekey) or mode=="replace":
                new_parent = new_bc.get_parent(key)
                if parent is not None and new_parent is None:
                   new_parent = parent
                self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
            else:
                if mode=="strict":
                    raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
                elif mode=="overlay":
                    # print "Merging block %s with %s" % (basekey,key)
                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
                else:  
                    raise StarError( "Merge called with unknown mode %s" % mode)
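
    # Usage sketch: by default, incoming blocks whose names are not already
    # present are added; match_att (the dataname below is illustrative)
    # matches incoming blocks to existing ones by a dataname value instead
    # of by block name.
    #
    #   base.merge(incoming)
    #   base.merge(incoming, match_att=['_dictionary.title'])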
         
    def checknamelengths(self,target_block,maxlength=-1):
        if maxlength < 0:
            return
        else:
            toolong = filter(lambda a:len(a)>maxlength, target_block.keys())
        outstring = ""
        for it in toolong: outstring += "\n" + it
        if toolong:
           raise StarError( 'Following data names too long:' + outstring)

    def get_all(self,item_name):
        raw_values = map(lambda a:self[a].get(item_name),self.keys())
        raw_values = filter(lambda a:a != None, raw_values)
        ret_vals = []
        for rv in raw_values:
            if isinstance(rv,list):
                for rvv in rv:
                    if rvv not in ret_vals: ret_vals.append(rvv)
            else:
                if rv not in ret_vals: ret_vals.append(rv)
        return ret_vals

    def __setattr__(self,attr_name,newval):
        if attr_name == 'scoping':
            if newval not in ('dictionary','instance'):
                raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
            if newval == 'dictionary':
                self.visible_keys = [a for a in self.lower_keys] 
            else:
                #only top-level datablocks visible
                self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None]
        object.__setattr__(self,attr_name,newval)

    def get_parent(self,blockname):
        """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
        possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
        try:
            first = possibles.next()   #get first one
        except StopIteration:
            raise StarError('no parent for %s' % blockname)
        try:
           second = possibles.next()
        except StopIteration:
           return first[1].parent
        raise StarError('More than one parent for %s' % blockname)

    def get_roots(self):
        """Get the top-level blocks"""
        return [a for a in self.child_table.items() if a[1].parent==None]

    def get_children(self,blockname,include_parent=False,scoping='dictionary'):
        """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
        True, the parent block will also be included in the block collection as the root."""
        newbc = BlockCollection()
        block_lower = blockname.lower()
        proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
        newbc.child_table = dict(proto_child_table)
        if not include_parent:
           newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        if include_parent:
            newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
            newbc.lower_keys.add(block_lower)
            newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
        newbc.scoping = scoping
        return newbc
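
    # Usage sketch (block names are illustrative):
    #
    #   bc = BlockCollection()
    #   bc.NewBlock('top'); bc.NewBlock('inner', parent='top')
    #   kids = bc.get_children('top')
    #   print 'inner' in kids    # -> True; 'top' itself is excluded by default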

    def get_immediate_children(self,parentname):
        """Get the next level of children of the given block as a list, without nested levels"""
        child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
        return child_handles

    def get_child_list(self,parentname):
        """Get a list of all child categories"""
        child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
        return child_handles

    def is_child_of_parent(self,parentname,blockname):
        """Recursively search for children of blockname, case is important for now"""
        checkname = parentname.lower()
        more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
        if blockname.lower() in more_children:
           return True
        else:
           for one_child in more_children:
               if self.is_child_of_parent(one_child,blockname): return True
        return False
           
    def set_parent(self,parentname,childname):
        """Set the parent block"""
        # first check that both blocks exist
        if parentname.lower() not in self.lower_keys:
            raise KeyError('Parent block %s does not exist' % parentname)
        if childname.lower() not in self.lower_keys:
            raise KeyError('Child block %s does not exist' % childname)
        old_entry = self.child_table[childname.lower()]
        self.child_table[childname.lower()]=self.PC(old_entry.block_id,
               parentname.lower())
        self.scoping = self.scoping #reset visibility
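
    # Usage sketch:
    #
    #   bc = BlockCollection()
    #   bc.NewBlock('a'); bc.NewBlock('b')
    #   bc.set_parent('a', 'b')     # 'b' is now nested inside 'a'
    #   print bc.get_parent('b')    # -> 'a'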
            
    def WriteOut(self,comment='',wraplength=80,maxoutlength=2048):
        import cStringIO
        if not comment:
            comment = self.header_comment
        outstring = cStringIO.StringIO()
        outstring.write(comment)
        # loop over top-level
        top_block_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent is None]
        for blockref,blockname in top_block_names:
            outstring.write('\n' + 'data_' +blockname+'\n')
            child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==blockref]
            if self.standard == 'Dic':              #put contents before save frames
              self[blockref].SetOutputLength(wraplength,maxoutlength)
              outstring.write(str(self[blockref]))
            for child_ref,child_name in child_names:
                outstring.write('\n' + 'save_' + child_name + '\n')
                self.block_to_string(child_ref,child_name,outstring,4)    
                outstring.write('\n' + 'save_'+ '\n')   
            if self.standard != 'Dic':              #put contents after save frames
                self[blockref].SetOutputLength(wraplength,maxoutlength)
                outstring.write(str(self[blockref]))
        returnstring =  outstring.getvalue()
        outstring.close()
        return returnstring
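
    # Output sketch: for the collection built in the NewBlock example above,
    # WriteOut() produces (contents follow the save frames because
    # self.standard is not 'Dic'):
    #
    #   data_expt
    #   save_scan1
    #   ...frame contents...
    #   save_
    #   ...top-level contents...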

    def block_to_string(self,block_ref,block_id,outstring,indentlevel=0):
        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children"""
        child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==block_ref]
        if self.standard == 'Dic':
            outstring.write(str(self[block_ref]))
        for child_ref,child_name in child_names:
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string(child_ref,child_name,outstring,indentlevel)
            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
        if self.standard != 'Dic':
            outstring.write(str(self[block_ref]))
        

class StarFile(BlockCollection):
    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
                scoping='instance',grammar='1.1',scantype='standard',
                **kwargs):
        super(StarFile,self).__init__(datasource=datasource,**kwargs)
        self.my_uri = getattr(datasource,'my_uri','')
        if maxoutlength == 0:
            self.maxoutlength = 2048 
        else:
            self.maxoutlength = maxoutlength
        self.scoping = scoping
        if isinstance(datasource,basestring) or hasattr(datasource,"read"):
            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
            maxlength = maxinlength)
        self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file 
#               Produced by PySTARRW module
# 
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""
    def set_uri(self,my_uri): self.my_uri = my_uri
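
# Usage sketch: 'mydata.star' is a hypothetical name; string datasources are
# handed to ReadStar, which resolves them with urlopen, so local paths and
# URLs can both work.
#
#   sf = StarFile('mydata.star', grammar='1.1')
#   first = sf.first_block()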


from StringIO import StringIO
import math
class CIFStringIO(StringIO):
    def __init__(self,target_width=80,**kwargs):
        StringIO.__init__(self,**kwargs)
        self.currentpos = 0
        self.target_width = target_width
        self.tabwidth = -1
        self.indentlist = [0]

    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,startcol=-1):
        """Write a string with correct linebreak, tabs and indents"""
        # do we need to break?
        if mustbreak:    #insert a new line and indent
            StringIO.write(self,'\n' + ' '*self.indentlist[-1])
            self.currentpos = self.indentlist[-1]
        if self.currentpos+len(outstring)>self.target_width: #try to break
            if canbreak:
                StringIO.write(self,'\n'+' '*self.indentlist[-1])
                self.currentpos = self.indentlist[-1]
        if newindent:           #indent by current amount
            if self.indentlist[-1] == 0:    #first time
                self.indentlist.append(self.currentpos)
                print 'Indentlist: ' + `self.indentlist`
            else:
                self.indentlist.append(self.indentlist[-1]+2)
        elif unindent:
            if len(self.indentlist)>1:
                self.indentlist.pop()
            else:
                print 'Warning: cannot unindent any further'
        #handle tabs
        if self.tabwidth >0 and do_tab:
            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
            #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
            if self.currentpos < next_stop:
                StringIO.write(self,(next_stop-self.currentpos)*' ')
                self.currentpos = next_stop
        #now output the string
        StringIO.write(self,outstring)
        last_line_break = outstring.rfind('\n')
        if last_line_break >=0:
            self.currentpos = len(outstring)-last_line_break
        else:
            self.currentpos = self.currentpos + len(outstring)
        
    def set_tab(self,tabwidth):
        """Set the tab stop position"""
        self.tabwidth = tabwidth
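
    # Usage sketch: a write that would pass target_width is broken at points
    # flagged with canbreak=True.
    #
    #   out = CIFStringIO(target_width=12)
    #   out.write('_item.name ', do_tab=False)
    #   out.write('some_value', canbreak=True)  # 11+10 > 12, so a break is inserted
    #   print out.getvalue()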

class StarError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar Format error: '+ self.value 

class StarLengthError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar length error: ' + self.value

def ReadStar(filename,prepared = None,maxlength=-1,
             scantype='standard',grammar='1.1',CBF=False):
    import string
    import codecs
    import sys
    if prepared is None:
        prepared = StarFile()  #a fresh target; a mutable default would be shared between calls
    # save desired scoping
    save_scoping = prepared.scoping
    if grammar=="1.1":
        import YappsStarParser_1_1 as Y
    elif grammar=="1.0":
        import YappsStarParser_1_0 as Y
    elif grammar=="DDLm":
        import YappsStarParser_DDLm as Y
    if isinstance(filename,basestring):
        filestream = urlopen(filename)
    else:
        filestream = filename   #already opened for us
    my_uri = ""
    if hasattr(filestream,"geturl"): 
        my_uri = filestream.geturl()
    text = unicode(filestream.read(),"utf8")
    if isinstance(filename,basestring): #we opened it, we close it
        filestream.close()
    if not text:      # empty file, return empty block
        new_star = StarFile()
        new_star.set_uri(my_uri)   # set_uri returns None, so cannot be chained
        return new_star
    # filter out non-ASCII characters in CBF files if required.  We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
    if CBF:
       text_bits  = text.split("-BINARY-FORMAT-SECTION-") 
       text = text_bits[0] 
       for section in range(2,len(text_bits),2):
           text = text+" (binary omitted)"+text_bits[section]
    # we recognise ctrl-Z as end of file
    endoffile = text.find('\x1a')
    if endoffile >= 0: 
        text = text[:endoffile]
    split = string.split(text,'\n')
    if maxlength > 0:
        toolong = filter(lambda a:len(a)>maxlength,split)
        if toolong:
            pos = split.index(toolong[0])
            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    if scantype == 'standard':
            parser = Y.StarParser(Y.StarParserScanner(text))
    else:
            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
    proto_star = None
    try:
        proto_star = getattr(parser,"input")(prepared)
    except Y.yappsrt.SyntaxError,e:
           input = parser._scanner.input
           Y.yappsrt.print_error(input, e, parser._scanner)
    except Y.yappsrt.NoMoreTokens:
           print >>sys.stderr, 'Could not complete parsing; stopped around here:'
           print >>sys.stderr, parser._scanner
    except ValueError:
           pass
    if proto_star is None:
        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
        errorstring = errorstring + '\nParser status: %s' % `parser._scanner`
        raise StarError( errorstring)
    # set visibility correctly
    proto_star.scoping = 'dictionary'
    proto_star.set_uri(my_uri)
    proto_star.scoping = save_scoping
    return proto_star
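
# Usage sketch ('mydata.star' is a hypothetical file):
#
#   sf = ReadStar('mydata.star', grammar='1.1')
#   sf.scoping = 'dictionary'    # expose save frames as well as data blocks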

def get_dim(dataitem,current=0,packlen=0):
    zerotypes = (int, long, float, basestring)
    if isinstance(dataitem,zerotypes):   #scalar: no dimensions to count
        return current, packlen
    if not dataitem.__class__ == ().__class__ and \
       not dataitem.__class__ == [].__class__:
       return current, packlen
    elif len(dataitem)>0: 
    #    print "Get_dim: %d: %s" % (current,`dataitem`)
        return get_dim(dataitem[0],current+1,len(dataitem))
    else: return current+1,0
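
# Worked examples: the return value is (number of dimensions, length of the
# innermost list encountered).
#
#   get_dim(5)                # -> (0, 0)
#   get_dim([1,2,3])          # -> (1, 3)
#   get_dim([[1,2],[3,4]])    # -> (2, 2)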
    
def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
    """Insert line folding characters into instring between min/max wraplength"""
    # first check that we need to do this
    lines = instring.split('\n')
    line_len = [len(l) for l in lines]
    if max(line_len) < maxwraplength and re.match(r"\\[ \v\t\f]*" + "\n",instring) is None:
        return instring
    outstring = "\\\n"   #header
    for l in lines:
        if len(l) < maxwraplength:
            outstring = outstring + l
            if len(l) > 0 and l[-1]=='\\': #who'da thunk it?  A line ending with a backslash
                    outstring = outstring + "\\\n"  #
            outstring = outstring + "\n"  #  put back the split character
        else:
            current_bit = l
            while len(current_bit) > maxwraplength:
                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
                if space_pos is not None and space_pos.start() < maxwraplength-minwraplength:
                    #NB: fold point reconstructed (the original text was damaged):
                    #break at the first whitespace found after minwraplength
                    outstring = outstring + current_bit[:minwraplength+space_pos.start()] + "\\\n"
                    current_bit = current_bit[minwraplength+space_pos.start():]
                else:            #no whitespace available, break at maximum length
                    outstring = outstring + current_bit[:maxwraplength-1] + "\\\n"
                    current_bit = current_bit[maxwraplength-1:]
            outstring = outstring + current_bit
            if len(current_bit) > 0 and current_bit[-1] == '\\':  #protect a real trailing backslash
                outstring = outstring + "\\\n"
            outstring = outstring + "\n"   #put back the split character
    return outstring[:-1]    #drop the extra newline added after the final line

def remove_line_folding(instring):
    """Remove line folding from instring"""
    if re.match(r"\\[ \v\t\f]*" + "\n",instring) is not None:
        return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M)
    else:
        return instring

def apply_line_prefix(instring,prefix):
    """Prefix every line in instring with prefix"""
    if prefix[0] != ";" and "\\" not in prefix:
        header = re.match(r"(\\[ \v\t\f]*" + "\n)",instring)
        if header is not None:
            print 'Found line folded string for prefixing...'
            not_header = instring[header.end():]
            outstring = prefix + "\\\\\n" + prefix
        else:
            print 'No folding in input string...'
            not_header = instring
            outstring = prefix + "\\\n" + prefix
        outstring = outstring + not_header.replace("\n","\n"+prefix)
        return outstring
    raise StarError, "Requested prefix starts with semicolon or contains a backslash: " + prefix

def remove_line_prefix(instring):
    """Remove prefix from every line if present"""
    prefix_match = re.match("(?P<prefix>[^;\\\n][^\n\\\\]+)(?P<folding>\\\\{1,2}[ \t\v\f]*\n)",instring)
    if prefix_match is not None:
        prefix_text = prefix_match.group('prefix')
        print 'Found prefix %s' % prefix_text
        prefix_end = prefix_match.end('folding')
        # keep any line folding instructions
        if prefix_match.group('folding')[:2]=='\\\\':  #two backslashes: folding was applied before prefixing
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return "\\" + outstring  #keep line folding first line
        else:
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return outstring[1:]   #drop first line ending, no longer necessary
    else:
        return instring

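# Round-trip sketch: folding a long line and then removing the fold should
# recover the original string (100 'x's fold at column 79):
#
#   folded = apply_line_folding('x'*100)
#   print remove_line_folding(folded) == 'x'*100    # -> True
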
def listify(item):
    if isinstance(item,basestring): return [item]
    else: return item

#Transpose the list of lists passed to us
def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = [] 
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl
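
# Worked example:
#
#   transpose([[1,2,3],[4,5,6]])    # -> [[1, 4], [2, 5], [3, 6]]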


def check_stringiness(data):
   """Check that the contents of data are all strings"""
   if not hasattr(data,'dtype'):   #so not Numpy
       from numbers import Number
       if isinstance(data,Number): return False
       elif isinstance(data,basestring): return True
       elif data is None:return False  #should be data are None :)
       else:
           for one_item in data:
               if not check_stringiness(one_item): return False
           return True   #all must be strings
   else:   #numerical python
       import numpy
       if data.ndim == 0:    #a bare value
           if data.dtype.kind in ['S','U']: return True
           else: return False
       else:
           for one_item in numpy.nditer(data):
               print 'numpy data: ' + `one_item`
               if not check_stringiness(one_item): return False
           return True
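
# Worked examples:
#
#   check_stringiness(['a', ['b', 'c']])    # -> True
#   check_stringiness(['a', 2])             # -> False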


#No documentation flags

Functions

def ReadStar(

filename, prepared=<__pdoc_file_module__.StarFile object at 0x4094b70c>, maxlength=-1, scantype='standard', grammar='1.1', CBF=False)

def ReadStar(filename,prepared = StarFile(),maxlength=-1,
             scantype='standard',grammar='1.1',CBF=False):
    import string
    import codecs
    # save desired scoping
    save_scoping = prepared.scoping
    if grammar=="1.1":
        import YappsStarParser_1_1 as Y
    elif grammar=="1.0":
        import YappsStarParser_1_0 as Y
    elif grammar=="DDLm":
        import YappsStarParser_DDLm as Y
    if isinstance(filename,basestring):
        filestream = urlopen(filename)
    else:
        filestream = filename   #already opened for us
    my_uri = ""
    if hasattr(filestream,"geturl"): 
        my_uri = filestream.geturl()
    text = unicode(filestream.read(),"utf8")
    if isinstance(filename,basestring): #we opened it, we close it
        filestream.close()
    if not text:      # empty file, return empty block
        return StarFile().set_uri(my_uri)
    # filter out non-ASCII characters in CBF files if required.  We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
    if CBF:
       text_bits  = text.split("-BINARY-FORMAT-SECTION-") 
       text = text_bits[0] 
       for section in range(2,len(text_bits),2):
           text = text+" (binary omitted)"+text_bits[section]
    # we recognise ctrl-Z as end of file
    endoffile = text.find('\x1a')
    if endoffile >= 0: 
        text = text[:endoffile]
    split = string.split(text,'\n')
    if maxlength > 0:
        toolong = filter(lambda a:len(a)>maxlength,split)
        if toolong:
            pos = split.index(toolong[0])
            raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength))
    if scantype == 'standard':
            parser = Y.StarParser(Y.StarParserScanner(text))
    else:
            parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex'))
    proto_star = None
    try:
        proto_star = getattr(parser,"input")(prepared)
    except Y.yappsrt.SyntaxError,e:
           input = parser._scanner.input
           Y.yappsrt.print_error(input, e, parser._scanner)
    except Y.yappsrt.NoMoreTokens:
           print >>sys.stderr, 'Could not complete parsing; stopped around here:'
           print >>sys.stderr, parser._scanner
    except ValueError:
           pass
    if proto_star == None:
        errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval
        errorstring = errorstring + '\nParser status: %s' % `parser._scanner`
        raise StarError( errorstring)
    # set visibility correctly
    proto_star.scoping = 'dictionary'
    proto_star.set_uri(my_uri)
    proto_star.scoping = save_scoping
    return proto_star

def apply_line_folding(

instring, minwraplength=60, maxwraplength=80)

Insert line folding characters into instring between min/max wraplength

def apply_line_folding(instring,minwraplength=60,maxwraplength=80):
    """Insert line folding characters into instring between min/max wraplength"""
    # first check that we need to do this
    lines = instring.split('\n')
    line_len = [len(l) for l in lines]
    if max(line_len) < maxwraplength and re.match("\\[ \v\t\f]*\n",instring) is None:
        return instring
    outstring = "\\\n"   #header
    for l in lines:
        if len(l) < maxwraplength:
            outstring = outstring + l
            if len(l) > 0 and l[-1]=='\\': #who'da thunk it?  A line ending with a backslash
                    outstring = outstring + "\\\n"  #
            outstring = outstring + "\n"  #  put back the split character
        else:
            current_bit = l
            while len(current_bit) > maxwraplength:
                space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:])
                if space_pos is not None and space_pos.start()

def apply_line_prefix(

instring, prefix)

Prefix every line in instring with prefix

def apply_line_prefix(instring,prefix):
    """Prefix every line in instring with prefix"""
    if prefix[0] != ";" and "\\" not in prefix:
        header = re.match(r"(\\[ \v\t\f]*" +"\n)",instring)
        if header is not None:
            print 'Found line folded string for prefixing...'
            not_header = instring[header.end():]
            outstring = prefix + "\\\\\n" + prefix
        else:
            print 'No folding in input string...'
            not_header = instring
            outstring = prefix + "\\\n" + prefix
        outstring = outstring + not_header.replace("\n","\n"+prefix)
        return outstring
    raise StarError, "Requested prefix starts with semicolon or contains a backslash: " + prefix

def check_stringiness(

data)

Check that the contents of data are all strings

def check_stringiness(data):
   """Check that the contents of data are all strings"""
   if not hasattr(data,'dtype'):   #so not Numpy
       from numbers import Number
       if isinstance(data,Number): return False
       elif isinstance(data,basestring): return True
       elif data is None:return False  #should be data are None :)
       else:
           for one_item in data:
               if not check_stringiness(one_item): return False
           return True   #all must be strings
   else:   #numerical python
       import numpy
       if data.ndim == 0:    #a bare value
           if data.dtype.kind in ['S','U']: return True
           else: return False
       else:
           for one_item in numpy.nditer(data):
               print 'numpy data: ' + `one_item`
               if not check_stringiness(one_item): return False
           return True

def get_dim(

dataitem, current=0, packlen=0)

def get_dim(dataitem,current=0,packlen=0):
    zerotypes = [int, long, float, basestring]
    if type(dataitem) in zerotypes:
        return current, packlen
    if not dataitem.__class__ == ().__class__ and \
       not dataitem.__class__ == [].__class__:
       return current, packlen
    elif len(dataitem)>0: 
    #    print "Get_dim: %d: %s" % (current,`dataitem`)
        return get_dim(dataitem[0],current+1,len(dataitem))
    else: return current+1,0

def listify(

item)

def listify(item):
    if isinstance(item,basestring): return [item]
    else: return item

def remove_line_folding(

instring)

Remove line folding from instring

def remove_line_folding(instring):
    """Remove line folding from instring"""
    if re.match(r"\\[ \v\t\f]*" +"\n",instring) is not None:
        return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M)
    else:
        return instring

def remove_line_prefix(

instring)

Remove prefix from every line if present

def remove_line_prefix(instring):
    """Remove prefix from every line if present"""
    prefix_match = re.match("(?P[^;\\\n][^\n\\\\]+)(?P\\\\{1,2}[ \t\v\f]*\n)",instring)
    if prefix_match is not None:
        prefix_text = prefix_match.group('prefix')
        print 'Found prefix %s' % prefix_text
        prefix_end = prefix_match.end('folding')
        # keep any line folding instructions
        if prefix_match.group('folding')[:2]=='\\\\':  #two backslashes
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return "\\" + outstring  #keep line folding first line
        else:
            outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n")
            return outstring[1:]   #drop first line ending, no longer necessary
    else:
        return instring

def transpose(

base_list)

def transpose(base_list):
    new_lofl = []
    full_length = len(base_list)
    opt_range = range(full_length)
    for i in range(len(base_list[0])):
       new_packet = [] 
       for j in opt_range:
          new_packet.append(base_list[j][i])
       new_lofl.append(new_packet)
    return new_lofl

Classes

class BlockCollection

class BlockCollection(object):
    def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
                 characterset='ascii',scoping='instance',parent_id=None,**kwargs):
        import collections
        self.dictionary = {}
        self.standard = standard
        self.lower_keys = set()           # short_cuts
        self.renamed = {}
        self.characterset = characterset
        self.PC = collections.namedtuple('PC',['block_id','parent'])
        self.child_table = {}
        self.visible_keys = []            # for efficiency
        self.parent_id = parent_id
        self.scoping = scoping  #will trigger setting of child table
        self.blocktype = blocktype
        if isinstance(datasource,BlockCollection):
            self.merge_fast(datasource)
            self.scoping = scoping   #reset visibility
        elif isinstance(datasource,dict):
            for key,value in datasource.items():
                 self[key]= value
        self.header_comment = ''
     
    def unlock(self):
        """Allow overwriting of all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite=True

    def lock(self):
        """Disallow overwriting for all blocks in this collection"""
        for a in self.lower_keys:
            self[a].overwrite = False

    def __str__(self):
        return self.WriteOut()

    def __setitem__(self,key,value):
        self.NewBlock(key,value,parent=None)

    def __getitem__(self,key):
        if isinstance(key,basestring):
           lowerkey = key.lower()
           if lowerkey in self.lower_keys:
               return self.dictionary[lowerkey]
           #print 'Visible keys:' + `self.visible_keys`
           #print 'All keys' + `self.lower_keys`
           #print 'Child table' + `self.child_table`
           raise KeyError,'No such item %s' % key

    # we have to get an ordered list of the current keys,
    # as we'll have to delete one of them anyway.
    # Deletion will delete any key regardless of visibility

    def __delitem__(self,key):
        dummy = self[key]   #raise error if not present
        lowerkey = key.lower()
        # get rid of all children recursively as well
        children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey]
        for child in children:
            del self[child]   #recursive call
        del self.dictionary[lowerkey]
        del self.child_table[lowerkey]
        try:
            self.visible_keys.remove(lowerkey)
        except KeyError:
            pass
        self.lower_keys.remove(lowerkey)

    def __len__(self):
        return len(self.visible_keys)

    def __contains__(self,item):
        """Support the 'in' operator"""
        return self.has_key(item)

    # We iterate over all visible
    def __iter__(self):
        for one_block in self.keys():
            yield self[one_block]

    # TODO: handle different case
    def keys(self):
        return self.visible_keys

    # changes to take case independence into account
    def has_key(self,key):
        if not isinstance(key,basestring): return 0
        if key.lower() in self.visible_keys:
            return 1
        return 0

    def get(self,key,default=None):
        if self.has_key(key):     # take account of case
            return self.__getitem__(key)
        else:
            return default

    def clear(self):
        self.dictionary.clear()
        self.lower_keys = set()
        self.child_table = {}
        self.visible_keys = []

    def copy(self):   
        newcopy = self.dictionary.copy()  #all blocks
        newcopy = BlockCollection('',newcopy,parent_id=self.parent_id)
        newcopy.child_table = self.child_table.copy()
        newcopy.lower_keys = self.lower_keys
        newcopy.characterset = self.characterset
        newcopy.scoping = self.scoping  #this sets visible keys
        return newcopy

    def update(self,adict):
        for key in adict.keys():
            self[key] = adict[key]

    def items(self):
        return [(a,self[a]) for a in self.keys()]

    def first_block(self):
        """Return the 'first' block.  This is not necessarily the first block in the file."""
        if self.keys():
            return self[self.keys()[0]]

    def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
        if blockcontents is None:
            blockcontents = StarBlock()
        if self.standard == "CIF":
            blockcontents.setmaxnamelength(75)
        if len(blockname)>75:
                 raise StarError , 'Blockname %s is longer than 75 characters' % blockname
        if fix:
            newblockname = re.sub('[  \t]','_',blockname)
        else: newblockname = blockname
        new_lowerbn = newblockname.lower()
        if new_lowerbn in self.lower_keys:
            if self.standard is not None:    #already there
               toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
               if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
                  while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
               elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
                  replace_name = new_lowerbn            
                  while replace_name in self.lower_keys: replace_name = replace_name + '+'
                  self._rekey(new_lowerbn,replace_name)
                  # now continue on to add in the new block
                  if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                      parent = replace_name
               else:
                  raise StarError( "Attempt to replace existing block " + blockname)
            else:
               del self[new_lowerbn]
        self.dictionary.update({new_lowerbn:blockcontents})
        self.lower_keys.add(new_lowerbn)
        if parent is None:
           self.child_table[new_lowerbn]=self.PC(newblockname,None)
           self.visible_keys.append(new_lowerbn)
        else:
           if parent.lower() in self.lower_keys:
              if self.scoping == 'instance':
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
              else:
                 self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
                 self.visible_keys.append(new_lowerbn)
           else:
               print 'Warning:Parent block %s does not exist for child %s' % (parent,newblockname)  
        return new_lowerbn  #in case calling routine wants to know

    def _rekey(self,oldname,newname,block_id=''):
        """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name
           does not change unless [[block_id]] is given.  Prefer [[rename]] for a safe version."""
        move_block = self[oldname]    #old block
        is_visible = oldname in self.visible_keys
        move_block_info = self.child_table[oldname]    #old info
        move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname]
        # now rewrite the necessary bits
        self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children]))
        del self[oldname]   #do this after updating child table so we don't delete children
        self.dictionary.update({newname:move_block})
        self.lower_keys.add(newname)
        if block_id == '':
           self.child_table.update({newname:move_block_info})
        else:
           self.child_table.update({newname:self.PC(block_id,move_block_info.parent)})
        if is_visible: self.visible_keys += [newname]

    def rename(self,oldname,newname):
        """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
           conformance checks are conducted."""
        realoldname = oldname.lower()
        realnewname = newname.lower()
        if realnewname in self.lower_keys:
            raise StarError,'Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname)
        if realoldname not in self.lower_keys:
            raise KeyError,'Cannot find old block %s' % realoldname
        self._rekey(realoldname,realnewname,block_id=newname)
        
    def merge_fast(self,new_bc,parent=None):
        """Do a fast merge"""
        if self.standard is None:
            mode = 'replace' 
        else:
            mode = 'strict'
        overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
        if overlap_flag and mode != 'replace':
            double_keys = self.lower_keys.intersection(new_bc.lower_keys)
            for dup_key in double_keys:
                  our_parent = self.child_table[dup_key].parent
                  their_parent = new_bc.child_table[dup_key].parent
                  if (our_parent is None and their_parent is not None and parent is None) or\
                      parent is not None:  #rename our block
                    start_key = dup_key
                    while start_key in self.lower_keys: start_key = start_key+'+'
                    self._rekey(dup_key,start_key)
                    if parent.lower() == dup_key:  #we just renamed the prospective parent!
                        parent = start_key
                  elif our_parent is not None and their_parent is None and parent is None:
                    start_key = dup_key
                    while start_key in new_bc.lower_keys: start_key = start_key+'+'
                    new_bc._rekey(dup_key,start_key)
                  else: 
                    raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
        self.dictionary.update(new_bc.dictionary) 
        self.lower_keys.update(new_bc.lower_keys)
        self.visible_keys += (list(new_bc.lower_keys))
        self.child_table.update(new_bc.child_table)
        if parent is not None:     #redo the child_table entries
              reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
              reparent_dict = [(a[0],self.PC(a[1],parent.lower())) for a in reparent_list]
              self.child_table.update(dict(reparent_dict))

    def merge(self,new_bc,mode=None,parent=None,single_block=[],
                   idblock="",match_att=[],match_function=None):
        if mode is None:
            if self.standard is None:
               mode = 'replace'
            else:
               mode = 'strict'
        if single_block:
            self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                                   match_att=match_att,
                                                   match_function=match_function)
            return None
        base_keys = [a[1].block_id for a in self.child_table.items()]
        block_to_item = base_keys   #default
        new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
        if match_att:
            #make a blockname -> item name map
            if match_function:
                block_to_item = map(lambda a:match_function(self[a]),self.keys())
            else:
                block_to_item = map(lambda a:self[a].get(match_att[0],None),self.keys())
            #print `block_to_item`
        for key in new_keys:        #run over incoming blocknames
            if key == idblock: continue    #skip dictionary id
            basekey = key           #default value
            if len(match_att)>0:
               attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
            else:
               attval = 0
            for ii in range(len(block_to_item)):  #do this way to get looped names
                thisatt = block_to_item[ii]       #keyname in old block
                #print "Looking for %s in %s" % (attval,thisatt)
                if attval == thisatt or \
                   (isinstance(thisatt,list) and attval in thisatt):
                      basekey = base_keys.pop(ii)
                      block_to_item.remove(thisatt)
                      break
            if not self.has_key(basekey) or mode=="replace":
                new_parent = new_bc.get_parent(key)
                if parent is not None and new_parent is None:
                   new_parent = parent
                self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
            else:
                if mode=="strict":
                    raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
                elif mode=="overlay":
                    # print "Merging block %s with %s" % (basekey,key)
                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
                else:  
                    raise StarError( "Merge called with unknown mode %s" % mode)
         
    def checknamelengths(self,target_block,maxlength=-1):
        if maxlength < 0:
            return
        else:
            toolong = filter(lambda a:len(a)>maxlength, target_block.keys())
        outstring = ""
        for it in toolong: outstring += "\n" + it
        if toolong:
           raise StarError( 'Following data names too long:' + outstring)

    def get_all(self,item_name):
        raw_values = map(lambda a:self[a].get(item_name),self.keys())
        raw_values = filter(lambda a:a != None, raw_values)
        ret_vals = []
        for rv in raw_values:
            if isinstance(rv,list):
                for rvv in rv:
                    if rvv not in ret_vals: ret_vals.append(rvv)
            else:
                if rv not in ret_vals: ret_vals.append(rv)
        return ret_vals

    def __setattr__(self,attr_name,newval):
        if attr_name == 'scoping':
            if newval not in ('dictionary','instance'):
                raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval)
            if newval == 'dictionary':
                self.visible_keys = [a for a in self.lower_keys] 
            else:
                #only top-level datablocks visible
                self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None]
        object.__setattr__(self,attr_name,newval)

    def get_parent(self,blockname):
        """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
        possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
        try:
            first = possibles.next()   #get first one
        except:
            raise StarError('no parent for %s' % blockname)
        try:
           second = possibles.next()
        except StopIteration:
           return first[1].parent
        raise StarError('More than one parent for %s' % blockname)

    def get_roots(self):
        """Get the top-level blocks"""
        return [a for a in self.child_table.items() if a[1].parent==None]

    def get_children(self,blockname,include_parent=False,scoping='dictionary'):
        """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
        True, the parent block will also be included in the block collection as the root."""
        newbc = BlockCollection()
        block_lower = blockname.lower()
        proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
        newbc.child_table = dict(proto_child_table)
        if not include_parent:
           newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
        newbc.lower_keys = set([a[0] for a in proto_child_table])
        newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
        if include_parent:
            newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
            newbc.lower_keys.add(block_lower)
            newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
        newbc.scoping = scoping
        return newbc

    def get_immediate_children(self,parentname):
        """Get the next level of children of the given block as a list, without nested levels"""
        child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
        return child_handles

    def get_child_list(self,parentname):
        """Get a list of all child categories"""
        child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
        return child_handles

    def is_child_of_parent(self,parentname,blockname):
        """Recursively search for children of blockname, case is important for now"""
        checkname = parentname.lower()
        more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
        if blockname.lower() in more_children:
           return True
        else:
           for one_child in more_children:
               if self.is_child_of_parent(one_child,blockname): return True
        return False
           
    def set_parent(self,parentname,childname):
        """Set the parent block"""
        # first check that both blocks exist
        if parentname.lower() not in self.lower_keys:
            raise KeyError('Parent block %s does not exist' % parentname)
        if childname.lower() not in self.lower_keys:
            raise KeyError('Child block %s does not exist' % childname)
        old_entry = self.child_table[childname.lower()]
        self.child_table[childname.lower()]=self.PC(old_entry.block_id,
               parentname.lower())
        self.scoping = self.scoping #reset visibility
            
    def WriteOut(self,comment='',wraplength=80,maxoutlength=2048):
        import cStringIO
        if not comment:
            comment = self.header_comment
        outstring = cStringIO.StringIO()
        outstring.write(comment)
        # loop over top-level
        top_block_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent is None]
        for blockref,blockname in top_block_names:
            outstring.write('\n' + 'data_' +blockname+'\n')
            child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==blockref]
            if self.standard == 'Dic':              #put contents before save frames
              self[blockref].SetOutputLength(wraplength,maxoutlength)
              outstring.write(str(self[blockref]))
            for child_ref,child_name in child_names:
                outstring.write('\n' + 'save_' + child_name + '\n')
                self.block_to_string(child_ref,child_name,outstring,4)    
                outstring.write('\n' + 'save_'+ '\n')   
            if self.standard != 'Dic':              #put contents after save frames
                self[blockref].SetOutputLength(wraplength,maxoutlength)
                outstring.write(str(self[blockref]))
        returnstring =  outstring.getvalue()
        outstring.close()
        return returnstring

    def block_to_string(self,block_ref,block_id,outstring,indentlevel=0):
        """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children"""
        child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==block_ref]
        if self.standard == 'Dic':
            outstring.write(str(self[block_ref]))
        for child_ref,child_name in child_names:
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string(child_ref,child_name,outstring,indentlevel)
            outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
        if self.standard != 'Dic':
            outstring.write(str(self[block_ref]))
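
A quick sketch of how the 'scoping' attribute handled by __setattr__ above plays out in practice. This is a minimal example assuming the classes are importable from CifFile.StarFile as in released PyCIFRW; the block names are invented:

from CifFile.StarFile import BlockCollection, StarBlock

bc = BlockCollection(standard='CIF')
bc.NewBlock('top', StarBlock())                    # top-level data block
bc.NewBlock('frame', StarBlock(), parent='top')    # nested save frame

bc.scoping = 'instance'     # only parentless blocks are visible
print(bc.keys())            # -> ['top']
bc.scoping = 'dictionary'   # save frames become visible as well
print(bc.keys())            # -> ['top', 'frame'] (set-derived, order not guaranteed)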

Ancestors (in MRO)

Instance variables

var PC

var blocktype

var characterset

var child_table

var dictionary

var header_comment

var lower_keys

var parent_id

var renamed

var scoping

var standard

var visible_keys

Methods

def __init__(

self, datasource=None, standard='CIF', blocktype=StarBlock, characterset='ascii', scoping='instance', parent_id=None, **kwargs)

def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock,
             characterset='ascii',scoping='instance',parent_id=None,**kwargs):
    import collections
    self.dictionary = {}
    self.standard = standard
    self.lower_keys = set()           # short_cuts
    self.renamed = {}
    self.characterset = characterset
    self.PC = collections.namedtuple('PC',['block_id','parent'])
    self.child_table = {}
    self.visible_keys = []            # for efficiency
    self.parent_id = parent_id
    self.scoping = scoping  #will trigger setting of child table
    self.blocktype = blocktype
    if isinstance(datasource,BlockCollection):
        self.merge_fast(datasource)
        self.scoping = scoping   #reset visibility
    elif isinstance(datasource,dict):
        for key,value in datasource.items():
             self[key]= value
    self.header_comment = ''

def NewBlock(

self, blockname, blockcontents=None, fix=True, parent=None)

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    if blockcontents is None:
        blockcontents = StarBlock()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
        raise StarError('Blockname %s is longer than 75 characters' % blockname)
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:
        if self.standard is not None:    #already there
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn            
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print 'Warning: Parent block %s does not exist for child %s' % (parent,newblockname)
    return new_lowerbn  #in case calling routine wants to know
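
The collision handling above means NewBlock never silently drops a block when a standard is set: depending on whether the clash is at top level, either the incoming name or the existing block is re-keyed with trailing '+' characters, and a genuine top-level replacement raises StarError. A hedged sketch (block names invented):

bc = BlockCollection(standard='CIF')
bc.NewBlock('sample', StarBlock())
bc.NewBlock('sample', StarBlock(), parent='sample')
# the existing top-level 'sample' is re-keyed to 'sample+'; the new block
# takes the name 'sample' as a save frame under 'sample+'
# bc.NewBlock('sample+', StarBlock())   # would raise StarError: top-level clash

With standard=None the duplicate is instead deleted and replaced outright (the del branch above).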

def WriteOut(

self, comment='', wraplength=80, maxoutlength=2048)

def WriteOut(self,comment='',wraplength=80,maxoutlength=2048):
    import cStringIO
    if not comment:
        comment = self.header_comment
    outstring = cStringIO.StringIO()
    outstring.write(comment)
    # loop over top-level
    top_block_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent is None]
    for blockref,blockname in top_block_names:
        outstring.write('\n' + 'data_' +blockname+'\n')
        child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==blockref]
        if self.standard == 'Dic':              #put contents before save frames
          self[blockref].SetOutputLength(wraplength,maxoutlength)
          outstring.write(str(self[blockref]))
        for child_ref,child_name in child_names:
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string(child_ref,child_name,outstring,4)    
            outstring.write('\n' + 'save_'+ '\n')   
        if self.standard != 'Dic':              #put contents after save frames
            self[blockref].SetOutputLength(wraplength,maxoutlength)
            outstring.write(str(self[blockref]))
    returnstring =  outstring.getvalue()
    outstring.close()
    return returnstring
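
WriteOut walks the child table rather than the raw dictionary, so nesting is reproduced on output: each top-level block gets a data_ header and each child is wrapped in save_ ... save_. For dictionaries (standard == 'Dic') block contents are written before their save frames, otherwise after. A minimal round trip on the collection sketched earlier, assuming item access behaves as in the full module and using an invented dataname:

bc['top']['_example.item'] = 'a value'      # hypothetical dataname
text = bc.WriteOut(comment='# header comment')
print(text)   # '# header comment', then 'data_top', the 'frame' save frame, then the item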

def block_to_string(

self, block_ref, block_id, outstring, indentlevel=0)

Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children

def block_to_string(self,block_ref,block_id,outstring,indentlevel=0):
    """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children"""
    child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==block_ref]
    if self.standard == 'Dic':
        outstring.write(str(self[block_ref]))
    for child_ref,child_name in child_names:
        outstring.write('\n' + 'save_' + child_name + '\n')
        self.block_to_string(child_ref,child_name,outstring,indentlevel)
        outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
    if self.standard != 'Dic':
        outstring.write(str(self[block_ref]))

def checknamelengths(

self, target_block, maxlength=-1)

def checknamelengths(self,target_block,maxlength=-1):
    if maxlength < 0:
        return
    else:
        toolong = filter(lambda a:len(a)>maxlength, target_block.keys())
    outstring = ""
    for it in toolong: outstring += "\n" + it
    if toolong:
       raise StarError( 'Following data names too long:' + outstring)

def clear(

self)

def clear(self):
    self.dictionary.clear()
    self.lower_keys = set()
    self.child_table = {}
    self.visible_keys = []

def copy(

self)

def copy(self):   
    newcopy = self.dictionary.copy()  #all blocks
    newcopy = BlockCollection('',newcopy,parent_id=self.parent_id)
    newcopy.child_table = self.child_table.copy()
    newcopy.lower_keys = self.lower_keys
    newcopy.characterset = self.characterset
    newcopy.scoping = self.scoping  #this sets visible keys
    return newcopy

def first_block(

self)

Return the 'first' block. This is not necessarily the first block in the file.

def first_block(self):
    """Return the 'first' block.  This is not necessarily the first block in the file."""
    if self.keys():
        return self[self.keys()[0]]

def get(

self, key, default=None)

def get(self,key,default=None):
    if self.has_key(key):     # take account of case
        return self.__getitem__(key)
    else:
        return default

def get_all(

self, item_name)

def get_all(self,item_name):
    raw_values = map(lambda a:self[a].get(item_name),self.keys())
    raw_values = filter(lambda a:a != None, raw_values)
    ret_vals = []
    for rv in raw_values:
        if isinstance(rv,list):
            for rvv in rv:
                if rvv not in ret_vals: ret_vals.append(rvv)
        else:
            if rv not in ret_vals: ret_vals.append(rv)
    return ret_vals
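
get_all is a convenience for harvesting one dataname across every visible block: looped (list) values are flattened, missing values dropped, and duplicates removed while preserving first-seen order. A sketch with invented datanames:

bc = BlockCollection(standard=None)
bc.NewBlock('block1', StarBlock())
bc.NewBlock('block2', StarBlock())
bc['block1']['_atom_type.symbol'] = ['C', 'H']
bc['block2']['_atom_type.symbol'] = ['H', 'O']
print(bc.get_all('_atom_type.symbol'))   # -> ['C', 'H', 'O']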

def get_child_list(

self, parentname)

Get a list of all child categories

def get_child_list(self,parentname):
    """Get a list of all child categories"""
    child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
    return child_handles

def get_children(

self, blockname, include_parent=False, scoping='dictionary')

Get all children of [[blockname]] as a block collection. If [[include_parent]] is True, the parent block will also be included in the block collection as the root.

def get_children(self,blockname,include_parent=False,scoping='dictionary'):
    """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
    True, the parent block will also be included in the block collection as the root."""
    newbc = BlockCollection()
    block_lower = blockname.lower()
    proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
    newbc.child_table = dict(proto_child_table)
    if not include_parent:
       newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
    newbc.lower_keys = set([a[0] for a in proto_child_table])
    newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
    if include_parent:
        newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
        newbc.lower_keys.add(block_lower)
        newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
    newbc.scoping = scoping
    return newbc
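
For a hierarchy top -> middle -> inner, get_children('top') returns a new BlockCollection holding middle and inner, with middle promoted to a root; passing include_parent=True keeps top itself as the root. A sketch:

bc = BlockCollection(standard='CIF')
bc.NewBlock('top', StarBlock())
bc.NewBlock('middle', StarBlock(), parent='top')
bc.NewBlock('inner', StarBlock(), parent='middle')
kids = bc.get_children('top')
print(sorted(kids.lower_keys))            # -> ['inner', 'middle']
both = bc.get_children('top', include_parent=True)
print(both.get_parent('middle'))          # -> 'top'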

def get_immediate_children(

self, parentname)

Get the next level of children of the given block as a list, without nested levels

def get_immediate_children(self,parentname):
    """Get the next level of children of the given block as a list, without nested levels"""
    child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
    return child_handles

def get_parent(

self, blockname)

Return the name of the block enclosing [[blockname]] in canonical form (lower case)

def get_parent(self,blockname):
    """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
    possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
    try:
        first = possibles.next()   #get first one
    except:
        raise StarError('no parent for %s' % blockname)
    try:
       second = possibles.next()
    except StopIteration:
       return first[1].parent
    raise StarError('More than one parent for %s' % blockname)

def get_roots(

self)

Get the top-level blocks

def get_roots(self):
    """Get the top-level blocks"""
    return [a for a in self.child_table.items() if a[1].parent==None]

def has_key(

self, key)

def has_key(self,key):
    if not isinstance(key,basestring): return 0
    if key.lower() in self.visible_keys:
        return 1
    return 0

def is_child_of_parent(

self, parentname, blockname)

Recursively search for children of blockname, case is important for now

def is_child_of_parent(self,parentname,blockname):
    """Recursively search for children of blockname, case is important for now"""
    checkname = parentname.lower()
    more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
    if blockname.lower() in more_children:
       return True
    else:
       for one_child in more_children:
           if self.is_child_of_parent(one_child,blockname): return True
    return False
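
Because the recursion descends one generation at a time, arbitrarily deep nesting is detected, and reversing the arguments correctly fails. Continuing the top/middle/inner sketch above:

print(bc.is_child_of_parent('top', 'inner'))   # -> True (two levels down)
print(bc.is_child_of_parent('inner', 'top'))   # -> False (wrong direction)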

def items(

self)

def items(self):
    return [(a,self[a]) for a in self.keys()]

def keys(

self)

def keys(self):
    return self.visible_keys

def lock(

self)

Disallow overwriting for all blocks in this collection

def lock(self):
    """Disallow overwriting for all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite = False

def merge(

self, new_bc, mode=None, parent=None, single_block=[], idblock='', match_att=[], match_function=None)

def merge(self,new_bc,mode=None,parent=None,single_block=[],
               idblock="",match_att=[],match_function=None):
    if mode is None:
        if self.standard is None:
           mode = 'replace'
        else:
           mode = 'strict'
    if single_block:
        self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                               match_att=match_att,
                                               match_function=match_function)
        return None
    base_keys = [a[1].block_id for a in self.child_table.items()]
    block_to_item = base_keys   #default
    new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
    if match_att:
        #make a blockname -> item name map
        if match_function:
            block_to_item = map(lambda a:match_function(self[a]),self.keys())
        else:
            block_to_item = map(lambda a:self[a].get(match_att[0],None),self.keys())
        #print `block_to_item`
    for key in new_keys:        #run over incoming blocknames
        if key == idblock: continue    #skip dictionary id
        basekey = key           #default value
        if len(match_att)>0:
           attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
        else:
           attval = 0
        for ii in range(len(block_to_item)):  #do this way to get looped names
            thisatt = block_to_item[ii]       #keyname in old block
            #print "Looking for %s in %s" % (attval,thisatt)
            if attval == thisatt or \
               (isinstance(thisatt,list) and attval in thisatt):
                  basekey = base_keys.pop(ii)
                  block_to_item.remove(thisatt)
                  break
        if not self.has_key(basekey) or mode=="replace":
            new_parent = new_bc.get_parent(key)
            if parent is not None and new_parent is None:
               new_parent = parent
            self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
        else:
            if mode=="strict":
                raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
            elif mode=="overlay":
                # print "Merging block %s with %s" % (basekey,key)
                self[basekey].merge(new_bc[key],mode,match_att=match_att)
            else:  
                raise StarError( "Merge called with unknown mode %s" % mode)

def merge_fast(

self, new_bc, parent=None)

Do a fast merge

def merge_fast(self,new_bc,parent=None):
    """Do a fast merge"""
    if self.standard is None:
        mode = 'replace' 
    else:
        mode = 'strict'
    overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
    if overlap_flag and mode != 'replace':
        double_keys = self.lower_keys.intersection(new_bc.lower_keys)
        for dup_key in double_keys:
              our_parent = self.child_table[dup_key].parent
              their_parent = new_bc.child_table[dup_key].parent
              if (our_parent is None and their_parent is not None and parent is None) or\
                  parent is not None:  #rename our block
                start_key = dup_key
                while start_key in self.lower_keys: start_key = start_key+'+'
                self._rekey(dup_key,start_key)
                if parent is not None and parent.lower() == dup_key:  #we just renamed the prospective parent!
                    parent = start_key
              elif our_parent is not None and their_parent is None and parent is None:
                start_key = dup_key
                while start_key in new_bc.lower_keys: start_key = start_key+'+'
                new_bc._rekey(dup_key,start_key)
              else: 
                raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
    self.dictionary.update(new_bc.dictionary) 
    self.lower_keys.update(new_bc.lower_keys)
    self.visible_keys += (list(new_bc.lower_keys))
    self.child_table.update(new_bc.child_table)
    if parent is not None:     #redo the child_table entries
          reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
          reparent_dict = [(a[0],self.PC(a[1],parent.lower())) for a in reparent_list]
          self.child_table.update(dict(reparent_dict))
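
merge_fast skips the per-item reconciliation of merge and simply unions the underlying tables, re-keying a clashing name with trailing '+' characters where the save-frame structure allows it; this is the path __init__ above takes when handed another BlockCollection. Reusing the incoming collection from the previous sketch:

fast = BlockCollection(standard=None)      # standard None => clashes replace
fast.merge_fast(incoming)
print('b1' in fast.lower_keys)             # -> True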

def rename(

self, oldname, newname)

Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed. No conformance checks are conducted.

def rename(self,oldname,newname):
    """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
       conformance checks are conducted."""
    realoldname = oldname.lower()
    realnewname = newname.lower()
    if realnewname in self.lower_keys:
        raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname))
    if realoldname not in self.lower_keys:
        raise KeyError('Cannot find old block %s' % realoldname)
    self._rekey(realoldname,realnewname,block_id=newname)

def set_parent(

self, parentname, childname)

Set the parent block

def set_parent(self,parentname,childname):
    """Set the parent block"""
    # first check that both blocks exist
    if parentname.lower() not in self.lower_keys:
        raise KeyError('Parent block %s does not exist' % parentname)
    if childname.lower() not in self.lower_keys:
        raise KeyError('Child block %s does not exist' % childname)
    old_entry = self.child_table[childname.lower()]
    self.child_table[childname.lower()]=self.PC(old_entry.block_id,
           parentname.lower())
    self.scoping = self.scoping #reset visibility
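
Note the final self-assignment: set_parent only rewires the child table entry, and re-assigning scoping to itself forces __setattr__ to rebuild visible_keys, so under 'instance' scoping the newly nested block drops out of keys(). Sketch:

bc = BlockCollection(standard='CIF', scoping='instance')
bc.NewBlock('outer', StarBlock())
bc.NewBlock('stray', StarBlock())
bc.set_parent('outer', 'stray')
print(bc.keys())    # -> ['outer']; 'stray' is now a save frame of 'outer'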

def unlock(

self)

Allow overwriting of all blocks in this collection

def unlock(self):
    """Allow overwriting of all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite=True

def update(

self, adict)

def update(self,adict):
    for key in adict.keys():
        self[key] = adict[key]

class CIFStringIO

class CIFStringIO(StringIO):
    def __init__(self,target_width=80,**kwargs):
        StringIO.__init__(self,**kwargs)
        self.currentpos = 0
        self.target_width = target_width
        self.tabwidth = -1
        self.indentlist = [0]

    def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,startcol=-1):
        """Write a string with correct linebreak, tabs and indents"""
        # do we need to break?
        if mustbreak:    #insert a new line and indent
            StringIO.write(self,'\n' + ' '*self.indentlist[-1])
            self.currentpos = self.indentlist[-1]
        if self.currentpos+len(outstring)>self.target_width: #try to break
            if canbreak:
                StringIO.write(self,'\n'+' '*self.indentlist[-1])
                self.currentpos = self.indentlist[-1]
        if newindent:           #indent by current amount
            if self.indentlist[-1] == 0:    #first time
                self.indentlist.append(self.currentpos)
                print 'Indentlist: ' + repr(self.indentlist)
            else:
                self.indentlist.append(self.indentlist[-1]+2)
        elif unindent:
            if len(self.indentlist)>1:
                self.indentlist.pop()
            else:
                print 'Warning: cannot unindent any further'
        #handle tabs
        if self.tabwidth >0 and do_tab:
            next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
            #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
            if self.currentpos < next_stop:
                StringIO.write(self,(next_stop-self.currentpos)*' ')
                self.currentpos = next_stop
        #now output the string
        StringIO.write(self,outstring)
        last_line_break = outstring.rfind('\n')
        if last_line_break >=0:
            self.currentpos = len(outstring)-last_line_break
        else:
            self.currentpos = self.currentpos + len(outstring)
        
    def set_tab(self,tabwidth):
        """Set the tab stop position"""
        self.tabwidth = tabwidth

Ancestors (in MRO)

Instance variables

var currentpos

var indentlist

var tabwidth

var target_width

Methods

def __init__(

self, target_width=80, **kwargs)

def __init__(self,target_width=80,**kwargs):
    StringIO.__init__(self,**kwargs)
    self.currentpos = 0
    self.target_width = target_width
    self.tabwidth = -1
    self.indentlist = [0]

def close(

self)

Free the memory buffer.

def close(self):
    """Free the memory buffer.
    """
    if not self.closed:
        self.closed = True
        del self.buf, self.pos

def flush(

self)

Flush the internal buffer

def flush(self):
    """Flush the internal buffer
    """
    _complain_ifclosed(self.closed)

def getvalue(

self)

Retrieve the entire contents of the "file" at any time before the StringIO object's close() method is called.

The StringIO object can accept either Unicode or 8-bit strings, but mixing the two may take some care. If both are used, 8-bit strings that cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause a UnicodeError to be raised when getvalue() is called.

def getvalue(self):
    """
    Retrieve the entire contents of the "file" at any time before
    the StringIO object's close() method is called.
    The StringIO object can accept either Unicode or 8-bit strings,
    but mixing the two may take some care. If both are used, 8-bit
    strings that cannot be interpreted as 7-bit ASCII (that use the
    8th bit) will cause a UnicodeError to be raised when getvalue()
    is called.
    """
    _complain_ifclosed(self.closed)
    if self.buflist:
        self.buf += ''.join(self.buflist)
        self.buflist = []
    return self.buf

def isatty(

self)

Returns False because StringIO objects are not connected to a tty-like device.

def isatty(self):
    """Returns False because StringIO objects are not connected to a
    tty-like device.
    """
    _complain_ifclosed(self.closed)
    return False

def next(

self)

A file object is its own iterator, for example iter(f) returns f (unless f is closed). When a file is used as an iterator, typically in a for loop (for example, for line in f: print line), the next() method is called repeatedly. This method returns the next input line, or raises StopIteration when EOF is hit.

def next(self):
    """A file object is its own iterator, for example iter(f) returns f
    (unless f is closed). When a file is used as an iterator, typically
    in a for loop (for example, for line in f: print line), the next()
    method is called repeatedly. This method returns the next input line,
    or raises StopIteration when EOF is hit.
    """
    _complain_ifclosed(self.closed)
    r = self.readline()
    if not r:
        raise StopIteration
    return r

def read(

self, n=-1)

Read at most size bytes from the file (less if the read hits EOF before obtaining size bytes).

If the size argument is negative or omitted, read all data until EOF is reached. The bytes are returned as a string object. An empty string is returned when EOF is encountered immediately.

def read(self, n = -1):
    """Read at most size bytes from the file
    (less if the read hits EOF before obtaining size bytes).
    If the size argument is negative or omitted, read all data until EOF
    is reached. The bytes are returned as a string object. An empty
    string is returned when EOF is encountered immediately.
    """
    _complain_ifclosed(self.closed)
    if self.buflist:
        self.buf += ''.join(self.buflist)
        self.buflist = []
    if n is None or n < 0:
        newpos = self.len
    else:
        newpos = min(self.pos+n, self.len)
    r = self.buf[self.pos:newpos]
    self.pos = newpos
    return r

def readline(

self, length=None)

Read one entire line from the file.

A trailing newline character is kept in the string (but may be absent when a file ends with an incomplete line). If the size argument is present and non-negative, it is a maximum byte count (including the trailing newline) and an incomplete line may be returned.

An empty string is returned only when EOF is encountered immediately.

Note: Unlike stdio's fgets(), the returned string contains null characters ('\0') if they occurred in the input.

def readline(self, length=None):
    r"""Read one entire line from the file.
    A trailing newline character is kept in the string (but may be absent
    when a file ends with an incomplete line). If the size argument is
    present and non-negative, it is a maximum byte count (including the
    trailing newline) and an incomplete line may be returned.
    An empty string is returned only when EOF is encountered immediately.
    Note: Unlike stdio's fgets(), the returned string contains null
    characters ('\0') if they occurred in the input.
    """
    _complain_ifclosed(self.closed)
    if self.buflist:
        self.buf += ''.join(self.buflist)
        self.buflist = []
    i = self.buf.find('\n', self.pos)
    if i < 0:
        newpos = self.len
    else:
        newpos = i+1
    if length is not None and length >= 0:
        if self.pos + length < newpos:
            newpos = self.pos + length
    r = self.buf[self.pos:newpos]
    self.pos = newpos
    return r

def readlines(

self, sizehint=0)

Read until EOF using readline() and return a list containing the lines thus read.

If the optional sizehint argument is present, instead of reading up to EOF, whole lines totalling approximately sizehint bytes (or more to accommodate a final whole line) are read.

def readlines(self, sizehint = 0):
    """Read until EOF using readline() and return a list containing the
    lines thus read.
    If the optional sizehint argument is present, instead of reading up
    to EOF, whole lines totalling approximately sizehint bytes (or more
    to accommodate a final whole line) are read.
    """
    total = 0
    lines = []
    line = self.readline()
    while line:
        lines.append(line)
        total += len(line)
        if 0 < sizehint <= total:
            break
        line = self.readline()
    return lines

def seek(

self, pos, mode=0)

Set the file's current position.

The mode argument is optional and defaults to 0 (absolute file positioning); other values are 1 (seek relative to the current position) and 2 (seek relative to the file's end).

There is no return value.

def seek(self, pos, mode = 0):
    """Set the file's current position.
    The mode argument is optional and defaults to 0 (absolute file
    positioning); other values are 1 (seek relative to the current
    position) and 2 (seek relative to the file's end).
    There is no return value.
    """
    _complain_ifclosed(self.closed)
    if self.buflist:
        self.buf += ''.join(self.buflist)
        self.buflist = []
    if mode == 1:
        pos += self.pos
    elif mode == 2:
        pos += self.len
    self.pos = max(0, pos)

def set_tab(

self, tabwidth)

Set the tab stop position

def set_tab(self,tabwidth):
    """Set the tab stop position"""
    self.tabwidth = tabwidth

def tell(

self)

Return the file's current position.

def tell(self):
    """Return the file's current position."""
    _complain_ifclosed(self.closed)
    return self.pos

def truncate(

self, size=None)

Truncate the file's size.

If the optional size argument is present, the file is truncated to (at most) that size. The size defaults to the current position. The current file position is not changed unless the position is beyond the new file size.

If the specified size exceeds the file's current size, the file remains unchanged.

def truncate(self, size=None):
    """Truncate the file's size.
    If the optional size argument is present, the file is truncated to
    (at most) that size. The size defaults to the current position.
    The current file position is not changed unless the position
    is beyond the new file size.
    If the specified size exceeds the file's current size, the
    file remains unchanged.
    """
    _complain_ifclosed(self.closed)
    if size is None:
        size = self.pos
    elif size < 0:
        raise IOError(EINVAL, "Negative size not allowed")
    elif size < self.pos:
        self.pos = size
    self.buf = self.getvalue()[:size]
    self.len = size

def write(

self, outstring, canbreak=False, mustbreak=False, do_tab=True, newindent=False, unindent=False, startcol=-1)

Write a string with correct linebreak, tabs and indents

def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False,startcol=-1):
    """Write a string with correct linebreak, tabs and indents"""
    # do we need to break?
    if mustbreak:    #insert a new line and indent
        StringIO.write(self,'\n' + ' '*self.indentlist[-1])
        self.currentpos = self.indentlist[-1]
    if self.currentpos+len(outstring)>self.target_width: #try to break
        if canbreak:
            StringIO.write(self,'\n'+' '*self.indentlist[-1])
            self.currentpos = self.indentlist[-1]
    if newindent:           #indent by current amount
        if self.indentlist[-1] == 0:    #first time
            self.indentlist.append(self.currentpos)
            print 'Indentlist: ' + repr(self.indentlist)
        else:
            self.indentlist.append(self.indentlist[-1]+2)
    elif unindent:
        if len(self.indentlist)>1:
            self.indentlist.pop()
        else:
            print 'Warning: cannot unindent any further'
    #handle tabs
    if self.tabwidth >0 and do_tab:
        next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth
        #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop)
        if self.currentpos < next_stop:
            StringIO.write(self,(next_stop-self.currentpos)*' ')
            self.currentpos = next_stop
    #now output the string
    StringIO.write(self,outstring)
    last_line_break = outstring.rfind('\n')
    if last_line_break >=0:
        self.currentpos = len(outstring)-last_line_break
    else:
        self.currentpos = self.currentpos + len(outstring)
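
In use, the keyword arguments drive CIF-style layout: canbreak permits a break when the pending string would pass target_width, mustbreak forces one, do_tab pads to the next tab stop set by set_tab, and newindent/unindent push and pop the indent stack. A small sketch (Python 2, as in the listing):

out = CIFStringIO(target_width=20)
out.write('_a_long_dataname')
out.write(' a_value_that_overflows', canbreak=True)   # breaks before writing
print(out.getvalue())   # two lines, the second starting at the current indent (column 0 here)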

def writelines(

self, iterable)

Write a sequence of strings to the file. The sequence can be any iterable object producing strings, typically a list of strings. There is no return value.

(The name is intended to match readlines(); writelines() does not add line separators.)

def writelines(self, iterable):
    """Write a sequence of strings to the file. The sequence can be any
    iterable object producing strings, typically a list of strings. There
    is no return value.
    (The name is intended to match readlines(); writelines() does not add
    line separators.)
    """
    write = self.write
    for line in iterable:
        write(line)

class LoopBlock

class LoopBlock(object):
    def __init__(self,parent_block,dataname):
        self.loop_no = parent_block.FindLoop(dataname)
        if self.loop_no < 0:
            raise KeyError('%s is not in a loop structure' % dataname)
        self.parent_block = parent_block

    def keys(self):
        return self.parent_block.loops[self.loop_no]

    def values(self):
        return [self.parent_block[a] for a in self.keys()]

    def items(self):
        return zip(self.keys(),self.values())

    def __getitem__(self,dataname):
        if isinstance(dataname,int):   #a packet request
            return self.GetPacket(dataname)
        if dataname in self.keys():
            return self.parent_block[dataname]
        else:
            raise KeyError('%s not in loop block' % dataname)

    def __setitem__(self,dataname,value):
        self.parent_block[dataname] = value
        self.parent_block.AddLoopName(self.keys()[0],dataname)

    def has_key(self,key):
        return key in self.parent_block.loops[self.loop_no]

    def __iter__(self):
        packet_list = zip(*self.values())
        names = self.keys()
        for p in packet_list:
            r = StarPacket(p)
            for n in range(len(names)):
                setattr(r,names[n].lower(),r[n])
            yield r

    # for compatibility
    def __getattr__(self,attname):
        return getattr(self.parent_block,attname)

    def load_iter(self,coords=[]):
        count = 0        #to create packet index 
        while not self.popout:
            # ok, we have a new packet:  append a list to our subloops
            for aloop in self.loops:
                aloop.new_enclosing_packet()
            for iname in self.item_order:
                if isinstance(iname,LoopBlock):       #into a nested loop
                    for subitems in iname.load_iter(coords=coords+[count]):
                        # print 'Yielding %s' % `subitems`
                        yield subitems
                    # print 'End of internal loop'
                else:
                    if self.dimension == 0:
                        # print 'Yielding %s' % `self[iname]`
                        yield self,self[iname]
                    else:
                        backval = self.block[iname]
                        for i in range(len(coords)):
                           # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                           backval = backval[coords[i]]
                        yield self,backval
            count = count + 1      # count packets
        self.popout = False        # reinitialise
        # print 'Finished iterating'
        yield self,'###Blank###'     #this value should never be used

    # an experimental fast iterator for level-1 loops (ie CIF)
    def fast_load_iter(self):
        targets = map(lambda a:self.block[a],self.item_order)
        while targets:
            for target in targets:
                yield self,target

    # Add another list of the required shape to take into account a new outer packet
    def new_enclosing_packet(self):
        if self.dimension > 1:      #otherwise have a top-level list
            for iname in self.keys():  #includes lower levels
                target_list = self[iname]
                for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
                    target_list = target_list[-1]
                target_list.append([])
                # print '%s now %s' % (iname,`self[iname]`)

    def recursive_iter(self,dict_so_far={},coord=[]):
        # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
        my_length = 0
        top_items = self.block.items()
        top_values = self.block.values()       #same order as items
        drill_values = self.block.values()
        for dimup in range(0,self.dimension):  #look higher in the tree
            if len(drill_values)>0:            #this block has values
                drill_values=drill_values[0]   #drill in
            else:
                raise StarError("Malformed loop packet %s" % `top_items[0]`)
        my_length = len(drill_values[0])       #length of 'string' entry
        if self.dimension == 0:                #top level
            for aloop in self.loops:
                for apacket in aloop.recursive_iter():
                    # print "Recursive yielding %s" % `dict(top_items + apacket.items())`
                    prep_yield = StarPacket(top_values+apacket.values())  #straight list
                    for name,value in top_items + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
        else:                                  #in some loop
            for i in range(my_length):
                kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
                kvvals = map(lambda a:a[1],kvpairs)   #just values
                # print "Recursive kvpairs at %d: %s" % (i,`kvpairs`)
                if self.loops:
                  for aloop in self.loops:
                    for apacket in aloop.recursive_iter(coord=coord+[i]):
                        # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`
                        prep_yield = StarPacket(kvvals+apacket.values())
                        for name,value in kvpairs + apacket.items():
                            setattr(prep_yield,name,value)
                        yield prep_yield
                else:           # we're at the bottom of the tree
                    # print "Recursive yielding %s" % `dict(kvpairs)`
                    prep_yield = StarPacket(kvvals)
                    for name,value in kvpairs:
                        setattr(prep_yield,name,value)
                    yield prep_yield

    # small function to use the coordinates. 
    def coord_to_group(self,dataname,coords):
          if not isinstance(dataname,basestring):
             return dataname     # flag inner loop processing
          newm = self[dataname]          # newm must be a list or tuple
          for c in coords:
              # print "Coord_to_group: %s ->" % (`newm`),
              newm = newm[c]
              # print `newm`
          return newm 

    def flat_iterator(self):
        if self.dimension == 0:   
            yield copy.copy(self)
        else:
            my_length = 0
            top_keys = self.block.keys()
            if len(top_keys)>0:
                my_length = len(self.block[top_keys[0]])
            for pack_no in range(my_length):
                yield(self.collapse(pack_no))
            

    def RemoveItem(self,itemname):
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if self.has_key(testkey):
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)
     
    def RemoveLoopItem(self,itemname):
        self.RemoveItem(itemname)

    def GetLoop(self,keyname):
        """Return a LoopBlock object containing keyname"""
        return LoopBlock(self,keyname)

    def GetPacket(self,index):
        thispack = StarPacket([])
        for myitem in self.parent_block.loops[self.loop_no]:
            thispack.append(self[myitem][index])
            setattr(thispack,myitem,thispack[-1])
        return thispack 

    def AddPacket(self,packet):
        for myitem in self.parent_block.loops[self.loop_no]:
            old_values = self.parent_block[myitem]
            old_values.append(packet.__getattribute__(myitem))
            self.parent_block[myitem] = old_values
        
    def GetItemOrder(self):
        return self.parent_block.loops[self.loop_no][:]

    def ChangeItemOrder(self,itemname,newpos):
        self.parent_block.loops[self.loop_no].remove(itemname.lower())
        self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

    def GetItemPosition(self,itemname):
        import string
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not self.has_key(itemname):
            raise ValueError('No such dataname %s' % itemname)
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with [[keyname]]"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError("Number of values for looped datanames %s not equal to %d" \
               % (repr(bad_vals),loop_len))
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

Ancestors (in MRO)

Instance variables

var loop_no

var parent_block

Methods

def __init__(

self, parent_block, dataname)

def __init__(self,parent_block,dataname):
    self.loop_no = parent_block.FindLoop(dataname)
    if self.loop_no < 0:
        raise KeyError('%s is not in a loop structure' % dataname)
    self.parent_block = parent_block

def AddPacket(

self, packet)

def AddPacket(self,packet):
    for myitem in self.parent_block.loops[self.loop_no]:
        old_values = self.parent_block[myitem]
        old_values.append(packet.__getattribute__(myitem))
        self.parent_block[myitem] = old_values

def AddToLoop(

self, dataname, loopdata)

def AddToLoop(self,dataname,loopdata):
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError, "Number of values for looped datanames %s not equal to %d" \
           % (`bad_vals`,loop_len)
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()

def ChangeItemOrder(

self, itemname, newpos)

def ChangeItemOrder(self,itemname,newpos):
    self.parent_block.loops[self.loop_no].remove(itemname.lower())
    self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower())

def GetItemOrder(

self)

def GetItemOrder(self):
    return self.parent_block.loops[self.loop_no][:]

def GetItemPosition(

self, itemname)

def GetItemPosition(self,itemname):
    import string
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not self.has_key(itemname):
        raise ValueError('No such dataname %s' % itemname)
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos

def GetLoop(

self, keyname)

Return a LoopBlock object containing keyname

def GetLoop(self,keyname):
    """Return a LoopBlock object containing keyname"""
    return LoopBlock(self,keyname)

def GetLoopNames(

self, keyname)

Return all datanames appearing together with [[keyname]]

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with [[keyname]]"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError('%s is not in any loop' % keyname)

def GetPacket(

self, index)

def GetPacket(self,index):
    thispack = StarPacket([])
    for myitem in self.parent_block.loops[self.loop_no]:
        thispack.append(self[myitem][index])
        setattr(thispack,myitem,thispack[-1])
    return thispack 
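
GetPacket materialises one row of the loop as a StarPacket whose attributes mirror the looped datanames, and AddPacket appends such a packet's values to every column. A hedged sketch, assuming loop construction via AddLoopItem/GetLoop behaves as in the full module and using invented datanames:

sb = StarBlock()
sb.AddLoopItem((('_x', '_y'), (['1', '2'], ['a', 'b'])))   # two-column loop
loop = sb.GetLoop('_x')
pkt = loop.GetPacket(0)
print(pkt._x)          # -> '1'
loop.AddPacket(pkt)    # appends a copy of row 0 to both columns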

def RemoveItem(

self, itemname)

def RemoveItem(self,itemname):
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if self.has_key(testkey):
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveLoopItem(

self, itemname)

def RemoveLoopItem(self,itemname):
    self.RemoveItem(itemname)

def coord_to_group(

self, dataname, coords)

def coord_to_group(self,dataname,coords):
      if not isinstance(dataname,basestring):
         return dataname     # flag inner loop processing
      newm = self[dataname]          # newm must be a list or tuple
      for c in coords:
          # print "Coord_to_group: %s ->" % (`newm`),
          newm = newm[c]
          # print `newm`
      return newm 

def fast_load_iter(

self)

def fast_load_iter(self):
    targets = map(lambda a:self.block[a],self.item_order)
    while targets:
        for target in targets:
            yield self,target

def flat_iterator(

self)

def flat_iterator(self):
    if self.dimension == 0:   
        yield copy.copy(self)
    else:
        my_length = 0
        top_keys = self.block.keys()
        if len(top_keys)>0:
            my_length = len(self.block[top_keys[0]])
        for pack_no in range(my_length):
            yield(self.collapse(pack_no))

def has_key(

self, key)

def has_key(self,key):
    return key in self.parent_block.loops[self.loop_no]

def items(

self)

def items(self):
    return zip(self.keys(),self.values())

def keys(

self)

def keys(self):
    return self.parent_block.loops[self.loop_no]

def load_iter(

self, coords=[])

def load_iter(self,coords=[]):
    count = 0        #to create packet index 
    while not self.popout:
        # ok, we have a new packet:  append a list to our subloops
        for aloop in self.loops:
            aloop.new_enclosing_packet()
        for iname in self.item_order:
            if isinstance(iname,LoopBlock):       #into a nested loop
                for subitems in iname.load_iter(coords=coords+[count]):
                    # print 'Yielding %s' % `subitems`
                    yield subitems
                # print 'End of internal loop'
            else:
                if self.dimension == 0:
                    # print 'Yielding %s' % `self[iname]`
                    yield self,self[iname]
                else:
                    backval = self.block[iname]
                    for i in range(len(coords)):
                       # print 'backval, coords: %s, %s' % (`backval`,`coords`)
                       backval = backval[coords[i]]
                    yield self,backval
        count = count + 1      # count packets
    self.popout = False        # reinitialise
    # print 'Finished iterating'
    yield self,'###Blank###'     #this value should never be used

def new_enclosing_packet(

self)

def new_enclosing_packet(self):
    if self.dimension > 1:      #otherwise have a top-level list
        for iname in self.keys():  #includes lower levels
            target_list = self[iname]
            for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
                target_list = target_list[-1]
            target_list.append([])

def recursive_iter(

self, dict_so_far={}, coord=[])

def recursive_iter(self,dict_so_far={},coord=[]):
    # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension)
    my_length = 0
    top_items = self.block.items()
    top_values = self.block.values()       #same order as items
    drill_values = self.block.values()
    for dimup in range(0,self.dimension):  #look higher in the tree
        if len(drill_values)>0:            #this block has values
            drill_values=drill_values[0]   #drill in
        else:
            raise StarError("Malformed loop packet %s" % `top_items[0]`)
    my_length = len(drill_values[0])       #length of 'string' entry
    if self.dimension == 0:                #top level
        for aloop in self.loops:
            for apacket in aloop.recursive_iter():
                # print "Recursive yielding %s" % `dict(top_items + apacket.items())`
                prep_yield = StarPacket(top_values+apacket.values())  #straight list
                for name,value in top_items + apacket.items():
                    setattr(prep_yield,name,value)
                yield prep_yield
    else:                                  #in some loop
        for i in range(my_length):
            kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys())
            kvvals = map(lambda a:a[1],kvpairs)   #just values
            # print "Recursive kvpairs at %d: %s" % (i,`kvpairs`)
            if self.loops:
              for aloop in self.loops:
                for apacket in aloop.recursive_iter(coord=coord+[i]):
                    # print "Recursive yielding %s" % `dict(kvpairs + apacket.items())`
                    prep_yield = StarPacket(kvvals+apacket.values())
                    for name,value in kvpairs + apacket.items():
                        setattr(prep_yield,name,value)
                    yield prep_yield
            else:           # we're at the bottom of the tree
                # print "Recursive yielding %s" % `dict(kvpairs)`
                prep_yield = StarPacket(kvvals)
                for name,value in kvpairs:
                    setattr(prep_yield,name,value)
                yield prep_yield

def values(

self)

def values(self):
    return [self.parent_block[a] for a in self.keys()]
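
Iteration ties these pieces together: a LoopBlock is a live view onto one loop of its parent StarBlock, and __iter__ zips the columns into successive StarPacket rows. Reusing sb and loop from the GetPacket sketch above:

for packet in loop:
    print(packet._x, packet._y)   # each row as attributes; prints a tuple on Python 2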

class StarBlock

class StarBlock(object):
    def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
                 characterset='ascii',maxnamelength=-1):
        self.block = {}    #the actual data storage (lower case keys)
        self.loops = {}    #each loop is indexed by a number and contains a list of datanames
        self.item_order = []  #lower case, loops referenced by integer
        self.formatting_hints = {}
        self.true_case = {} #transform lower case to supplied case
        self.provide_value = False   #prefer string version always
        self.dictionary = None      #DDLm dictionary
        self.popout = False         #used during load iteration
        self.curitem = -1           #used during iteration
        self.maxoutlength = maxoutlength
        self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
        self.wraplength = wraplength
        self.overwrite = overwrite
        self.characterset = characterset
        if self.characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
        elif self.characterset == 'unicode':
            self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
        else:
            raise StarError("No character set specified")
        if isinstance(data,(tuple,list)):
            for item in data:
                self.AddLoopItem(item)
        elif isinstance(data,StarBlock):
            self.block = data.block.copy() 
            self.item_order = data.item_order[:]
            self.true_case = data.true_case.copy()
            # loops as well
            self.loops = data.loops.copy()

    def setmaxnamelength(self,maxlength):
        """Set the maximum allowable dataname length (-1 for no check)"""
        self.maxnamelength = maxlength
        if maxlength > 0:
            bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
            if len(bad_names)>0:
                raise StarError('Datanames too long: ' + repr(bad_names))

    def __str__(self):
        return self.printsection()

    def __setitem__(self,key,value):
        if key == "saves":
            raise StarError("""Setting the saves key is deprecated. Add the save block to
    an enclosing block collection (e.g. CIF or STAR file) with this block as child""")
        self.AddItem(key,value)

    def __getitem__(self,key):
        if key == "saves":
            raise StarError("""The saves key is deprecated. Access the save block from
    the enclosing block collection (e.g. CIF or STAR file object)""") 
        try:
           rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
           if self.dictionary:
               # send the dictionary the required key and a pointer to us
               new_value = self.dictionary.derive_item(key,self,store_value=True)
               print 'Set %s to derived value %s' % (key, repr(new_value))
               return new_value
           else:
               raise KeyError('No such item: %s' % key)
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not self.dictionary.has_key(key): return rawitem
        print '%s: is_value %s provide_value %s value %s' % (key,repr(is_value),repr(self.provide_value),repr(rawitem))
        if is_value:
            if self.provide_value: return rawitem
            else:
               print 'Turning %s into string' % repr(rawitem)
               return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and rawitem != '?' and rawitem != ".":
                return self.dictionary.change_type(key,rawitem)
            return rawitem   #can't do anything
           
    def __delitem__(self,key):
        self.RemoveItem(key)

    def __len__(self):
        blen = len(self.block)
        return blen    

    def __nonzero__(self):
        if self.__len__() > 0: return 1
        return 0

    # keys returns all internal keys
    def keys(self):
        return self.block.keys()    #always lower case

    def values(self):
        return [self[a] for a in self.keys()]

    def items(self):
        return [a for a in zip(self.keys(),self.values())]

    def has_key(self,key):
        if isinstance(key,basestring) and key.lower() in self.keys():
            return 1
        return 0

    def get(self,key,default=None):
        if self.has_key(key):
            retval = self.__getitem__(key)
        else:
            retval = default
        return retval

    def clear(self):
        self.block = {}
        self.loops = {}
        self.item_order = []
        self.true_case = {}

    # note: returns a plain StarBlock even when called on a subclass
    def copy(self):
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        newcopy.loops = self.loops.copy()
    #    return self.copy.im_class(newcopy)   #catch inheritance
        return newcopy
     
    def update(self,adict):
        for key in adict.keys():
            self.AddItem(key,adict[key])

    def GetItemPosition(self,itemname):
        import string
        if isinstance(itemname,int):
            # return loop position
            return (-1, self.item_order.index(itemname))
        if not self.has_key(itemname):
            raise ValueError, 'No such dataname %s' % itemname
        testname = itemname.lower()
        if testname in self.item_order:
            return (-1,self.item_order.index(testname))
        loop_no = self.FindLoop(testname)
        loop_pos = self.loops[loop_no].index(testname)
        return loop_no,loop_pos

    def ChangeItemOrder(self,itemname,newpos):
        """Move itemname to newpos"""
        if isinstance(itemname,basestring):
            true_name = itemname.lower()
        else:
            true_name = itemname
        self.item_order.remove(true_name)
        self.item_order.insert(newpos,true_name)

    def GetItemOrder(self):
        return self.item_order[:]

    def AddItem(self,key,value,precheck=False):
        if not isinstance(key,basestring):
             raise TypeError, 'Star datanames are strings only (got %s)' % `key`
        if not precheck:
             self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters   
        # check for overwriting
        if self.has_key(key):
             if not self.overwrite:
                 raise StarError( 'Attempt to insert duplicate item name %s' % key)
        # put the data in the right container
        regval,empty_val = self.regularise_data(value)
        # check for pure string data
        pure_string = check_stringiness(regval)
        if not precheck:
            self.check_item_value(regval)
        # update ancillary information first
        lower_key = key.lower()
        if not self.has_key(lower_key) and self.FindLoop(lower_key)<0:      #need to add to order
            self.item_order.append(lower_key)
        # always remove from our case table in case the case is different
        else:
            del self.true_case[lower_key]
        self.true_case[lower_key] = key
        if pure_string:
            self.block.update({lower_key:[regval,empty_val]})
        else:
            self.block.update({lower_key:[empty_val,regval]})  
            
    def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
        # print "Received data %s" % `incomingdata`
        # we accept tuples, strings, lists and dicts!!
        # Direct insertion: we have a string-valued key, with an array
        # of values -> single-item into our loop
        if isinstance(incomingdata[0],(tuple,list)):
           # a whole loop
           keyvallist = zip(incomingdata[0],incomingdata[1])
           for key,value in keyvallist:
               self.AddItem(key,value)
           self.CreateLoop(incomingdata[0])
        elif not isinstance(incomingdata[0],basestring):
             raise TypeError, 'Star datanames are strings only (got %s)' % `incomingdata[0]`
        else:
            self.AddItem(incomingdata[0],incomingdata[1])

    def check_data_name(self,dataname,maxlength=-1): 
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print 'Checking %s for unicode characterset conformance' % dataname
            if len (filter (lambda a: ord(a) < 33, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len (filter (lambda a: ord(a) > 126 and ord(a) < 160, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len (filter (lambda a: ord(a) > 0xD7FF and ord(a) < 0xE000, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len (filter (lambda a: ord(a) > 0xFDCF and ord(a) < 0xFDF0, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len (filter (lambda a: ord(a) == 0xFFFE or ord(a) == 0xFFFF, dataname)) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            if len (filter (lambda a: ord(a) > 0x10000 and (ord(a) & 0xFFFE == 0xFFFE) , dataname)) > 0:
                print '%s fails' % dataname
                for a in dataname: print '%x' % ord(a),
                print
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
           test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,basestring):
                if it=='': return
                me = self.char_check.match(it)            
                if not me:
                    print "Fail value check: %s" % it
                    raise StarError, u'Bad character in %s' % it
                else:
                    if me.span() != (0,len(it)):
                        print "Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],`it`)
                        raise StarError,u'Data item "' + `it` +  u'"... contains forbidden characters'
        map(check_one,test_item)

    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary"""
        from numbers import Number
        if isinstance(dataitem,(Number,basestring,StarList,StarDict)):
            return dataitem,None
        if isinstance(dataitem,(tuple,list)):
            return dataitem,[None]*len(dataitem)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError, value:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        return regval,[None]*len(regval)
        
    def RemoveItem(self,itemname):
        # first check any loops
        loop_no = self.FindLoop(itemname)
        testkey = itemname.lower()
        if self.has_key(testkey):
            del self.block[testkey]
            del self.true_case[testkey]
            # now remove from loop
            if loop_no >= 0:
                self.loops[loop_no].remove(testkey)
                if len(self.loops[loop_no])==0:
                    del self.loops[loop_no]
                    self.item_order.remove(loop_no)
            else:  #will appear in order list
                self.item_order.remove(testkey)
     
    def RemoveLoopItem(self,itemname):
        self.RemoveItem(itemname)

    def GetItemValue(self,itemname):
        """Return value of itemname"""
        return self.GetFullItemValue(itemname)[0]

    def GetFullItemValue(self,itemname):
        """Return value of itemname and whether or not it is a native value"""
        try:
            s,v = self.block[itemname.lower()]
        except KeyError:
            raise KeyError, 'Itemname %s not in datablock' % itemname
        # prefer string value unless all are None
        # are we a looped value?
        if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
            if s is not None or (isinstance(s,StarList) and None not in s): 
                return s,False    #a string value
            else:
                return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
        elif None not in s: 
            return s,False     #a list of string values
        else: 
            if len(v)>0:   
                return v,not isinstance(v[0],StarList)
            return v,True

    def CreateLoop(self,datanames,order=-1):
           """[[datanames]] is a list of datanames that together form a loop.  They should
           all contain the same number of elements (possibly 0). If [[order]] is given, the loop will
           appear at this position in the block. A loop counts as a single position."""
           # check lengths: these datanames should exist
           listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
           if len(listed_values) == len(datanames):
               len_set = set([len(self[a]) for a in datanames])
               if len(len_set)>1:
                   raise ValueError, 'Request to loop datanames %s with different lengths: %s' % (`datanames`,`len_set`)
           elif len(listed_values) != 0:
               raise ValueError, 'Request to loop datanames where some are single values and some are not'
           if len(self.loops)>0:
               loopno = max(self.loops.keys()) + 1
           else:
               loopno = 1
           # store as lower case
           lc_datanames = [d.lower() for d in datanames]
           # remove these datanames from all other loops
           [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
           self.loops[loopno] = list(lc_datanames)
           if order >= 0:
               self.item_order.insert(order,loopno)
           else:
               self.item_order.append(loopno)
           # remove these datanames from item ordering
           self.item_order = [a for a in self.item_order if a not in lc_datanames] 
          
    def AddLoopName(self,oldname, newname):
        """Add [[newname]] to the loop containing [[oldname]]"""
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError, '%s not in loop' % oldname
        if newname in self.loops[loop_no]:
            return
        # check length
        loop_len = len(self[oldname])
        if len(self[newname]) != loop_len:
            raise ValueError, 'Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)
        # remove from any other loops
        [self.loops[a].remove(newname) for a in self.loops if newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(newname)
        
    def FindLoop(self,keyname):
        """Find the loop that contains keyname and return its numerical index,-1 if not present"""
        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
        if len(loop_no)>0:
            return loop_no[0]
        else:
            return -1

    def GetLoop(self,keyname):
        """Return a LoopBlock object containing keyname"""
        return LoopBlock(self,keyname)

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with [[keyname]]"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError, '%s is not in any loop' % keyname

    def AddToLoop(self,dataname,loopdata):
        # check lengths
        thisloop = self.FindLoop(dataname)
        loop_len = len(self[dataname])
        bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
        if len(bad_vals)>0:
           raise StarLengthError, "Number of values for looped datanames %s not equal to %d" \
               % (`bad_vals`,loop_len)
        self.update(loopdata)
        self.loops[thisloop]+=loopdata.keys()

    def Loopify(self,datanamelist):
        self.CreateLoop(datanamelist)

    def RemoveKeyedPacket(self,keyname,keyvalue):
        packet_coord = list(self[keyname]).index(keyvalue)
        loopnames = self.GetLoopNames(keyname)
        for dataname in loopnames:
            self.block[dataname][0] = list(self.block[dataname][0])
            del self.block[dataname][0][packet_coord]
            self.block[dataname][1] = list(self.block[dataname][1])
            del self.block[dataname][1][packet_coord]

    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet where [[keyname]] has value [[keyvalue]]. Ignore case if no_case is true"""
        #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
        my_loop = self.GetLoop(keyname)
        if no_case:
           one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
           one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
        if len(one_pack)!=1:
            raise ValueError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))
        #print "Keyed packet: %s" % one_pack[0]
        return one_pack[0]

    def GetKeyedSemanticPacket(self,keyvalue,cat_id):
        """Return a complete packet for category cat_id"""
        target_keys = self.dictionary.cat_key_table[cat_id]
        p = StarPacket()
        # set case-sensitivity flag
        lcase = False
        if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
            lcase = True
        for cat_key in target_keys:
            try:
                extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
            except KeyError:        #try to create the key
                key_vals = self[cat_key]   #will create a key column
            p.merge_packet(self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase))
        # the following attributes used to calculate missing values
        p.key = target_keys[0]
        p.cif_dictionary = self.dictionary
        p.fulldata = self
        return p


    def SetOutputLength(self,wraplength=80,maxoutlength=2048):
        if wraplength > maxoutlength:
            raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
        self.wraplength = wraplength
        self.maxoutlength = maxoutlength

    def printsection(self,instring='',ordering=[],blockstart="",blockend="",indent=0):
        import string
        # first make an ordering
        self.create_ordering(ordering)
        # now do it...
        if not instring:
            outstring = CIFStringIO(target_width=80)       # the returned string
        else:
            outstring = instring
        # print loop delimiter
        outstring.write(blockstart,canbreak=True)
        while len(self.output_order)>0:
           #print "Remaining to output " + `self.output_order`
           itemname = self.output_order.pop(0)
           item_spec = [i for i in ordering if not isinstance(itemname,int) and i['dataname'].lower()==itemname.lower()]
           if len(item_spec)>0:
               col_pos = item_spec[0].get('column',-1)
               item_spec = item_spec[0]
           else:
               col_pos = -1
               item_spec = {}
           if not isinstance(itemname,int):  #no loop
               if col_pos < 0: col_pos = 40
               outstring.set_tab(col_pos)
               itemvalue = self[itemname]
               outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False)
               outstring.write(' ',canbreak=True,do_tab=False)    #space after itemname
               self.format_value(itemvalue,outstring,hints=item_spec)
           else:   # we are asked to print a loop block
               outstring.set_tab(10)       #guess this is OK?
               outstring.write(' '*indent,mustbreak=True,do_tab=False); outstring.write('loop_\n',do_tab=False)
               self.format_names(outstring,indent+2,loop_no=itemname)
               self.format_packets(outstring,indent+2,loop_no=itemname)
        returnstring = outstring.getvalue()
        outstring.close()
        return returnstring

    def format_names(self,outstring,indent=0,loop_no=-1):
        """Print datanames from [[loop_no]] one per line"""
        temp_order = self.loops[loop_no][:]   #copy
        while len(temp_order)>0:
            itemname = temp_order.pop(0)
            outstring.write(' ' * indent,do_tab=False) 
            outstring.write(self.true_case[itemname],do_tab=False)
            outstring.write("\n",do_tab=False)

    def format_packets(self,outstring,indent=0,loop_no=-1):
       import cStringIO
       import string
       alldata = [self[a] for a in self.loops[loop_no]]
       print 'Alldata: %s' % `alldata`
       packet_data = apply(zip,alldata)
       print 'Packet data: %s' % `packet_data`
       for position in range(len(packet_data)):
           for point in range(len(packet_data[position])):
               datapoint = packet_data[position][point]
               packstring = self.format_packet_item(datapoint,indent,outstring)
           outstring.write("\n",do_tab=False)
               
    def format_packet_item(self,pack_item,indent,outstring):
           # print 'Formatting %s' % `pack_item`
           if isinstance(pack_item,basestring):
               outstring.write(self._formatstring(pack_item)) 
           else: 
               self.format_value(pack_item,outstring)
           outstring.write(' ',canbreak=True,do_tab=False)

    def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,
        lbprotocol=True,pref_protocol=True):
        import string
        if standard == 'CIF2':
            allowed_delimiters = set(['"',"'",";",None,'"""',"'''"])
        else:
            allowed_delimiters = set(['"',"'",";",None])
        if len(instring)==0: allowed_delimiters.difference_update([None])
        if len(instring) > (self.maxoutlength-2) or '\n' in instring:
                allowed_delimiters.intersection_update([";","'''",'"""'])
        if ' ' in instring or '\t' in instring or '\v' in instring or '_' in instring or ',' in instring:
                allowed_delimiters.difference_update([None])
        if '"' in instring: allowed_delimiters.difference_update(['"',None])
        if "'" in instring: allowed_delimiters.difference_update(["'",None])
        out_delimiter = ";"  #default (most conservative)
        if delimiter in allowed_delimiters:
            out_delimiter = delimiter
        elif "'" in allowed_delimiters: out_delimiter = "'"
        elif '"' in allowed_delimiters: out_delimiter = '"'
        if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter
        elif out_delimiter is None: return instring
        # we are left with semicolon strings
        outstring = "\n;"
        # if there are returns in the string, try to work with them
        while 1:
            retin = string.find(instring,'\n')+1
            if retin < self.maxoutlength and retin > 0:      # honour this break
                outstring = outstring + instring[:retin]
                instring = instring[retin:]
            elif len(instring)<self.maxoutlength:            # finished
                outstring = outstring + instring + '\n;\n'
                break
            else:                                            # break at the last space
                for letter in range(self.maxoutlength-1,self.wraplength-1,-1):
                    if instring[letter] in ' \t\f':
                        outstring = outstring + instring[:letter+1]+'\n'
                        instring = instring[letter+1:]
                        break
        return outstring

    def format_value(self,itemvalue,stringsink,compound=False,hints={}):
        """Format a Star data value"""
        delimiter = hints.get('delimiter',None)
        if isinstance(itemvalue,basestring):  #need to sanitize
           stringsink.write(self._formatstring(itemvalue,delimiter=delimiter),canbreak = True)
        elif isinstance(itemvalue,StarList):
           stringsink.set_tab(0)
           stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound)
           if len(itemvalue)>0:
               self.format_value(itemvalue[0],stringsink)
               for listval in itemvalue[1:]:
                  print 'Formatting %s' % `listval`
                  stringsink.write(', ',do_tab=False)
                  self.format_value(listval,stringsink,compound=True)
           stringsink.write(']',unindent=True)
        elif isinstance(itemvalue,StarDict):
           stringsink.set_tab(0)
           stringsink.write('{',newindent=True,mustbreak=compound)  #start a new line inside
           items = itemvalue.items()
           if len(items)>0:
               stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
               self.format_value(items[0][1],stringsink)
               for key,value in items[1:]:
                   stringsink.write(', ')
                   stringsink.write("'"+key+"'"+":",canbreak=True)
                   self.format_value(value,stringsink)   #never break between key and value
           stringsink.write('}',unindent=True)
        else: 
           stringsink.write(str(itemvalue),canbreak=True)   #numbers

    def process_template(self,template_string):
        """Process a template datafile to formatting instructions"""
        template_as_cif = StarFile(StringIO(template_string),grammar="DDLm").first_block()
        #template_as_lines = template_string.split("\n")
        #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
        #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
        #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
        self.form_hints = []   #ordered array of hint dictionaries
        for item in template_as_cif.item_order:  #order of input
            if not isinstance(item,int):    #not nested
                hint_dict = {"dataname":item}
                # find the line in the file
                start_pos = re.search("(^[ \t]*" + item + "[ \t\n]+)(?P([\S]+)|(^;))",template_string,re.I|re.M)
                if start_pos.group("spec") != None:
                    spec_pos = start_pos.start("spec")-start_pos.start(0)
                    spec_char = template_string[start_pos.start("spec")]
                    if spec_char in '\'";':
                        hint_dict.update({"delimiter":spec_char})
                    if spec_char != ";":   #so we need to work out the column number
                        hint_dict.update({"column":spec_pos})
                print '%s: %s' % (item,`hint_dict`)
                self.form_hints.append(hint_dict)
            else:           #loop block
                testnames = template_as_cif.loops[item]
                total_items = len(template_as_cif.loops[item])
                testname = testnames[0]
                #find the loop spec line in the file
                loop_regex = "(^[ \t]*loop_[ \t\n\r]+" + testname + "([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P(.(?!_loop|_[\S]+))*))" % (total_items - 1)
                loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
                loop_so_far = loop_line.end()
                packet_text = loop_line.group('packet')
                packet_regex = "[ \t]*(?P(?P'([^\n\r\f']*)'+)|(?P\"([^\n\r\"]*)\"+)|(?P[^\s]+))"
                packet_pos = re.finditer(packet_regex,packet_text)
                line_end_pos = re.finditer("^",packet_text,re.M)
                next_end = line_end_pos.next().end()
                last_end = next_end
                for loopname in testnames:
                    hint_dict = {"dataname":loopname}
                    thismatch = packet_pos.next()
                    while thismatch.start('all') > next_end:
                        try: 
                            last_end = next_end
                            next_end = line_end_pos.next().start()
                            print 'next end %d' % next_end
                        except StopIteration:
                            pass 
                    print 'Start %d, last_end %d' % (thismatch.start('all'),last_end)
                    col_pos = thismatch.start('all') - last_end
                    if thismatch.group('none') is None:
                        hint_dict.update({'delimiter':thismatch.groups()[0][0]})
                    hint_dict.update({'column':col_pos})
                    print '%s: %s' % (loopname,`hint_dict`)
                    self.form_hints.append(hint_dict)
        return

    def create_ordering(self,order_dict):
        """Create a canonical ordering that includes loops using our formatting hints dictionary"""
        requested_order = [i['dataname'] for i in order_dict]
        new_order = []
        for item in requested_order:
           if isinstance(item,basestring) and item.lower() in self.item_order:
               new_order.append(item.lower())
           elif self.has_key(item):    #in a loop somewhere
               target_loop = self.FindLoop(item)
               if target_loop not in new_order:
                   new_order.append(target_loop)
        extras = [i for i in self.item_order if i not in new_order]
        self.output_order = new_order + extras
        print 'Final order: ' + `self.output_order`

    def convert_to_string(self,dataname):
        """Convert values held in dataname value fork to string version"""
        v,is_value = self.GetFullItemValue(dataname)
        if not is_value:
            return v
        if check_stringiness(v): return v   #already strings
        # TODO...something else
        return v


    def merge(self,new_block,mode="strict",match_att=[],match_function=None,
                   rel_keys = []):
        if mode == 'strict':
           for key in new_block.keys(): 
               if self.has_key(key) and key not in match_att:
                  raise CifError( "Identical keys %s in strict merge mode" % key)
               elif key not in match_att:           #a new dataname
                   self[key] = new_block[key]
           # we get here if there are no keys in common, so we can now copy
           # the loops and not worry about overlaps
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'replace':
           newkeys = new_block.keys()
           for ma in match_att:
              try:
                   newkeys.remove(ma)        #don't touch the special ones
              except ValueError:
                   pass
           for key in new_block.keys(): 
                  if isinstance(key,basestring):
                      self[key] = new_block[key] 
           # creating the loop will remove items from other loops
           for one_loop in new_block.loops.values():
               self.CreateLoop(one_loop)
           # we have lost case information
           self.true_case.update(new_block.true_case)
        elif mode == 'overlay':
           print 'Overlay mode, current overwrite is %s' % self.overwrite
           raise StarError, 'Overlay block merge mode not implemented'
           save_overwrite = self.overwrite
           self.overwrite = True
           for attribute in new_block.keys():
               if attribute in match_att: continue      #ignore this one
               new_value = new_block[attribute]
               #non-looped items
               if new_block.FindLoop(attribute)<0:     #not looped
                  self[attribute] = new_value 
           my_loops = self.loops.values()
           perfect_overlaps = [a for a in new_block.loops if a in my_loops]
           for po in perfect_overlaps:
              loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
              try:
                  newkeypos = map(lambda a:newkeys.index(a),loop_keys)
                  newkeypos = newkeypos[0]      #one key per loop for now
                  loop_keys = loop_keys[0] 
              except (ValueError,IndexError):
                  newkeypos = []
                  overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
                  new_data = map(lambda a:new_block[a],overlaps) #new packet data
                  packet_data = transpose(overlap_data)
                  new_p_data = transpose(new_data)
                  # remove any packets for which the keys match between old and new; we
                  # make the arbitrary choice that the old data stays
                  if newkeypos:
                      # get matching values in new list
                      print "Old, new data:\n%s\n%s" % (`overlap_data[newkeypos]`,`new_data[newkeypos]`)
                      key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
                      # filter out any new data with these key values
                      new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
                      if new_p_data:
                          new_data = transpose(new_p_data)
                      else: new_data = []
                  # wipe out the old data and enter the new stuff
                  byebyeloop = self.GetLoop(overlaps[0])
                  # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`)
                  # Note that if, in the original dictionary, overlaps are not
                  # looped, GetLoop will return the block itself.  So we check
                  # for this case...
                  if byebyeloop != self:
                      self.remove_loop(byebyeloop)
                  self.AddLoopItem((overlaps,overlap_data))  #adding old packets
                  for pd in new_p_data:                             #adding new packets
                     if pd not in packet_data:
                        for i in range(len(overlaps)):
                            #don't do this at home; we are appending
                            #to something in place
                            self[overlaps[i]].append(pd[i]) 
           self.overwrite = save_overwrite

    def assign_dictionary(self,dic):
        if not dic.diclang=="DDLm":
            print "Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri
            return
        self.dictionary = dic

    def unassign_dictionary(self):
        """Remove dictionary-dependent behaviour"""
        self.dictionary = None

Ancestors (in MRO)

Instance variables

var block

var characterset

var curitem

var dictionary

var formatting_hints

var item_order

var loops

var maxoutlength

var overwrite

var popout

var provide_value

var true_case

var wraplength

Methods

def __init__(

self, data=(), maxoutlength=2048, wraplength=80, overwrite=True, characterset='ascii', maxnamelength=-1)

def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True,
             characterset='ascii',maxnamelength=-1):
    self.block = {}    #the actual data storage (lower case keys)
    self.loops = {}    #each loop is indexed by a number and contains a list of datanames
    self.item_order = []  #lower case, loops referenced by integer
    self.formatting_hints = {}
    self.true_case = {} #transform lower case to supplied case
    self.provide_value = False   #prefer string version always
    self.dictionary = None      #DDLm dictionary
    self.popout = False         #used during load iteration
    self.curitem = -1           #used during iteration
    self.maxoutlength = maxoutlength
    self.setmaxnamelength(maxnamelength)  #to enforce CIF limit of 75 characters
    self.wraplength = wraplength
    self.overwrite = overwrite
    self.characterset = characterset
    if self.characterset == 'ascii':
        self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M)
    elif self.characterset == 'unicode':
        self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
    else:
        raise StarError("No character set specified")
    if isinstance(data,(tuple,list)):
        for item in data:
            self.AddLoopItem(item)
    elif isinstance(data,StarBlock):
        self.block = data.block.copy() 
        self.item_order = data.item_order[:]
        self.true_case = data.true_case.copy()
        # loops as well
        self.loops = data.loops.copy()
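
As a usage sketch (Python 2, to match the source above; assuming StarBlock is importable from CifFile.StarFile), a block behaves as a case-insensitive mapping from datanames to values:

    from CifFile.StarFile import StarBlock
    b = StarBlock()
    b['_cell.length_a'] = '5.959'    # stored under the lower-case name
    b['_CELL.Length_A']              # -> '5.959' (lookup is case-insensitive)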

def AddItem(

self, key, value, precheck=False)

def AddItem(self,key,value,precheck=False):
    if not isinstance(key,basestring):
         raise TypeError, 'Star datanames are strings only (got %s)' % `key`
    if not precheck:
         self.check_data_name(key,self.maxnamelength)    # make sure no nasty characters   
    # check for overwriting
    if self.has_key(key):
         if not self.overwrite:
             raise StarError( 'Attempt to insert duplicate item name %s' % key)
    # put the data in the right container
    regval,empty_val = self.regularise_data(value)
    # check for pure string data
    pure_string = check_stringiness(regval)
    if not precheck:
        self.check_item_value(regval)
    # update ancillary information first
    lower_key = key.lower()
    if not self.has_key(lower_key) and self.FindLoop(lower_key)<0:      #need to add to order
        self.item_order.append(lower_key)
    # always remove from our case table in case the case is different
    else:
        del self.true_case[lower_key]
    self.true_case[lower_key] = key
    if pure_string:
        self.block.update({lower_key:[regval,empty_val]})
    else:
        self.block.update({lower_key:[empty_val,regval]})  
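
A minimal sketch of the overwrite check (block construction as in the sketch above):

    b = StarBlock()                      # overwrite defaults to True
    b.AddItem('_symmetry.space_group_name','P 21 21 21')
    b.AddItem('_symmetry.space_group_name','P 1')    # silently replaces
    strict = StarBlock(overwrite=False)
    strict.AddItem('_a','1')
    strict.AddItem('_a','2')             # raises StarError: duplicate item name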

def AddLoopItem(

self, incomingdata, precheck=False, maxlength=-1)

def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1):
    # print "Received data %s" % `incomingdata`
    # we accept tuples, strings, lists and dicts!!
    # Direct insertion: we have a string-valued key, with an array
    # of values -> single-item into our loop
    if isinstance(incomingdata[0],(tuple,list)):
       # a whole loop
       keyvallist = zip(incomingdata[0],incomingdata[1])
       for key,value in keyvallist:
           self.AddItem(key,value)
       self.CreateLoop(incomingdata[0])
    elif not isinstance(incomingdata[0],basestring):
         raise TypeError, 'Star datanames are strings only (got %s)' % `incomingdata[0]`
    else:
        self.AddItem(incomingdata[0],incomingdata[1])
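
Passing a (name list, value list) pair creates a whole loop in one call; a sketch with illustrative datanames:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label','_atom_site.occupancy'],
                   [['C1','O1'],['1.0','0.5']]))
    b.GetLoopNames('_atom_site.label')
    # -> ['_atom_site.label', '_atom_site.occupancy']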

def AddLoopName(

self, oldname, newname)

Add [[newname]] to the loop containing [[oldname]]

def AddLoopName(self,oldname, newname):
    """Add [[newname]] to the loop containing [[oldname]]"""
    loop_no = self.FindLoop(oldname)
    if loop_no < 0:
        raise KeyError, '%s not in loop' % oldname
    if newname in self.loops[loop_no]:
        return
    # check length
    loop_len = len(self[oldname])
    if len(self[newname]) != loop_len:
        raise ValueError, 'Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len)
    # remove from any other loops
    [self.loops[a].remove(newname) for a in self.loops if newname in self.loops[a]]
    # and add to this loop
    self.loops[loop_no].append(newname)
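
The new dataname must already hold a column of the same length; a sketch:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label'],[['C1','O1']]))
    b['_atom_site.occupancy'] = ['1.0','0.5']      # two values, matching the loop
    b.AddLoopName('_atom_site.label','_atom_site.occupancy')
    b.FindLoop('_atom_site.occupancy')             # -> same loop number as the label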

def AddToLoop(

self, dataname, loopdata)

def AddToLoop(self,dataname,loopdata):
    # check lengths
    thisloop = self.FindLoop(dataname)
    loop_len = len(self[dataname])
    bad_vals = [a for a in loopdata.items() if len(a[1])!=loop_len]
    if len(bad_vals)>0:
       raise StarLengthError, "Number of values for looped datanames %s not equal to %d" \
           % (`bad_vals`,loop_len)
    self.update(loopdata)
    self.loops[thisloop]+=loopdata.keys()
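
A sketch; every column in loopdata must match the length of the existing loop:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label'],[['C1','O1']]))
    b.AddToLoop('_atom_site.label',{'_atom_site.type_symbol':['C','O']})
    b.AddToLoop('_atom_site.label',{'_atom_site.adp':['0.01']})   # raises StarLengthError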

def ChangeItemOrder(

self, itemname, newpos)

Move itemname to newpos

def ChangeItemOrder(self,itemname,newpos):
    """Move itemname to newpos"""
    if isinstance(itemname,basestring):
        true_name = itemname.lower()
    else:
        true_name = itemname
    self.item_order.remove(true_name)
    self.item_order.insert(newpos,true_name)
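
Positions count a whole loop as a single entry; a sketch:

    b = StarBlock()
    b['_first'] = '1'
    b['_second'] = '2'
    b.ChangeItemOrder('_second',0)
    b.GetItemOrder()     # -> ['_second', '_first']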

def CreateLoop(

self, datanames, order=-1)

[[datanames]] is a list of datanames that together form a loop. They should all contain the same number of elements (possibly 0). If [[order]] is given, the loop will appear at this position in the block. A loop counts as a single position.

def CreateLoop(self,datanames,order=-1):
       """[[datanames]] is a list of datanames that together form a loop.  They should
       all contain the same number of elements (possibly 0). If [[order]] is given, the loop will
       appear at this position in the block. A loop counts as a single position."""
       # check lengths: these datanames should exist
       listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
       if len(listed_values) == len(datanames):
           len_set = set([len(self[a]) for a in datanames])
           if len(len_set)>1:
               raise ValueError, 'Request to loop datanames %s with different lengths: %s' % (`datanames`,`len_set`)
       elif len(listed_values) != 0:
           raise ValueError, 'Request to loop datanames where some are single values and some are not'
       if len(self.loops)>0:
           loopno = max(self.loops.keys()) + 1
       else:
           loopno = 1
       # store as lower case
       lc_datanames = [d.lower() for d in datanames]
       # remove these datanames from all other loops
       [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
       self.loops[loopno] = list(lc_datanames)
       if order >= 0:
           self.item_order.insert(order,loopno)
       else:
           self.item_order.append(loopno)
       # remove these datanames from item ordering
       self.item_order = [a for a in self.item_order if a not in lc_datanames] 
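
The datanames must already be present with equal-length values; a sketch:

    b = StarBlock()
    b['_atom_site.label'] = ['C1','O1']
    b['_atom_site.occupancy'] = ['1.0','0.5']
    b.CreateLoop(['_atom_site.label','_atom_site.occupancy'])
    b['_atom_site.adp'] = ['0.01']       # only one value
    b.CreateLoop(['_atom_site.label','_atom_site.adp'])   # raises ValueError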

def FindLoop(

self, keyname)

Find the loop that contains keyname and return its numerical index, or -1 if not present

def FindLoop(self,keyname):
    """Find the loop that contains keyname and return its numerical index,-1 if not present"""
    loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
    if len(loop_no)>0:
        return loop_no[0]
    else:
        return -1
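
A sketch:

    b = StarBlock()
    b['_atom_site.label'] = ['C1','O1']
    b.CreateLoop(['_atom_site.label'])
    b.FindLoop('_ATOM_SITE.label')    # -> 1 (matching is case-insensitive)
    b.FindLoop('_not_looped')         # -> -1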

def GetFullItemValue(

self, itemname)

Return value of itemname and whether or not it is a native value

def GetFullItemValue(self,itemname):
    """Return value of itemname and whether or not it is a native value"""
    try:
        s,v = self.block[itemname.lower()]
    except KeyError:
        raise KeyError, 'Itemname %s not in datablock' % itemname
    # prefer string value unless all are None
    # are we a looped value?
    if not isinstance(s,(tuple,list)) or isinstance(s,StarList):
        if s is not None or (isinstance(s,StarList) and None not in s): 
            return s,False    #a string value
        else:
            return v,not isinstance(v,StarList)  #a StarList is not calculation-ready
    elif None not in s: 
        return s,False     #a list of string values
    else: 
        if len(v)>0:   
            return v,not isinstance(v[0],StarList)
        return v,True
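
The flag distinguishes string representations from calculation-ready ("native") values; a sketch:

    b = StarBlock()
    b['_cell.volume'] = '211.6'
    b.GetFullItemValue('_cell.volume')    # -> ('211.6', False): held as a string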

def GetItemOrder(

self)

def GetItemOrder(self):
    return self.item_order[:]

def GetItemPosition(

self, itemname)

def GetItemPosition(self,itemname):
    import string
    if isinstance(itemname,int):
        # return loop position
        return (-1, self.item_order.index(itemname))
    if not self.has_key(itemname):
        raise ValueError, 'No such dataname %s' % itemname
    testname = itemname.lower()
    if testname in self.item_order:
        return (-1,self.item_order.index(testname))
    loop_no = self.FindLoop(testname)
    loop_pos = self.loops[loop_no].index(testname)
    return loop_no,loop_pos
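
The first element of the result is the loop number (-1 for non-looped items); a sketch:

    b = StarBlock()
    b['_plain'] = '1'
    b['_col'] = ['a','b']
    b.CreateLoop(['_col'])
    b.GetItemPosition('_plain')    # -> (-1, 0)
    b.GetItemPosition('_col')      # -> (1, 0): first name in loop 1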

def GetItemValue(

self, itemname)

Return value of itemname

def GetItemValue(self,itemname):
    """Return value of itemname"""
    return self.GetFullItemValue(itemname)[0]

def GetKeyedPacket(

self, keyname, keyvalue, no_case=False)

Return the loop packet where [[keyname]] has value [[keyvalue]]. Ignore case if no_case is true

def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
    """Return the loop packet where [[keyname]] has value [[keyvalue]]. Ignore case if no_case is true"""
    #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname])
    my_loop = self.GetLoop(keyname)
    if no_case:
       one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
    else:
       one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
    if len(one_pack)!=1:
        raise ValueError, "Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))
    #print "Keyed packet: %s" % one_pack[0]
    return one_pack[0]
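
A sketch, relying on the same packet attribute access that the method itself uses; getattr is needed because datanames contain dots:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label','_atom_site.occupancy'],
                   [['C1','O1'],['1.0','0.5']]))
    p = b.GetKeyedPacket('_atom_site.label','O1')
    getattr(p,'_atom_site.occupancy')    # -> '0.5'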

def GetKeyedSemanticPacket(

self, keyvalue, cat_id)

Return a complete packet for category cat_id

def GetKeyedSemanticPacket(self,keyvalue,cat_id):
    """Return a complete packet for category cat_id"""
    target_keys = self.dictionary.cat_key_table[cat_id]
    p = StarPacket()
    # set case-sensitivity flag
    lcase = False
    if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']:
        lcase = True
    for cat_key in target_keys:
        try:
            extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase)
        except KeyError:        #try to create the key
            key_vals = self[cat_key]   #will create a key column
        p.merge_packet(self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase))
    # the following attributes used to calculate missing values
    p.key = target_keys[0]
    p.cif_dictionary = self.dictionary
    p.fulldata = self
    return p

def GetLoop(

self, keyname)

Return a LoopBlock object containing keyname

def GetLoop(self,keyname):
    """Return a LoopBlock object containing keyname"""
    return LoopBlock(self,keyname)

def GetLoopNames(

self, keyname)

Return all datanames appearing together with [[keyname]]

def GetLoopNames(self,keyname):
    """Return all datanames appearing together with [[keyname]]"""
    loop_no = self.FindLoop(keyname)
    if loop_no >= 0:
        return self.loops[loop_no]
    else:
        raise KeyError, '%s is not in any loop' % keyname
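
A sketch; the returned names are always lower case:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label','_atom_site.occupancy'],
                   [['C1','O1'],['1.0','0.5']]))
    b.GetLoopNames('_ATOM_SITE.occupancy')
    # -> ['_atom_site.label', '_atom_site.occupancy']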

def Loopify(

self, datanamelist)

def Loopify(self,datanamelist):
    self.CreateLoop(datanamelist)

def RemoveItem(

self, itemname)

def RemoveItem(self,itemname):
    # first check any loops
    loop_no = self.FindLoop(itemname)
    testkey = itemname.lower()
    if self.has_key(testkey):
        del self.block[testkey]
        del self.true_case[testkey]
        # now remove from loop
        if loop_no >= 0:
            self.loops[loop_no].remove(testkey)
            if len(self.loops[loop_no])==0:
                del self.loops[loop_no]
                self.item_order.remove(loop_no)
        else:  #will appear in order list
            self.item_order.remove(testkey)

def RemoveKeyedPacket(

self, keyname, keyvalue)

def RemoveKeyedPacket(self,keyname,keyvalue):
    packet_coord = list(self[keyname]).index(keyvalue)
    loopnames = self.GetLoopNames(keyname)
    for dataname in loopnames:
        self.block[dataname][0] = list(self.block[dataname][0])
        del self.block[dataname][0][packet_coord]
        self.block[dataname][1] = list(self.block[dataname][1])
        del self.block[dataname][1][packet_coord]
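
This removes one row from every column of the loop containing keyname; a sketch:

    b = StarBlock()
    b.AddLoopItem((['_atom_site.label','_atom_site.occupancy'],
                   [['C1','O1'],['1.0','0.5']]))
    b.RemoveKeyedPacket('_atom_site.label','C1')
    b['_atom_site.occupancy']    # -> ['0.5']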

def RemoveLoopItem(

self, itemname)

def RemoveLoopItem(self,itemname):
    self.RemoveItem(itemname)

def SetOutputLength(

self, wraplength=80, maxoutlength=2048)

def SetOutputLength(self,wraplength=80,maxoutlength=2048):
    if wraplength > maxoutlength:
        raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength))
    self.wraplength = wraplength
    self.maxoutlength = maxoutlength

def assign_dictionary(

self, dic)

def assign_dictionary(self,dic):
    if not dic.diclang=="DDLm":
        print "Warning: ignoring dictionary %s" % dic.dic_as_cif.my_uri
        return
    self.dictionary = dic

def check_data_name(

self, dataname, maxlength=-1)

def check_data_name(self,dataname,maxlength=-1): 
    if maxlength > 0:
        self.check_name_length(dataname,maxlength)
    if dataname[0]!='_':
        raise StarError( 'Dataname ' + dataname + ' does not begin with _')
    if self.characterset=='ascii':
        if len (filter (lambda a: ord(a) < 33 or ord(a) > 126, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
    else:
        # print 'Checking %s for unicode characterset conformance' % dataname
        if len (filter (lambda a: ord(a) < 33, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
        if len (filter (lambda a: ord(a) > 126 and ord(a) < 160, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
        if len (filter (lambda a: ord(a) > 0xD7FF and ord(a) < 0xE000, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
        if len (filter (lambda a: ord(a) > 0xFDCF and ord(a) < 0xFDF0, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
        if len (filter (lambda a: ord(a) == 0xFFFE or ord(a) == 0xFFFF, dataname)) > 0:
            raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
        if len (filter (lambda a: ord(a) > 0x10000 and (ord(a) & 0xFFFE == 0xFFFE) , dataname)) > 0:
            print '%s fails' % dataname
            for a in dataname: print '%x' % ord(a),
            print
            raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')
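
For example, datanames must begin with an underscore and contain no whitespace:

    b = StarBlock()
    b.AddItem('cell_length','5.959')    # raises StarError: does not begin with _
    b.AddItem('_cell length','5.959')   # raises StarError: forbidden characters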

def check_item_value(

self, item)

def check_item_value(self,item):
    test_item = item
    if not isinstance(item,(list,dict,tuple)):
       test_item = [item]         #single item list
    def check_one (it):
        if isinstance(it,basestring):
            if it=='': return
            me = self.char_check.match(it)            
            if not me:
                print "Fail value check: %s" % it
                raise StarError, u'Bad character in %s' % it
            else:
                if me.span() != (0,len(it)):
                    print "Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],`it`)
                    raise StarError,u'Data item "' + `it` +  u'"... contains forbidden characters'
    map(check_one,test_item)

def check_name_length(

self, dataname, maxlength)

def check_name_length(self,dataname,maxlength):
    if len(dataname)>maxlength:
        raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
    return

def clear(

self)

def clear(self):
    self.block = {}
    self.loops = {}
    self.item_order = []
    self.true_case = {}

def convert_to_string(

self, dataname)

Convert values held in dataname value fork to string version

def convert_to_string(self,dataname):
    """Convert values held in dataname value fork to string version"""
    v,is_value = self.GetFullItemValue(dataname)
    if not is_value:
        return v
    if check_stringiness(v): return v   #already strings
    # TODO...something else
    return v

def copy(

self)

def copy(self):
    newcopy = StarBlock()
    newcopy.block = self.block.copy()
    newcopy.item_order = self.item_order[:]
    newcopy.true_case = self.true_case.copy()
    newcopy.loops = self.loops.copy()
#    return self.copy.im_class(newcopy)   #catch inheritance
    return newcopy

def create_ordering(

self, order_dict)

Create a canonical ordering that includes loops using our formatting hints dictionary

def create_ordering(self,order_dict):
    """Create a canonical ordering that includes loops using our formatting hints dictionary"""
    requested_order = [i['dataname'] for i in order_dict]
    new_order = []
    for item in requested_order:
       if isinstance(item,basestring) and item.lower() in self.item_order:
           new_order.append(item.lower())
       elif self.has_key(item):    #in a loop somewhere
           target_loop = self.FindLoop(item)
           if target_loop not in new_order:
               new_order.append(target_loop)
    extras = [i for i in self.item_order if i not in new_order]
    self.output_order = new_order + extras
    print 'Final order: ' + `self.output_order`

def format_names(

self, outstring, indent=0, loop_no=-1)

Print datanames from [[loop_no]] one per line

def format_names(self,outstring,indent=0,loop_no=-1):
    """Print datanames from [[loop_no]] one per line"""
    temp_order = self.loops[loop_no][:]   #copy
    while len(temp_order)>0:
        itemname = temp_order.pop(0)
        outstring.write(' ' * indent,do_tab=False) 
        outstring.write(self.true_case[itemname],do_tab=False)
        outstring.write("\n",do_tab=False)

def format_packet_item(

self, pack_item, indent, outstring)

def format_packet_item(self,pack_item,indent,outstring):
       # print 'Formatting %s' % `pack_item`
       if isinstance(pack_item,basestring):
           outstring.write(self._formatstring(pack_item)) 
       else: 
           self.format_value(pack_item,outstring)
       outstring.write(' ',canbreak=True,do_tab=False)

def format_packets(

self, outstring, indent=0, loop_no=-1)

def format_packets(self,outstring,indent=0,loop_no=-1):
   import cStringIO
   import string
   alldata = [self[a] for a in self.loops[loop_no]]
   print 'Alldata: %s' % `alldata`
   packet_data = apply(zip,alldata)
   print 'Packet data: %s' % `packet_data`
   for position in range(len(packet_data)):
       for point in range(len(packet_data[position])):
           datapoint = packet_data[position][point]
           packstring = self.format_packet_item(datapoint,indent,outstring)
       outstring.write("\n",do_tab=False)

def format_value(

self, itemvalue, stringsink, compound=False, hints={})

Format a Star data value

def format_value(self,itemvalue,stringsink,compound=False,hints={}):
    """Format a Star data value"""
    delimiter = hints.get('delimiter',None)
    if isinstance(itemvalue,basestring):  #need to sanitize
       stringsink.write(self._formatstring(itemvalue,delimiter=delimiter),canbreak = True)
    elif isinstance(itemvalue,StarList):
       stringsink.set_tab(0)
       stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound)
       if len(itemvalue)>0:
           self.format_value(itemvalue[0],stringsink)
           for listval in itemvalue[1:]:
              print 'Formatting %s' % `listval`
              stringsink.write(', ',do_tab=False)
              self.format_value(listval,stringsink,compound=True)
       stringsink.write(']',unindent=True)
    elif isinstance(itemvalue,StarDict):
       stringsink.set_tab(0)
       stringsink.write('{',newindent=True,mustbreak=compound)  #start a new line inside
       items = itemvalue.items()
       if len(items)>0:
           stringsink.write("'"+items[0][0]+"'"+':',canbreak=True)
           self.format_value(items[0][1],stringsink)
           for key,value in items[1:]:
               stringsink.write(', ')
               stringsink.write("'"+key+"'"+":",canbreak=True)
               self.format_value(value,stringsink)   #never break between key and value
       stringsink.write('}',unindent=True)
    else: 
       stringsink.write(str(itemvalue),canbreak=True)   #numbers

def get(

self, key, default=None)

def get(self,key,default=None):
    if self.has_key(key):
        retval = self.__getitem__(key)
    else:
        retval = default
    return retval

def has_key(

self, key)

def has_key(self,key):
    if isinstance(key,basestring) and key.lower() in self.keys():
        return 1
    return 0

def items(

self)

def items(self):
    return [a for a in zip(self.keys(),self.values())]

def keys(

self)

def keys(self):
    return self.block.keys()    #always lower case

def merge(

self, new_block, mode='strict', match_att=[], match_function=None, rel_keys=[])

def merge(self,new_block,mode="strict",match_att=[],match_function=None,
               rel_keys = []):
    if mode == 'strict':
       for key in new_block.keys(): 
           if self.has_key(key) and key not in match_att:
              raise CifError( "Identical keys %s in strict merge mode" % key)
           elif key not in match_att:           #a new dataname
               self[key] = new_block[key]
       # we get here if there are no keys in common, so we can now copy
       # the loops and not worry about overlaps
       for one_loop in new_block.loops.values():
           self.CreateLoop(one_loop)
       # we have lost case information
       self.true_case.update(new_block.true_case)
    elif mode == 'replace':
       newkeys = new_block.keys()
       for ma in match_att:
          try:
               newkeys.remove(ma)        #don't touch the special ones
          except ValueError:
               pass
       for key in new_block.keys(): 
              if isinstance(key,basestring):
                  self[key] = new_block[key] 
       # creating the loop will remove items from other loops
       for one_loop in new_block.loops.values():
           self.CreateLoop(one_loop)
       # we have lost case information
       self.true_case.update(new_block.true_case)
     elif mode == 'overlay':
        print 'Overlay mode, current overwrite is %s' % self.overwrite
        raise StarError, 'Overlay block merge mode not implemented'
        # NB: everything below the raise above is unreachable legacy code,
        # retained for reference until overlay merging is reinstated
        save_overwrite = self.overwrite
        self.overwrite = True
        for attribute in new_block.keys():
            if attribute in match_att: continue      #ignore this one
            new_value = new_block[attribute]
            #non-looped items
            if new_block.FindLoop(attribute)<0:     #not looped
               self[attribute] = new_value
        my_loops = self.loops.values()
        perfect_overlaps = [a for a in new_block.loops if a in my_loops]
        for po in perfect_overlaps:
           loop_keys = [a for a in po if a in rel_keys]  #do we have a key?
           try:
               newkeypos = map(lambda a:newkeys.index(a),loop_keys)
               newkeypos = newkeypos[0]      #one key per loop for now
               loop_keys = loop_keys[0]
           except (ValueError,IndexError):
               newkeypos = []
           overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data
           new_data = map(lambda a:new_block[a],overlaps) #new packet data
           packet_data = transpose(overlap_data)
           new_p_data = transpose(new_data)
           # remove any packets for which the keys match between old and new; we
           # make the arbitrary choice that the old data stays
           if newkeypos:
               # get matching values in new list
               print "Old, new data:\n%s\n%s" % (`overlap_data[newkeypos]`,`new_data[newkeypos]`)
               key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos])
               # filter out any new data with these key values
               new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data)
               if new_p_data:
                   new_data = transpose(new_p_data)
               else: new_data = []
           # wipe out the old data and enter the new stuff
           byebyeloop = self.GetLoop(overlaps[0])
           # print "Removing '%s' with overlaps '%s'" % (`byebyeloop`,`overlaps`)
           # Note that if, in the original dictionary, overlaps are not
           # looped, GetLoop will return the block itself.  So we check
           # for this case...
           if byebyeloop != self:
               self.remove_loop(byebyeloop)
           self.AddLoopItem((overlaps,overlap_data))  #adding old packets
           for pd in new_p_data:                             #adding new packets
              if pd not in packet_data:
                 for i in range(len(overlaps)):
                     #don't do this at home; we are appending
                     #to something in place
                     self[overlaps[i]].append(pd[i])
        self.overwrite = save_overwrite
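
The following sketch illustrates block-level merging in 'replace' mode; the
import path and the `_cell` datanames are assumptions for the example, not
part of the documented API:

    from CifFile.StarFile import StarBlock      # import path assumed

    old = StarBlock()
    old['_cell.length_a'] = '5.959'             # hypothetical datanames
    new = StarBlock()
    new['_cell.length_a'] = '6.020'
    new['_cell.length_b'] = '14.956'
    old.merge(new, 'replace')                   # incoming values overwrite old ones
    print(old['_cell.length_a'])                # '6.020'

Only the 'replace' path is exercised here: as the code above shows, 'overlay'
mode currently raises StarError before doing any work.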

def printsection(

self, instring='', ordering=[], blockstart='', blockend='', indent=0)

def printsection(self,instring='',ordering=[],blockstart="",blockend="",indent=0):
    import string
    # first make an ordering
    self.create_ordering(ordering)
    # now do it...
    if not instring:
        outstring = CIFStringIO(target_width=80)       # the returned string
    else:
        outstring = instring
    # print block delimiter
    outstring.write(blockstart,canbreak=True)
    while len(self.output_order)>0:
        #print "Remaining to output " + `self.output_order`
        itemname = self.output_order.pop(0)
        item_spec = [i for i in ordering if i['dataname'].lower()==itemname.lower()]
        if len(item_spec)>0:
            col_pos = item_spec[0].get('column',-1)
        else:
            col_pos = -1
            item_spec = {}
        if not isinstance(itemname,int):  #no loop
            if col_pos < 0: col_pos = 40
            outstring.set_tab(col_pos)
            itemvalue = self[itemname]
            outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False)
            outstring.write(' ',canbreak=True,do_tab=False)    #space after itemname
            self.format_value(itemvalue,outstring,hints=item_spec)
        else:   # we are asked to print a loop block
            #first make sure we have sensible coords.  Length should be one
            #less than the current dimension
            outstring.set_tab(10)       #guess this is OK?
            outstring.write(' '*indent,mustbreak=True,do_tab=False); outstring.write('loop_\n',do_tab=False)
            self.format_names(outstring,indent+2,loop_no=itemname)
            self.format_packets(outstring,indent+2,loop_no=itemname)
    if not instring:
        returnstring = outstring.getvalue()
        outstring.close()
        return returnstring

def process_template(

self, template_string)

Process a template datafile to formatting instructions

def process_template(self,template_string):
    """Process a template datafile to formatting instructions"""
    template_as_cif = StarFile(StringIO(template_string),grammar="DDLm").first_block()
    #template_as_lines = template_string.split("\n")
    #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#']
    #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_']
    #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0])
    self.form_hints = []   #ordered array of hint dictionaries
    for item in template_as_cif.item_order:  #order of input
        if not isinstance(item,int):    #not nested
            hint_dict = {"dataname":item}
            # find the line in the file
            start_pos = re.search("(^[ \t]*" + item + "[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M)
            if start_pos.group("spec") != None:
                spec_pos = start_pos.start("spec")-start_pos.start(0)
                spec_char = template_string[start_pos.start("spec")]
                if spec_char in '\'";':
                    hint_dict.update({"delimiter":spec_char})
                if spec_char != ";":   #so we need to work out the column number
                    hint_dict.update({"column":spec_pos})
            print '%s: %s' % (item,`hint_dict`)
            self.form_hints.append(hint_dict)
        else:           #loop block
            testnames = template_as_cif.loops[item]
            total_items = len(template_as_cif.loops[item])
            testname = testnames[0]
            #find the loop spec line in the file
            loop_regex = "(^[ \t]*loop_[ \t\n\r]+" + testname + "([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1)
            loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S)
            loop_so_far = loop_line.end()
            packet_text = loop_line.group('packet')
            # only the 'all' and 'none' group names are referenced below; the
            # names of the two quoted-value groups are placeholders
            packet_regex = "[ \t]*(?P<all>(?P<sqvalue>'([^\n\r\f']*)'+)|(?P<dqvalue>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))"
            packet_pos = re.finditer(packet_regex,packet_text)
            line_end_pos = re.finditer("^",packet_text,re.M)
            next_end = line_end_pos.next().end()
            last_end = next_end
            for loopname in testnames:
                hint_dict = {"dataname":loopname}
                thismatch = packet_pos.next()
                while thismatch.start('all') > next_end:
                    try: 
                        last_end = next_end
                        next_end = line_end_pos.next().start()
                        print 'next end %d' % next_end
                    except StopIteration:
                        pass 
                print 'Start %d, last_end %d' % (thismatch.start('all'),last_end)
                col_pos = thismatch.start('all') - last_end
                if thismatch.group('none') is None:
                    hint_dict.update({'delimiter':thismatch.groups()[0][0]})
                hint_dict.update({'column':col_pos})
                print '%s: %s' % (loopname,`hint_dict`)
                self.form_hints.append(hint_dict)
    return
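
As a usage sketch, the template is itself a small STAR/CIF fragment whose
layout encodes the desired column positions and delimiters.  The datanames
and the printed hints below are invented for illustration; `myblock` stands
for any existing StarBlock instance:

    template = '''data_template
    _demo.name                'a string'
    _demo.number               12.5
    '''
    myblock.process_template(template)
    print(myblock.form_hints)
    # e.g. [{'dataname': '_demo.name', 'delimiter': "'", 'column': 30}, ...]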

def regularise_data(

self, dataitem)

Place dataitem into a list if necessary

def regularise_data(self,dataitem):
    """Place dataitem into a list if necessary"""
    from numbers import Number
    if isinstance(dataitem,(Number,basestring,StarList,StarDict)):
        return dataitem,None
    if isinstance(dataitem,(tuple,list)):
        return dataitem,[None]*len(dataitem)
    # so try to make into a list
    try:
        regval = list(dataitem)
    except TypeError, value:
        raise StarError( str(dataitem) + ' is wrong type for data value\n' )
    return regval,[None]*len(regval)
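
For illustration (assuming `myblock` as above), the two return shapes are:

    myblock.regularise_data('lone value')       # -> ('lone value', None)
    myblock.regularise_data(['a', 'b'])         # -> (['a', 'b'], [None, None])
    myblock.regularise_data(('a', 'b'))         # tuples are also accepted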

def setmaxnamelength(

self, maxlength)

Set the maximum allowable dataname length (-1 for no check)

def setmaxnamelength(self,maxlength):
    """Set the maximum allowable dataname length (-1 for no check)"""
    self.maxnamelength = maxlength
    if maxlength > 0:
        bad_names = [a for a in self.keys() if len(a)>self.maxnamelength]
        if len(bad_names)>0:
            raise StarError, 'Datanames too long: ' + `bad_names`
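
For example:

    myblock.setmaxnamelength(75)     # enforce the CIF limit
    myblock.setmaxnamelength(-1)     # disable length checking

Note that the limit is applied retrospectively: if a dataname already in the
block exceeds `maxlength`, a StarError is raised immediately.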

def unassign_dictionary(

self)

Remove dictionary-dependent behaviour

def unassign_dictionary(self):
    """Remove dictionary-dependent behaviour"""
    self.dictionary = None

def update(

self, adict)

def update(self,adict):
    for key in adict.keys():
        self.AddItem(key,adict[key])

def values(

self)

def values(self):
    return [self[a] for a in self.keys()]

class StarDict

class StarDict(dict):
    pass

Ancestors (in MRO)

  • StarDict
  • __builtin__.dict
  • __builtin__.object

class StarError

class StarError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar Format error: '+ self.value 

Ancestors (in MRO)

  • StarError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

Class variables

var args

var message

Instance variables

var value

Methods

def __init__(

self, value)

def __init__(self,value):
    self.value = value

class StarFile

class StarFile(BlockCollection):
    def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
                scoping='instance',grammar='1.1',scantype='standard',
                **kwargs):
        super(StarFile,self).__init__(datasource=datasource,**kwargs)
        self.my_uri = getattr(datasource,'my_uri','')
        if maxoutlength == 0:
            self.maxoutlength = 2048 
        else:
            self.maxoutlength = maxoutlength
        self.scoping = scoping
        if isinstance(datasource,basestring) or hasattr(datasource,"read"):
            ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
            maxlength = maxinlength)
        self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file 
#               Produced by PySTARRW module
# 
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""
    def set_uri(self,my_uri): self.my_uri = my_uri
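
A minimal construction sketch; the import path is an assumption and the data
content is invented:

    try:
        from StringIO import StringIO       # Python 2
    except ImportError:
        from io import StringIO             # Python 3
    from CifFile.StarFile import StarFile   # import path assumed

    source = u"data_demo\n_demo.value 42\n"
    sf = StarFile(StringIO(source), grammar='1.1')
    print(sf.first_block()['_demo.value'])  # '42'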

Ancestors (in MRO)

  • StarFile
  • BlockCollection
  • __builtin__.object

Instance variables

var PC

Inheritance: BlockCollection.PC

var blocktype

Inheritance: BlockCollection.blocktype

var characterset

Inheritance: BlockCollection.characterset

var child_table

Inheritance: BlockCollection.child_table

var dictionary

Inheritance: BlockCollection.dictionary

var header_comment

Inheritance: BlockCollection.header_comment

var lower_keys

Inheritance: BlockCollection.lower_keys

var my_uri

var parent_id

Inheritance: BlockCollection.parent_id

var renamed

Inheritance: BlockCollection.renamed

var scoping

Inheritance: BlockCollection.scoping

var standard

Inheritance: BlockCollection.standard

var visible_keys

Inheritance: BlockCollection.visible_keys

Methods

def __init__(

self, datasource=None, maxinlength=-1, maxoutlength=0, scoping='instance', grammar='1.1', scantype='standard', **kwargs)

Inheritance: BlockCollection.__init__

def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0,
            scoping='instance',grammar='1.1',scantype='standard',
            **kwargs):
    super(StarFile,self).__init__(datasource=datasource,**kwargs)
    self.my_uri = getattr(datasource,'my_uri','')
    if maxoutlength == 0:
        self.maxoutlength = 2048 
    else:
        self.maxoutlength = maxoutlength
    self.scoping = scoping
    if isinstance(datasource,basestring) or hasattr(datasource,"read"):
        ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype,
        maxlength = maxinlength)
    self.header_comment = \
"""#\\#STAR
##########################################################################
#               STAR Format file 
#               Produced by PySTARRW module
# 
#  This is a STAR file.  STAR is a superset of the CIF file type.  For
#  more information, please refer to International Tables for Crystallography,
#  Volume G, Chapter 2.1
#
##########################################################################
"""

def NewBlock(

self, blockname, blockcontents=None, fix=True, parent=None)

Inheritance: BlockCollection.NewBlock

def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None):
    if blockcontents is None:
        blockcontents = StarBlock()
    if self.standard == "CIF":
        blockcontents.setmaxnamelength(75)
    if len(blockname)>75:
             raise StarError, 'Blockname %s is longer than 75 characters' % blockname
    if fix:
        newblockname = re.sub('[  \t]','_',blockname)
    else: newblockname = blockname
    new_lowerbn = newblockname.lower()
    if new_lowerbn in self.lower_keys:
        if self.standard is not None:    #already there
           toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None]
           if parent is None and new_lowerbn not in toplevelnames:  #can give a new key to this one
              while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+'
           elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one
              replace_name = new_lowerbn            
              while replace_name in self.lower_keys: replace_name = replace_name + '+'
              self._rekey(new_lowerbn,replace_name)
              # now continue on to add in the new block
              if parent.lower() == new_lowerbn:        #the new block's requested parent just got renamed!!
                  parent = replace_name
           else:
              raise StarError( "Attempt to replace existing block " + blockname)
        else:
           del self[new_lowerbn]
    self.dictionary.update({new_lowerbn:blockcontents})
    self.lower_keys.add(new_lowerbn)
    if parent is None:
       self.child_table[new_lowerbn]=self.PC(newblockname,None)
       self.visible_keys.append(new_lowerbn)
    else:
       if parent.lower() in self.lower_keys:
          if self.scoping == 'instance':
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
          else:
             self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower())
             self.visible_keys.append(new_lowerbn)
       else:
           print 'Warning:Parent block %s does not exist for child %s' % (parent,newblockname)  
    return new_lowerbn  #in case calling routine wants to know
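
Continuing the sketch above with invented blocknames, a simple hierarchy can
be built as follows:

    sf.NewBlock('toplevel')
    access_key = sf.NewBlock('definitions', parent='toplevel')  # a save frame
    print(sf.get_immediate_children('toplevel'))

Keep the returned `access_key` rather than the original name: as the code
shows, a clashing name may silently acquire a '+' suffix in its access key.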

def WriteOut(

self, comment='', wraplength=80, maxoutlength=2048)

Inheritance: BlockCollection.WriteOut

def WriteOut(self,comment='',wraplength=80,maxoutlength=2048):
    import cStringIO
    if not comment:
        comment = self.header_comment
    outstring = cStringIO.StringIO()
    outstring.write(comment)
    # loop over top-level
    top_block_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent is None]
    for blockref,blockname in top_block_names:
        outstring.write('\n' + 'data_' +blockname+'\n')
        child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==blockref]
        if self.standard == 'Dic':              #put contents before save frames
          self[blockref].SetOutputLength(wraplength,maxoutlength)
          outstring.write(str(self[blockref]))
        for child_ref,child_name in child_names:
            outstring.write('\n' + 'save_' + child_name + '\n')
            self.block_to_string(child_ref,child_name,outstring,4)    
            outstring.write('\n' + 'save_'+ '\n')   
        if self.standard != 'Dic':              #put contents after save frames
            self[blockref].SetOutputLength(wraplength,maxoutlength)
            outstring.write(str(self[blockref]))
    returnstring =  outstring.getvalue()
    outstring.close()
    return returnstring
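
Serialising the same object is then a one-liner; the comment string here is
invented:

    startext = sf.WriteOut(comment='# produced by an example script\n')
    # 'definitions' is written as a save_ frame inside data_toplevel

Note that an empty `comment` falls back to the `self.header_comment` shown in
the constructor above.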

def block_to_string(

self, block_ref, block_id, outstring, indentlevel=0)

Inheritance: BlockCollection.block_to_string

Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children

def block_to_string(self,block_ref,block_id,outstring,indentlevel=0):
    """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children"""
    child_names = [(a[0],a[1].block_id) for a in self.child_table.items() if a[1].parent==block_ref]
    if self.standard == 'Dic':
        outstring.write(str(self[block_ref]))
    for child_ref,child_name in child_names:
        outstring.write('\n' + 'save_' + child_name + '\n')
        self.block_to_string(child_ref,child_name,outstring,indentlevel)
        outstring.write('\n' + '  '*indentlevel + 'save_' + '\n')
    if self.standard != 'Dic':
        outstring.write(str(self[block_ref]))

def checknamelengths(

self, target_block, maxlength=-1)

Inheritance: BlockCollection.checknamelengths

def checknamelengths(self,target_block,maxlength=-1):
    if maxlength < 0:
        return
    else:
        toolong = filter(lambda a:len(a)>maxlength, target_block.keys())
    outstring = ""
    for it in toolong: outstring += "\n" + it
    if toolong:
       raise StarError( 'Following data names too long:' + outstring)

def clear(

self)

Inheritance: BlockCollection.clear

def clear(self):
    self.dictionary.clear()
    self.lower_keys = set()
    self.child_table = {}
    self.visible_keys = []

def copy(

self)

Inheritance: BlockCollection.copy

def copy(self):   
    newcopy = self.dictionary.copy()  #all blocks
    newcopy = BlockCollection('',newcopy,parent_id=self.parent_id)
    newcopy.child_table = self.child_table.copy()
    newcopy.lower_keys = self.lower_keys
    newcopy.characterset = self.characterset
    newcopy.scoping = self.scoping  #this sets visible keys
    return newcopy

def first_block(

self)

Inheritance: BlockCollection.first_block

Return the 'first' block. This is not necessarily the first block in the file.

def first_block(self):
    """Return the 'first' block.  This is not necessarily the first block in the file."""
    if self.keys():
        return self[self.keys()[0]]

def get(

self, key, default=None)

Inheritance: BlockCollection.get

def get(self,key,default=None):
    if self.has_key(key):     # take account of case
        return self.__getitem__(key)
    else:
        return default

def get_all(

self, item_name)

Inheritance: BlockCollection.get_all

def get_all(self,item_name):
    raw_values = map(lambda a:self[a].get(item_name),self.keys())
    raw_values = filter(lambda a:a != None, raw_values)
    ret_vals = []
    for rv in raw_values:
        if isinstance(rv,list):
            for rvv in rv:
                if rvv not in ret_vals: ret_vals.append(rvv)
        else:
            if rv not in ret_vals: ret_vals.append(rv)
    return ret_vals

def get_child_list(

self, parentname)

Inheritance: BlockCollection.get_child_list

Get a list of all child categories

def get_child_list(self,parentname):
    """Get a list of all child categories"""
    child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])]
    return child_handles

def get_children(

self, blockname, include_parent=False, scoping='dictionary')

Inheritance: BlockCollection.get_children

Get all children of [[blockname]] as a block collection. If [[include_parent]] is True, the parent block will also be included in the block collection as the root.

def get_children(self,blockname,include_parent=False,scoping='dictionary'):
    """Get all children of [[blockname]] as a block collection. If [[include_parent]] is
    True, the parent block will also be included in the block collection as the root."""
    newbc = BlockCollection()
    block_lower = blockname.lower()
    proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)]
    newbc.child_table = dict(proto_child_table)
    if not include_parent:
       newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower]))
    newbc.lower_keys = set([a[0] for a in proto_child_table])
    newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table)
    if include_parent:
        newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)})
        newbc.lower_keys.add(block_lower)
        newbc.dictionary.update({block_lower:self.dictionary[block_lower]})
    newbc.scoping = scoping
    return newbc
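
For example (blocknames as in the earlier sketch):

    frames_only = sf.get_children('toplevel')                      # children as a new collection
    with_root = sf.get_children('toplevel', include_parent=True)   # parent included as root
    print(list(frames_only.keys()))

Since the returned collection defaults to 'dictionary' scoping, all nested
frames appear in `keys()`.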

def get_immediate_children(

self, parentname)

Inheritance: BlockCollection.get_immediate_children

Get the next level of children of the given block as a list, without nested levels

def get_immediate_children(self,parentname):
    """Get the next level of children of the given block as a list, without nested levels"""
    child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()]
    return child_handles

def get_parent(

self, blockname)

Inheritance: BlockCollection.get_parent

Return the name of the block enclosing [[blockname]] in canonical form (lower case)

def get_parent(self,blockname):
    """Return the name of the block enclosing [[blockname]] in canonical form (lower case)"""
    possibles = (a for a in self.child_table.items() if a[0] == blockname.lower())
    try:
        first = possibles.next()   #get first one
    except:
        raise StarError('no parent for %s' % blockname)
    try:
       second = possibles.next()
    except StopIteration:
       return first[1].parent
    raise StarError('More than one parent for %s' % blockname)

def get_roots(

self)

Inheritance: BlockCollection.get_roots

Get the top-level blocks

def get_roots(self):
    """Get the top-level blocks"""
    return [a for a in self.child_table.items() if a[1].parent==None]

def has_key(

self, key)

Inheritance: BlockCollection.has_key

def has_key(self,key):
    if not isinstance(key,basestring): return 0
    if key.lower() in self.visible_keys:
        return 1
    return 0

def is_child_of_parent(

self, parentname, blockname)

Inheritance: BlockCollection.is_child_of_parent

Recursively search for children of blockname, case is important for now

def is_child_of_parent(self,parentname,blockname):
    """Recursively search for children of blockname, case is important for now"""
    checkname = parentname.lower()
    more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname]
    if blockname.lower() in more_children:
       return True
    else:
       for one_child in more_children:
           if self.is_child_of_parent(one_child,blockname): return True
    return False

def items(

self)

Inheritance: BlockCollection.items

def items(self):
    return [(a,self[a]) for a in self.keys()]

def keys(

self)

Inheritance: BlockCollection.keys

def keys(self):
    return self.visible_keys

def lock(

self)

Inheritance: BlockCollection.lock

Disallow overwriting for all blocks in this collection

def lock(self):
    """Disallow overwriting for all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite = False

def merge(

self, new_bc, mode=None, parent=None, single_block=[], idblock='', match_att=[], match_function=None)

Inheritance: BlockCollection.merge

def merge(self,new_bc,mode=None,parent=None,single_block=[],
               idblock="",match_att=[],match_function=None):
    if mode is None:
        if self.standard is None:
           mode = 'replace'
        else:
           mode = 'strict'
    if single_block:
        self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                               match_att=match_att,
                                               match_function=match_function)
        return None
    base_keys = [a[1].block_id for a in self.child_table.items()]
    block_to_item = base_keys   #default
    new_keys = [a[1].block_id for a in new_bc.child_table.items()]    #get list of incoming blocks
    if match_att:
        #make a blockname -> item name map
        if match_function:
            block_to_item = map(lambda a:match_function(self[a]),self.keys())
        else:
            block_to_item = map(lambda a:self[a].get(match_att[0],None),self.keys())
        #print `block_to_item`
    for key in new_keys:        #run over incoming blocknames
        if key == idblock: continue    #skip dictionary id
        basekey = key           #default value
        if len(match_att)>0:
           attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
        else:
           attval = 0
        for ii in range(len(block_to_item)):  #do this way to get looped names
            thisatt = block_to_item[ii]       #keyname in old block
            #print "Looking for %s in %s" % (attval,thisatt)
            if attval == thisatt or \
               (isinstance(thisatt,list) and attval in thisatt):
                  basekey = base_keys.pop(ii)
                  block_to_item.remove(thisatt)
                  break
        if not self.has_key(basekey) or mode=="replace":
            new_parent = new_bc.get_parent(key)
            if parent is not None and new_parent is None:
               new_parent = parent
            self.NewBlock(basekey,new_bc[key],parent=new_parent)   #add the block
        else:
            if mode=="strict":
                raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
            elif mode=="overlay":
                # print "Merging block %s with %s" % (basekey,key)
                self[basekey].merge(new_bc[key],mode,match_att=match_att)
            else:  
                raise StarError( "Merge called with unknown mode %s" % mode)
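
A sketch of the mode distinction at collection level, with StarFile and
StarError imported as before and invented blocknames:

    one = StarFile(); one.NewBlock('shared')
    two = StarFile(); two.NewBlock('shared')
    try:
        one.merge(two, mode='strict')    # duplicate blockname refused
    except StarError:
        print('strict merge refused the duplicate block')
    one.standard = None                  # permit in-place replacement in NewBlock
    one.merge(two, mode='replace')       # 'shared' is replaced by the incoming block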

def merge_fast(

self, new_bc, parent=None)

Inheritance: BlockCollection.merge_fast

Do a fast merge

def merge_fast(self,new_bc,parent=None):
    """Do a fast merge"""
    if self.standard is None:
        mode = 'replace' 
    else:
        mode = 'strict'
    overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys)
    if overlap_flag and mode != 'replace':
        double_keys = self.lower_keys.intersection(new_bc.lower_keys)
        for dup_key in double_keys:
              our_parent = self.child_table[dup_key].parent
              their_parent = new_bc.child_table[dup_key].parent
              if (our_parent is None and their_parent is not None and parent is None) or\
                  parent is not None:  #rename our block
                start_key = dup_key
                while start_key in self.lower_keys: start_key = start_key+'+'
                self._rekey(dup_key,start_key)
                if parent is not None and parent.lower() == dup_key:  #we just renamed the prospective parent!
                    parent = start_key
              elif our_parent is not None and their_parent is None and parent is None:
                start_key = dup_key
                while start_key in new_bc.lower_keys: start_key = start_key+'+'
                new_bc._rekey(dup_key,start_key)
              else: 
                raise StarError("In strict merge mode:duplicate keys %s" % dup_key)
    self.dictionary.update(new_bc.dictionary) 
    self.lower_keys.update(new_bc.lower_keys)
    self.visible_keys += (list(new_bc.lower_keys))
    self.child_table.update(new_bc.child_table)
    if parent is not None:     #redo the child_table entries
          reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
          reparent_dict = [(a[0],self.PC(a[1],parent.lower())) for a in reparent_list]
          self.child_table.update(dict(reparent_dict))

def rename(

self, oldname, newname)

Inheritance: BlockCollection.rename

Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed. No conformance checks are conducted.

def rename(self,oldname,newname):
    """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed.  No
       conformance checks are conducted."""
    realoldname = oldname.lower()
    realnewname = newname.lower()
    if realnewname in self.lower_keys:
        raise StarError,'Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname)
    if realoldname not in self.lower_keys:
        raise KeyError,'Cannot find old block %s' % realoldname
    self._rekey(realoldname,realnewname,block_id=newname)

def set_parent(

self, parentname, childname)

Inheritance: BlockCollection.set_parent

Set the parent block

def set_parent(self,parentname,childname):
    """Set the parent block"""
    # first check that both blocks exist
    if parentname.lower() not in self.lower_keys:
        raise KeyError('Parent block %s does not exist' % parentname)
    if childname.lower() not in self.lower_keys:
        raise KeyError('Child block %s does not exist' % childname)
    old_entry = self.child_table[childname.lower()]
    self.child_table[childname.lower()]=self.PC(old_entry.block_id,
           parentname.lower())
    self.scoping = self.scoping #reset visibility
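
For example, re-parenting an existing block (names continue the sketch):

    sf.NewBlock('orphan')                  # a new top-level block
    sf.set_parent('toplevel', 'orphan')    # now a child of 'toplevel'
    print(sf.get_parent('orphan'))         # 'toplevel'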

def set_uri(

self, my_uri)

def set_uri(self,my_uri): self.my_uri = my_uri

def unlock(

self)

Inheritance: BlockCollection.unlock

Allow overwriting of all blocks in this collection

def unlock(self):
    """Allow overwriting of all blocks in this collection"""
    for a in self.lower_keys:
        self[a].overwrite=True

def update(

self, adict)

Inheritance: BlockCollection.update

def update(self,adict):
    for key in adict.keys():
        self[key] = adict[key]

class StarLengthError

class StarLengthError(Exception):
    def __init__(self,value):
        self.value = value
    def __str__(self):
        return '\nStar length error: ' + self.value

Ancestors (in MRO)

  • StarLengthError
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

Class variables

var args

var message

Instance variables

var value

Methods

def __init__(

self, value)

def __init__(self,value):
    self.value = value

class StarList

class StarList(list):
    pass

Ancestors (in MRO)

  • StarList
  • __builtin__.list
  • __builtin__.object

class StarPacket

class StarPacket(list):
    def merge_packet(self,incoming):
        """Merge contents of incoming packet with this packet"""
        new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
        self.append(incoming)
        for na in new_attrs:
            setattr(self,na,getattr(incoming,na))

    def __getattr__(self,att_name):
        """Derive a missing attribute"""
        if att_name.lower() in self.__dict__:
            return getattr(self,att_name.lower())
        if att_name in ('cif_dictionary','fulldata','key'):
            raise AttributeError, 'Programming error: cannot compute value of %s' % att_name
        d = self.cif_dictionary
        c = self.fulldata
        k = self.key
        d.derive_item(att_name,c,store_value=True)
        # 
        # now pick out the new value
        keyval = getattr(self,k)
        full_pack = c.GetKeyedPacket(k,keyval)
        return getattr(full_pack,att_name)

Ancestors (in MRO)

  • StarPacket
  • __builtin__.list
  • __builtin__.object

Methods

def merge_packet(

self, incoming)

Merge contents of incoming packet with this packet

def merge_packet(self,incoming):
    """Merge contents of incoming packet with this packet"""
    new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"]
    self.append(incoming)
    for na in new_attrs:
        setattr(self,na,getattr(incoming,na))
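
A small sketch of packet merging; StarPacket is assumed importable as for the
other classes, and the attribute names imitate the way looped datanames are
attached to packets:

    from CifFile.StarFile import StarPacket   # import path assumed

    p1 = StarPacket(['C1'])
    p1._atom_site_label = 'C1'                # invented attribute names
    p2 = StarPacket(['0.25'])
    p2._atom_site_fract_x = '0.25'
    p1.merge_packet(p2)
    print(p1._atom_site_fract_x)              # '0.25', copied from p2

Only attributes beginning with a single underscore are copied across, so
double-underscore housekeeping attributes are left untouched.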
pycifrw-4.4/src/StarFile.nw000066400000000000000000004571411345362224200157310ustar00rootroot00000000000000<>= __copyright = """ PYCIFRW License Agreement (Python License, Version 2) ----------------------------------------------------- 1. This LICENSE AGREEMENT is between the Australian Nuclear Science and Technology Organisation ("ANSTO"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("PyCIFRW") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. """ @ \section{Introduction} This file implements a general STAR reading/writing utility. The basic objects ([[StarFile/StarBlock]]) read and write syntactically correct STAR files including save frames. The [[StarFile]] class is initialised with either no arguments (a new STAR file) or with the name of an already existing STAR file. Data items are accessed/changed/added using the python mapping type ie to get [[dataitem]] you would type [[value = cf[blockname][dataitem]]]. Note also that a StarFile object can be accessed as a mapping type, ie using square brackets. Most mapping operations have been implemented (see below). We define a generic BlockCollection class that both CifFiles and StarFiles are subclasses of. It is also used when the user requests a collection of blocks from a StarFile. 
The LoopBlock class used to be the root class of StarBlocks and all loop blocks for recursive handling of nested loops, but with removal of nested loop support it is simpler to model a StarBlock as a collection of dataitems with additional information specifying which datanames are grouped together. LoopBlocks are still used to provide packet-based access to loops. <<*>>= # To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import <> import sys # Python 2,3 compatibility try: from urllib import urlopen # for arbitrary opening from urlparse import urlparse, urlunparse except: from urllib.request import urlopen from urllib.parse import urlparse,urlunparse import re,os import textwrap try: from StringIO import StringIO #not cStringIO as we cannot subclass except ImportError: from io import StringIO if isinstance(u"abc",str): #Python 3 unicode = str try: import numpy have_numpy = True except ImportError: have_numpy = False <> <> <> <> <> <> <> <> <> <> <> <> @ \section{BlockCollection} Starfiles and Ciffiles are both collections of blocks. We abstract this into the [[BlockCollection]] class, and then inherit from it to make a [[StarFile]] object. The philosophy is that the treatment of the constituent blocks is managed by the enclosing block collection based on how the block collection was initialised. <>= class BlockCollection(object): """A container for StarBlock objects. The constructor takes one non-keyword argument `datasource` to set the initial data. If `datasource` is a Python dictionary, the values must be `StarBlock` objects and the keys will be blocknames in the new object. Keyword arguments: standard: `CIF` or `Dic`. `CIF` enforces 75-character blocknames, and will print block contents before that block's save frame. blocktype: The type of blocks held in this container. Normally `StarBlock` or `CifBlock`. characterset: `ascii` or `unicode`. Blocknames and datanames appearing within blocks are restricted to the appropriate characterset. Note that only characters in the basic multilingual plane are accepted. This restriction will be lifted when PyCIFRW is ported to Python3. scoping: `instance` or `dictionary`: `instance` implies that save frames are hidden from save frames lower in the hierarchy or in sibling hierarchies. `dictionary` makes all save frames visible everywhere within a data block. This setting is only relevant for STAR2 dictionaries and STAR2 data files, as save frames are currently not used in plain CIF data files. """ <> <> <> <> <> <> <> <> <> <> <> <> <> @ With the advent of CIF2, the allowed character set has expanded to encompass most of Unicode. Our object needs to know about this different characterset in order to check incoming values and datanames for conformance. This is done via the 'characterset' keyword. DDLm dictionaries assume that all definitions in nested save frames are equally accessible from other nested save frames, whereas in instance files save frames are logically insulated from other save frames at the same or lower levels. Block names may be duplicated if they are in different enclosing frames, although all save frame names have to be unique within a DDLm dictionary (as importation is allowed to refer to the save frame names with no qualifications). We deal with potential duplication by appending a '+' to the access key of legitimate save frames with duplicate names. 
Our [[child_table]] dictionary links the internal block key to its parent and mixed-case name used when outputting the block. If scoping is 'instance', nested datablocks are invisible and only accessible through the 'saves' attribute, which produces a view onto the same block collection. To take account of dictionaries with 10s of thousands of entries (e.g. the PDB) we optimise block merging for speed. Most of the information in separate structures below could be derived from [[child_table]], but we take the space hit for speed. The canonical reference to a block is the lowercase version of the name. We use these addresses to index into a table that contains the actual block name and the parent blockname. <>= def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock, characterset='ascii',scoping='instance',**kwargs): import collections self.dictionary = {} self.standard = standard self.lower_keys = set() # short_cuts self.renamed = {} self.PC = collections.namedtuple('PC',['block_id','parent']) self.child_table = {} self.visible_keys = [] # for efficiency self.block_input_order = [] # to output in same order self.scoping = scoping #will trigger setting of child table self.blocktype = blocktype self.master_template = {} #for outputting self.set_grammar('2.0') self.set_characterset(characterset) if isinstance(datasource,BlockCollection): self.merge_fast(datasource) self.scoping = scoping #reset visibility elif isinstance(datasource,dict): for key,value in datasource.items(): self[key]= value self.header_comment = '' def set_grammar(self,new_grammar): """Set the syntax and grammar for output to `new_grammar`""" if new_grammar not in ['1.1','1.0','2.0','STAR2']: raise StarError('Unrecognised output grammar %s' % new_grammar) self.grammar = new_grammar def set_characterset(self,characterset): """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. If datanames have already been added to any datablocks, they are not checked.""" self.characterset = characterset for one_block in self.lower_keys: self[one_block].set_characterset(characterset) @ Unlocking. When editing dictionaries with many datablocks, we would rather just unlock all datablocks at once. <>= def unlock(self): """Allow overwriting of all blocks in this collection""" for a in self.lower_keys: self[a].overwrite=True def lock(self): """Disallow overwriting for all blocks in this collection""" for a in self.lower_keys: self[a].overwrite = False @ Checking block name lengths. This is not needed for a STAR block, but is useful for CIF. <>= def checklengths(self,maxlength): toolong = [a.block_id for a in self.child_table.values() if len(a.block_id)>maxlength] if toolong: errorstring = "" for bn in toolong: errorstring += "\n" + bn raise StarError( 'Following block name(s) too long: \n' + errorstring) @ Switch scoping. We interpose some code in the normal [[__setattr__]] method so detect a scoping switch. In some cases we want to hide save frames from our accesses, in other cases we wish to make all frames visible. Setting the scoping attribute allows this to be swapped around. We do not assume that no change means we do not have to do anything. 
<>= def __setattr__(self,attr_name,newval): if attr_name == 'scoping': if newval not in ('dictionary','instance'): raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval) if newval == 'dictionary': self.visible_keys = [a for a in self.lower_keys] else: #only top-level datablocks visible self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None] object.__setattr__(self,attr_name,newval) @ Emulation of a mapping type. We also put odd little useful utilities in this section. <>= def __str__(self): return self.WriteOut() def __setitem__(self,key,value): self.NewBlock(key,value,parent=None) def __getitem__(self,key): if isinstance(key,(unicode,str)): lowerkey = key.lower() if lowerkey in self.lower_keys: return self.dictionary[lowerkey] #print 'Visible keys:' + `self.visible_keys` #print 'All keys' + `self.lower_keys` #print 'Child table' + `self.child_table` raise KeyError('No such item %s' % key) # we have to get an ordered list of the current keys, # as we'll have to delete one of them anyway. # Deletion will delete any key regardless of visibility def __delitem__(self,key): dummy = self[key] #raise error if not present lowerkey = key.lower() # get rid of all children recursively as well children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey] for child in children: del self[child] #recursive call del self.dictionary[lowerkey] del self.child_table[lowerkey] try: self.visible_keys.remove(lowerkey) except KeyError: pass self.lower_keys.remove(lowerkey) self.block_input_order.remove(lowerkey) def __len__(self): return len(self.visible_keys) def __contains__(self,item): """Support the 'in' operator""" if not isinstance(item,(unicode,str)): return False if item.lower() in self.visible_keys: return True return False # We iterate over all visible def __iter__(self): for one_block in self.keys(): yield self[one_block] # TODO: handle different case def keys(self): return self.visible_keys # Note that has_key does not exist in 3.5 def has_key(self,key): return key in self def get(self,key,default=None): if key in self: # take account of case return self.__getitem__(key) else: return default def clear(self): self.dictionary.clear() self.lower_keys = set() self.child_table = {} self.visible_keys = [] self.block_input_order = [] def copy(self): newcopy = self.dictionary.copy() #all blocks for k,v in self.dictionary.items(): newcopy[k] = v.copy() newcopy = BlockCollection(newcopy) newcopy.child_table = self.child_table.copy() newcopy.lower_keys = self.lower_keys.copy() newcopy.block_input_order = self.block_input_order.copy() newcopy.characterset = self.characterset newcopy.SetTemplate(self.master_template.copy()) newcopy.scoping = self.scoping #this sets visible keys return newcopy def update(self,adict): for key in adict.keys(): self[key] = adict[key] def items(self): return [(a,self[a]) for a in self.keys()] def first_block(self): """Return the 'first' block. This is not necessarily the first block in the file.""" if self.keys(): return self[self.keys()[0]] @ Parent-child utilities. As we are now emulating parent-child relationships using self.child_table, we provide some useful methods. 
<>= def get_parent(self,blockname): """Return the name of the block enclosing [[blockname]] in canonical form (lower case)""" possibles = (a for a in self.child_table.items() if a[0] == blockname.lower()) try: first = next(possibles) #get first one except: raise StarError('no parent for %s' % blockname) try: second = next(possibles) except StopIteration: return first[1].parent raise StarError('More than one parent for %s' % blockname) def get_roots(self): """Get the top-level blocks""" return [a for a in self.child_table.items() if a[1].parent==None] def get_children(self,blockname,include_parent=False,scoping='dictionary'): """Get all children of [[blockname]] as a block collection. If [[include_parent]] is True, the parent block will also be included in the block collection as the root.""" newbc = BlockCollection() block_lower = blockname.lower() proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)] newbc.child_table = dict(proto_child_table) if not include_parent: newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower])) newbc.lower_keys = set([a[0] for a in proto_child_table]) newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table) if include_parent: newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)}) newbc.lower_keys.add(block_lower) newbc.dictionary.update({block_lower:self.dictionary[block_lower]}) newbc.scoping = scoping return newbc def get_immediate_children(self,parentname): """Get the next level of children of the given block as a list, without nested levels""" child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()] return child_handles # This takes time def get_child_list(self,parentname): """Get a list of all child categories in alphabetical order""" child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])] child_handles.sort() return child_handles def is_child_of_parent(self,parentname,blockname): """Return `True` if `blockname` is a child of `parentname`""" checkname = parentname.lower() more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname] if blockname.lower() in more_children: return True else: for one_child in more_children: if self.is_child_of_parent(one_child,blockname): return True return False def set_parent(self,parentname,childname): """Set the parent block""" # first check that both blocks exist if parentname.lower() not in self.lower_keys: raise KeyError('Parent block %s does not exist' % parentname) if childname.lower() not in self.lower_keys: raise KeyError('Child block %s does not exist' % childname) old_entry = self.child_table[childname.lower()] self.child_table[childname.lower()]=self.PC(old_entry.block_id, parentname.lower()) self.scoping = self.scoping #reset visibility @ Making a Block Collection from a set of our own block names. This is used in merging, where we must merge with a Block Collection. Any pointers to parent blocks that are not in the list become None, ie. become top level blocks. We use our own child table to find links between the supplied block names and ourself. 
:: <>= def makebc(self,namelist,scoping='dictionary'): """Make a block collection from a list of block names""" newbc = BlockCollection() block_lower = [n.lower() for n in namelist] proto_child_table = [a for a in self.child_table.items() if a[0] in block_lower] newbc.child_table = dict(proto_child_table) new_top_level = [(a[0],self.PC(a[1].block_id,None)) for a in newbc.child_table.items() if a[1].parent not in block_lower] newbc.child_table.update(dict(new_top_level)) newbc.lower_keys = set([a[0] for a in proto_child_table]) newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table) newbc.scoping = scoping newbc.block_input_order = block_lower return newbc @ Adding a new block. A new block is just a new item in our dictionary, so we add a new entry. We return the new block name in case we have changed it, so the calling routine can refer to it later. Also, there is a limit of 75 characters for the block name length, which we enforce here. By setting [[fix]] to true, blocknames will have illegal whitespace changed to underscore. [[self.standard]] is used to enforce differences in treatments of block names. If [[self.standard]] is set at all, blocks will not replace a previous block with the same name. DDLm dictionaries are not permitted identical save frame names, but those save frame names may be identical to the enclosing datablock. We rename the access key if an identically-named save frame is introduced anywhere in the file by appending a '+'. These renames are stored in the rename dictionary. The name appearing in the output file is not changed, only the access key. If [[self.standard]] is 'Dic', then we put block contents before save frames in accordance with stylistic conventions when printing out. Note that we must take account of upper/lower case differences being irrelevant for STAR/CIF, but that we want to preserve the original case. To allow for nested blocks, we can specify a parent block. When the file is printed, the new block will appear inside the parent block if nested frames have been requested or if the parent block is a top-level block. [[blockcontents]] cannot be set immediately to [[StarBlock]] as a default, because it will evaluate the constructor once and then assign all new blocks to the same object. <>= def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None): """Add a new block named `blockname` with contents `blockcontents`. If `fix` is True, `blockname` will have spaces and tabs replaced by underscores. `parent` allows a parent block to be set so that block hierarchies can be created. 
Depending on the output standard, these blocks will be printed out as nested save frames or ignored.""" if blockcontents is None: blockcontents = self.blocktype() if self.standard == "CIF": blockcontents.setmaxnamelength(75) if len(blockname)>75: raise StarError('Blockname %s is longer than 75 characters' % blockname) if fix: newblockname = re.sub('[ \t]','_',blockname) else: newblockname = blockname new_lowerbn = newblockname.lower() if new_lowerbn in self.lower_keys: #already there if self.standard is not None: toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None] if parent is None and new_lowerbn not in toplevelnames: #can give a new key to this one while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+' elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one replace_name = new_lowerbn while replace_name in self.lower_keys: replace_name = replace_name + '+' self._rekey(new_lowerbn,replace_name) # now continue on to add in the new block if parent.lower() == new_lowerbn: #the new block's requested parent just got renamed!! parent = replace_name else: raise StarError( "Attempt to replace existing block " + blockname) else: del self[new_lowerbn] self.dictionary.update({new_lowerbn:blockcontents}) self.lower_keys.add(new_lowerbn) self.block_input_order.append(new_lowerbn) if parent is None: self.child_table[new_lowerbn]=self.PC(newblockname,None) self.visible_keys.append(new_lowerbn) else: if parent.lower() in self.lower_keys: if self.scoping == 'instance': self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) else: self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) self.visible_keys.append(new_lowerbn) else: print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname)) self[new_lowerbn].set_grammar(self.grammar) self[new_lowerbn].set_characterset(self.characterset) self[new_lowerbn].formatting_hints = self.master_template return new_lowerbn #in case calling routine wants to know @ Renaming a block. This is a slightly intricate operation as we have to also make sure the original children are pointed to the new blockname. We assume that both oldname and newname are already lower case. We can simply change the key used to identify the block using [[_rekey]], or we cna change the block name that is printed using [[rename]]. In the latter case, there must be no name collisions or the operation will fail. <>= def _rekey(self,oldname,newname,block_id=''): """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name does not change unless [[block_id]] is given. 
Prefer [[rename]] for a safe version.""" move_block = self[oldname] #old block is_visible = oldname in self.visible_keys move_block_info = self.child_table[oldname] #old info move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname] # now rewrite the necessary bits self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children])) oldpos = self.block_input_order.index(oldname) del self[oldname] #do this after updating child table so we don't delete children self.dictionary.update({newname:move_block}) self.lower_keys.add(newname) #print 'Block input order was: ' + `self.block_input_order` self.block_input_order[oldpos:oldpos]=[newname] if block_id == '': self.child_table.update({newname:move_block_info}) else: self.child_table.update({newname:self.PC(block_id,move_block_info.parent)}) if is_visible: self.visible_keys += [newname] def rename(self,oldname,newname): """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed. No conformance checks are conducted.""" realoldname = oldname.lower() realnewname = newname.lower() if realnewname in self.lower_keys: raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname)) if realoldname not in self.lower_keys: raise KeyError('Cannot find old block %s' % realoldname) self._rekey(realoldname,realnewname,block_id=newname) @ Merging. Originally, this package envisaged Cif and STAR files as collections of either Starblocks or Cifblocks, which differed only in their capacity to hold save frames and nested loops. From version 4.05, we envisage Cif and Star files as collections of StarBlocks, neither of which hold any nested save frames. Instead, save frames relationships are held in a separate table, which we look up when outputting. This was originally implemented for dictionary merging support, which is now deprecated with the new DDLm way of combining dictionaries. We cannot merge [[CifDic]] objects, because the internal data structures for DDL2 and DDL1 are different (parent-child in particular), so any merge operation would have to first recreate the original Cif structure before proceeding. Merging can be strict, overlay or replace. In all cases, if the block name is different, we simply add it in. If it is the same, in strict mode we flag an error, in replace mode we replace it, and in overlay mode we actually add/replace individual data items. The default mode will be determined from the setting of 'standard': if no standard has been specified, the mode is 'replace', otherwise the mode is 'strict'. If the single_block list is non-empty, we assume that we should merge on the block level, using the given block names as the particular blocks to merge. This is essentially what we have to do for DDL2 dictionaries, where all the definitions are stored in save frames inside a single block. Note also the related situation where we are in 'strict' mode, and the DDL1 dictionaries both have an "on_this_dictionary" block. So we have an extra keyword argument "idblock" which contains a blockname to ignore during merging, i.e. it will remain the same as before merging. The suggested overlay method involves adding to loops, rather than replacing them completely. Identical rows must be removed, and any key values with identical values remaining after this have to flag an error. 
We do not read in the ddl specifications themselves, to avoid messing around with hard-coded filenames, so we require the calling function to provide us with this file (not yet implemented). The [[match_att]] keyword allows us to match blocks/save frames on a particular attribute, rather than the block name itself. This means we can do the right thing and compare [[_name]] entries rather than block names (the default behaviour). Note also a problem with the overlay protocol as written up in Vol. G: if we try matching on item.name, we will run into trouble where _item.name is looped in DDL2-style dictionaries. We cannot match on a complete match against all item names in the list, because we would like to be able to add item names in overlay mode. So we have to deduce the 'main' item name from any parent-child information that we have using a helper function which is passed to us. Nested save frames are emulated through child table lookups, so we should merge this table when merging block collections. Unless [[parent]] is not empty, we put all new blocks on the same level. Otherwise, any top-level blocks in the incoming block collection (parent is None) are given the parent specified in [[parent]]. In previous versions this was text, but due to the inability to specify to future callers that the name has been changed, [[parent]] is now itself a datablock. As for [[NewBlock]], we allow duplicate save frame names in the precise situation where one of the blocks is a top-level block. The [[drop_att]] attribute allows a particular datablock attribute to be used to determine if datablocks are semantically identical. <>= def merge_fast(self,new_bc,parent=None): """Do a fast merge. WARNING: this may change one or more of its frame headers in order to remove duplicate frames. Please keep a handle to the block object instead of the text of the header.""" if self.standard is None: mode = 'replace' else: mode = 'strict' overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys) if parent is not None: parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent] if len(parent_name)==0 or len(parent_name)>1: raise StarError("Unable to find unique parent block name: have %s" % str(parent_name)) parent_name = parent_name[0] else: parent_name = None #an error will be thrown if we treat as a string if overlap_flag and mode != 'replace': double_keys = self.lower_keys.intersection(new_bc.lower_keys) for dup_key in double_keys: our_parent = self.child_table[dup_key].parent their_parent = new_bc.child_table[dup_key].parent if (our_parent is None and their_parent is not None and parent is None) or\ parent is not None: #rename our block start_key = dup_key while start_key in self.lower_keys: start_key = start_key+'+' self._rekey(dup_key,start_key) if parent_name.lower() == dup_key: #we just renamed the prospective parent! 
                        parent_name = start_key
                elif our_parent is not None and their_parent is None and parent is None:
                    start_key = dup_key
                    while start_key in new_bc.lower_keys: start_key = start_key + '+'
                    new_bc._rekey(dup_key,start_key)
                else:
                    raise StarError("In strict merge mode: duplicate keys %s" % dup_key)
        self.dictionary.update(new_bc.dictionary)
        self.lower_keys.update(new_bc.lower_keys)
        self.visible_keys += (list(new_bc.lower_keys))
        self.block_input_order += new_bc.block_input_order
        #print('Block input order now:' + repr(self.block_input_order))
        self.child_table.update(new_bc.child_table)
        if parent_name is not None:   #redo the child_table entries
            reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None]
            reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list]
            self.child_table.update(dict(reparent_dict))

    def merge(self,new_bc,mode=None,parent=None,single_block=[],
                   idblock="",match_att=[],match_function=None):
        if mode is None:
            if self.standard is None:
                mode = 'replace'
            else:
                mode = 'strict'
        if single_block:
            self[single_block[0]].merge(new_bc[single_block[1]],mode,
                                                   match_att=match_att,
                                                   match_function=match_function)
            return None
        base_keys = [a[1].block_id for a in self.child_table.items()]
        block_to_item = base_keys   #default
        new_keys = [a[1].block_id for a in new_bc.child_table.items()]  #get list of incoming blocks
        if match_att:
            #make a blockname -> item name map
            if match_function:
                block_to_item = [match_function(self[a]) for a in self.keys()]
            else:
                block_to_item = [self[a].get(match_att[0],None) for a in self.keys()]
            #print(repr(block_to_item))
        for key in new_keys:        #run over incoming blocknames
            if key == idblock: continue    #skip dictionary id
            basekey = key          #default value
            if len(match_att)>0:
                attval = new_bc[key].get(match_att[0],0)  #0 if ignoring matching
            else:
                attval = 0
            for ii in range(len(block_to_item)):  #do this way to get looped names
                thisatt = block_to_item[ii]       #keyname in old block
                #print("Looking for %s in %s" % (attval,thisatt))
                if attval == thisatt or \
                   (isinstance(thisatt,list) and attval in thisatt):
                    basekey = base_keys.pop(ii)
                    block_to_item.remove(thisatt)
                    break
            if basekey not in self or mode=="replace":
                new_parent = new_bc.get_parent(key)
                if parent is not None and new_parent is None:
                    new_parent = parent
                self.NewBlock(basekey,new_bc[key],parent=new_parent)  #add the block
            else:
                if mode=="strict":
                    raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key))
                elif mode=="overlay":
                    #print("Merging block %s with %s" % (basekey,key))
                    self[basekey].merge(new_bc[key],mode,match_att=match_att)
                else:
                    raise StarError( "Merge called with unknown mode %s" % mode)

@ Checking conformance. CIF and STAR standards differ in allowing nested loops
and maximum data name lengths.  Although the CIF 1.1 standard allows very long
lines (2048 characters), data names are still restricted to be no more than 75
characters in length in the CIF standard.

<>=
    def checknamelengths(self,target_block,maxlength=-1):
        if maxlength < 0:
            return
        else:
            toolong = [a for a in target_block.keys() if len(a)>maxlength]
        outstring = ""
        if toolong:
            outstring = "\n".join(toolong)
            raise StarError( 'Following data names too long:' + outstring)

@ When validating DDL2-type dictionaries against the DDL spec file, we have to
be able to see all values of parent data items across all save frames in order
to validate parent-child relations (I have inferred this, but if I ever find a
standard document this may turn out to be wrong).
So this method is provided to return a list of all values taken by the given
attribute within all of the blocks inside a block collection.  A flat list is
returned, even if looped values happen to occur in a data block.  This is
because the one routine that calls this method is interested in whether or not
a given value occurs, rather than how it occurs or what it occurs with.  We
also remove duplicate values.

<>=
    def get_all(self,item_name):
        raw_values = [self[a].get(item_name) for a in self.keys()]
        raw_values = [a for a in raw_values if a is not None]
        ret_vals = []
        for rv in raw_values:
            if isinstance(rv,list):
                for rvv in rv:
                    if rvv not in ret_vals: ret_vals.append(rvv)
            else:
                if rv not in ret_vals: ret_vals.append(rv)
        return ret_vals

@ Writing all this stuff out to a string. We loop over each of the individual
sections, getting their string representation.  We implement this using an
in-memory StringIO object for speed.  Note that the default output comment
specifies a CIF 1.1 standard file.  Note that child blocks must be save frames,
so we hard-code 'save'.  If [[self.grammar]] is '2.0', save frames are not
nested and table/list delimiters are spaces; if 'STAR2', save frames are
nested.  We allow the maximum line length to be overridden here although
preferably the output length is set when initialising the file.

<>=
    def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None):
        """Return the contents of this file as a string, wrapping if possible at `wraplength`
        characters and restricting maximum line length to `maxoutlength`.  Delimiters and
        save frame nesting are controlled by `self.grammar`. If `blockorder` is
        provided, blocks are output in this order unless nested save frames have been
        requested (STAR2). The default block order is the order in which blocks were input.
        `saves_after` inserts all save frames after the given dataname,
        which allows less important items to appear later.  Useful in
        conjunction with a template for dictionary files."""
        if maxoutlength != 0:
            self.SetOutputLength(maxoutlength)
        if not comment:
            comment = self.header_comment
        outstring = StringIO()
        if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0":
            outstring.write(r"#\#CIF_2.0" + "\n")
        outstring.write(comment)
        # prepare all blocks
        for b in self.dictionary.values():
            b.set_grammar(self.grammar)
            b.formatting_hints = self.master_template
            b.SetOutputLength(wraplength,self.maxoutlength)
        # loop over top-level
        # monitor output
        all_names = list(self.child_table.keys())   #i.e.
lower case if blockorder is None: blockorder = self.block_input_order top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None] for blockref,blockname in top_block_names: print('Writing %s, ' % blockname + repr(self[blockref])) outstring.write('\n' + 'data_' +blockname+'\n') all_names.remove(blockref) if self.standard == 'Dic': #put contents before save frames outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application')) if self.grammar == 'STAR2': #nested save frames child_refs = self.get_immediate_children(blockref) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,4) outstring.write('\n' + 'save_'+ '\n') elif self.grammar in ('1.0','1.1','2.0'): #non-nested save frames child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)] for child_ref in child_refs: child_name = self.child_table[child_ref].block_id outstring.write('\n\n' + 'save_' + child_name + '\n') outstring.write(str(self[child_ref])) outstring.write('\n\n' + 'save_' + '\n') all_names.remove(child_ref.lower()) else: raise StarError('Grammar %s is not recognised for output' % self.grammar) if self.standard != 'Dic': #put contents after save frames outstring.write(str(self[blockref])) else: outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application')) returnstring = outstring.getvalue() outstring.close() if len(all_names)>0: print('WARNING: following blocks not output: %s' % repr(all_names)) else: print('All blocks output.') return returnstring def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0): """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children, and syntactically nesting save frames""" child_refs = self.get_immediate_children(block_ref) self[block_ref].set_grammar(self.grammar) if self.standard == 'Dic': outstring.write(str(self[block_ref])) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,indentlevel) outstring.write('\n' + ' '*indentlevel + 'save_' + '\n') if self.standard != 'Dic': outstring.write(str(self[block_ref])) @ Output template. We process the template file and immediately set all blocks to this value. New blocks will not see this template, so we store the template for application after the blocks are created. <>= def SetTemplate(self,template_file): """Use `template_file` as a template for all block output""" self.master_template = process_template(template_file) for b in self.dictionary.values(): b.formatting_hints = self.master_template @ \section{StarFile} If we are passed a filename, we open it and read it in, assuming that it is a conformant STAR file. A StarFile object is a dictionary of StarBlock objects, accessed by block name. Parameter [[maxoutlength]] sets the maximum line size for output. If [[maxoutlength]] is not specified, it defaults to the maximum input length. <>= class StarFile(BlockCollection): <> <> @ When initialising, we add those parts that are unique to the StarFile as opposed to a simple collection of blocks - i.e. reading in from a file, and some line length restrictions. We do not indent this section in the noweb file, so that our comment characters output at the beginning of the line. 
<>= def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0, scoping='instance',grammar='1.1',scantype='standard', permissive=False,**kwargs): super(StarFile,self).__init__(datasource=datasource,**kwargs) self.my_uri = getattr(datasource,'my_uri','') if maxoutlength == 0: self.maxoutlength = 2048 else: self.maxoutlength = maxoutlength self.scoping = scoping if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"): ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype, maxlength = maxinlength,permissive=permissive) self.header_comment = \ """#\\#STAR ########################################################################## # STAR Format file # Produced by PySTARRW module # # This is a STAR file. STAR is a superset of the CIF file type. For # more information, please refer to International Tables for Crystallography, # Volume G, Chapter 2.1 # ########################################################################## """ @ A function to make sure we have the correct file location <>= def set_uri(self,my_uri): self.my_uri = my_uri @ Reading in a file. We use the Yapps3-generated [[YappsStarParser]] module to provide grammar services. The structure returned from parsing is a StarFile, with possible grammar violations due to duplicate block names. We allow fast reads using the compiled StarScan module by passing the option 'flex' to this routine. We also permit an already-opened stream to be passed to us (thanks to Boris Dusek for this contribution). There are 3 possible syntax variations: very old CIF files allowed unquoted data values to begin with open square brackets, version 1.1 disallowed this, and DDLm-conformant files interpret these as actual bracket expressions. The different grammars are selected by the 'grammar' argument. We allow reading CBF files, which can contain binary sections, by removing all characters found between the strings '-BINARY-FORMAT-SECTION'. This is not a robust approach as this string could theoretically be found in a comment or datavalue. We save our URL for possible later use in finding files relative to the location of this file e.g. with DDLm dictionary imports. <>= def ReadStar(filename,prepared = None, maxlength=-1, scantype='standard',grammar='STAR2',CBF=False, permissive=False): """ Read in a STAR file, returning the contents in the `prepared` object. * `filename` may be a URL, a file path on the local system, or any object with a `read` method. * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename` will be added to. * `maxlength` is the maximum allowable line length in the input file. This has been set at 2048 characters for CIF but is unlimited (-1) for STAR files. * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1` is identical except for the exclusion of square brackets as the first characters in undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will read files according to the STAR2 publication. If grammar is `None` or `auto`, autodetection will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files. Note that (nested) save frames are read in all grammar variations and then flagged afterwards if they do not match the requested grammar. * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using fast C routines. 
      Note that running PyCIFRW in Jython uses native Java regular expressions
      to provide a speedup regardless of this argument.

    * `CBF` flags that the input file is in Crystallographic Binary File format.
      The binary block is excised from the input data stream before parsing and
      is not available in the returned object.

    * `permissive` allows non-UTF8 encodings (currently only latin1) in the
      input file.  These are a violation of the standard.
    """

    # save desired scoping
    save_scoping = prepared.scoping
    from . import YappsStarParser_1_1 as Y11
    from . import YappsStarParser_1_0 as Y10
    from . import YappsStarParser_2_0 as Y20
    from . import YappsStarParser_STAR2 as YST
    if prepared is None:
        prepared = StarFile()
    if grammar == "auto" or grammar is None:
        try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)]
    elif grammar == '1.0':
        try_list = [('1.0',Y10)]
    elif grammar == '1.1':
        try_list = [('1.1',Y11)]
    elif grammar == '2.0':
        try_list = [('2.0',Y20)]
    elif grammar == 'STAR2':
        try_list = [('STAR2',YST)]
    else:
        raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar ))
    if isinstance(filename,(unicode,str)):
        # create an absolute URL
        relpath = urlparse(filename)
        if relpath.scheme == "":
            if not os.path.isabs(filename):
                fullpath = os.path.join(os.getcwd(),filename)
            else:
                fullpath = filename
            newrel = list(relpath)
            newrel[0] = "file"
            newrel[2] = fullpath
            my_uri = urlunparse(newrel)
        else:
            my_uri = urlunparse(relpath)
        # print("Full URL is: " + my_uri)
        filestream = urlopen(my_uri)
        raw_text = filestream.read()   #read the bytes once: a second read would return nothing
        try:
            text = raw_text.decode('utf-8-sig')
        except UnicodeDecodeError:
            if permissive:
                text = raw_text.decode('latin1')
                print("WARNING: %s violates standard (latin1 encoding instead of UTF8)." % filename)
            else:
                raise StarError("%s: bad encoding (must be utf8 or ascii)" % filename)
        filestream.close()
    else:
        filestream = filename   #already opened for us
        text = filestream.read()
        if not isinstance(text,unicode):
            try:
                text = text.decode('utf-8-sig')   #CIF is always ascii/utf8
            except UnicodeDecodeError:
                if permissive:
                    text = text.decode('latin1')  #decode the bytes already read: the stream is exhausted
                    print("WARNING: text violates CIF standard (latin1 encoding instead of UTF8)")
                else:
                    raise StarError("Bad input encoding (must be utf8 or ascii)")
        my_uri = ""
    if not text:           # empty file, return the empty block collection
        prepared.set_uri(my_uri)
        return prepared
    # filter out non-ASCII characters in CBF files if required. We assume
    # that the binary is enclosed in a fixed string that occurs
    # nowhere else.
if CBF: text_bits = text.split("-BINARY-FORMAT-SECTION-") text = text_bits[0] for section in range(2,len(text_bits),2): text = text+" (binary omitted)"+text_bits[section] # we recognise ctrl-Z as end of file endoffile = text.find(chr(26)) if endoffile >= 0: text = text[:endoffile] split = text.split('\n') if maxlength > 0: toolong = [a for a in split if len(a)>maxlength] if toolong: pos = split.index(toolong[0]) raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength)) # honour the header string if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list: try_list.remove(('2.0',Y20),) if not try_list: raise StarError('File %s missing CIF2.0 header' % (filename)) for grammar_name,Y in try_list: if scantype == 'standard' or grammar_name in ['2.0','STAR2']: parser = Y.StarParser(Y.StarParserScanner(text)) else: parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex')) # handle encoding switch if grammar_name in ['2.0','STAR2']: prepared.set_characterset('unicode') else: prepared.set_characterset('ascii') proto_star = None try: proto_star = getattr(parser,"input")(prepared) except Y.yappsrt.YappsSyntaxError as e: input = parser._scanner.input Y.yappsrt.print_error(input, e, parser._scanner) except Y.yappsrt.NoMoreTokens: print('Could not complete parsing; stopped around here:',file=sys.stderr) print(parser._scanner,file=sys.stderr) except ValueError: print('Unexpected error:') import traceback traceback.print_exc() if proto_star is not None: proto_star.set_grammar(grammar_name) #remember for output break if proto_star is None: errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner ) raise StarError( errorstring) # set visibility correctly proto_star.scoping = 'dictionary' proto_star.set_uri(my_uri) proto_star.scoping = save_scoping return proto_star @ \section{Dictionaries} If a dictionary is attached to a StarBlock, we can use it to provide automatic value conversion whenever a value is retrieved. <>= <> @ In DDL1 and DDL2, there is not a whole lot of point associating a DDL dictionary with a CIF file in an ongoing way. However, with DDLm the dictionary can be used when searching for attributes, so is no longer simply a checking mechanism but is now also a generative mechanism. So there are advantages to making this assignment for DDLm. If we are passed a non-DDLm dictionary, we ignore the request as there is nothing we can do with it outside the normal validity checking, for which a different routine is in place. Having a dictionary in place also implies that values that are returned are automatically converted to the type given in the dictionary. <>= def assign_dictionary(self,dic): if not dic.diclang=="DDLm": print("Warning: ignoring dictionary %s" % dic.my_uri) return self.dictionary = dic def unassign_dictionary(self): """Remove dictionary-dependent behaviour""" self.dictionary = None @ \section{Collection datatype} DDLm introduced data values which could be lists, tuples or hash tables. We define a distinct StarList class to distinguish them from loop lists, and take the opportunity to expand the getitem method to allow multiple arguments. 
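Before the implementation, a small sketch of the intended indexing behaviour
(the extended comma notation mirrors dREL usage):

    from CifFile.StarFile import StarList

    sl = StarList([StarList([1, 2]), StarList([3, 4])])
    assert sl[1][0] == 3     # ordinary chained indexing
    assert sl[1, 0] == 3     # equivalent extended comma notation
    print(sl)                # SL([[1, 2], [3, 4]])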
<>= class StarList(list): def __getitem__(self,args): if isinstance(args,(int,slice)): return super(StarList,self).__getitem__(args) elif isinstance(args,tuple) and len(args)>1: #extended comma notation return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:]) else: return super(StarList,self).__getitem__(args[0]) def __str__(self): return "SL("+super(StarList,self).__str__() + ")" class StarDict(dict): pass @ \section{Loop Block class} A LoopBlock is provided as a row-based interface to a collection of columns, so that iteration over packets is possible. It is initialised with a StarBlock object and dataname and returns an object that accesses the loop containing the dataname. Datavalues is not copied, meaning that changes to the data (e.g. appending a packet) will be apparent in the StarBlock parent (i.e the LoopBlock is like a view onto the parent). <>= class LoopBlock(object): <> <> <> <> <> <> <> <> <> <> @ Initialising: We do not check conformance to standards here: it assumed that this has been done by the creating routine. <>= def __init__(self,parent_block,dataname): self.loop_no = parent_block.FindLoop(dataname) if self.loop_no < 0: raise KeyError('%s is not in a loop structure' % dataname) self.parent_block = parent_block <>= def keys(self): return self.parent_block.loops[self.loop_no] def values(self): return [self.parent_block[a] for a in self.keys()] #Avoid iterator even though that is Python3-esque def items(self): return list(zip(self.keys(),self.values())) def __getitem__(self,dataname): if isinstance(dataname,int): #a packet request return self.GetPacket(dataname) if dataname in self.keys(): return self.parent_block[dataname] else: raise KeyError('%s not in loop block' % dataname) def __setitem__(self,dataname,value): self.parent_block[dataname] = value self.parent_block.AddLoopName(self.keys()[0],dataname) def __contains__(self,key): return key in self.parent_block.loops[self.loop_no] def has_key(self,key): return key in self def __iter__(self): packet_list = zip(*self.values()) names = self.keys() for p in packet_list: r = StarPacket(p) for n in range(len(names)): setattr(r,names[n].lower(),r[n]) yield r # for compatibility def __getattr__(self,attname): return getattr(self.parent_block,attname) @ Packets. We store columns, so extracting packets is a much slower task. <>= <> <> <> @ A StarPacket object looks very much like a list, in order to support the DDLm semantics of allowing a particular value to be accessed by attribute. DDLm also allows merged categories, which means that a packet can contain datanames from the appropriate sub-categories. Furthermore, a StarPacket can derive missing values by calling the appropriate dREL function. To do this, we store the key name used to create the packet. Note that all attributes must be lower case in order to meet the caseless matching required by the STAR/CIF standards. 
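As a usage sketch, iterating over a LoopBlock yields one such packet per row.
The datanames below are hypothetical, and we assume the standard [[CifBlock]]
class, which is built on the machinery defined here:

    from CifFile import CifBlock

    cb = CifBlock()
    cb["_atom_site_label"] = ["C1", "O1"]
    cb["_atom_site_occupancy"] = ["1.0", "0.5"]
    cb.CreateLoop(["_atom_site_label", "_atom_site_occupancy"])

    # each packet exposes its values as lower-case attributes
    for packet in cb.GetLoop("_atom_site_label"):
        print(packet._atom_site_label, packet._atom_site_occupancy)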
<>= class StarPacket(list): def merge_packet(self,incoming): """Merge contents of incoming packet with this packet""" new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"] self.extend(incoming) for na in new_attrs: setattr(self,na,getattr(incoming,na)) def __getattr__(self,att_name): """Derive a missing attribute""" if att_name.lower() in self.__dict__: return getattr(self,att_name.lower()) if att_name in ('cif_dictionary','fulldata','key'): raise AttributeError('Programming error: can only assign value of %s' % att_name) d = self.cif_dictionary c = self.fulldata k = self.key assert isinstance(k,list) d.derive_item(att_name,c,store_value=True) # # now pick out the new value # self.key is a list of the key values keydict = dict([(v,(getattr(self,v),True)) for v in k]) full_pack = c.GetCompoundKeyedPacket(keydict) return getattr(full_pack,att_name) @ Get nth looped packet. This returns a packet of data. <>= def GetPacket(self,index): thispack = StarPacket([]) for myitem in self.parent_block.loops[self.loop_no]: thispack.append(self[myitem][index]) setattr(thispack,myitem,thispack[-1]) return thispack @ Adding a packet. We are passed a StarPacket object, which is just a list which is accessible by attribute. As I have not yet produced a proper __init__ or __new__ method to allow creation of a new StarPacket, it is advisable to create a new packet by copying an old packet. <>= def AddPacket(self,packet): for myitem in self.parent_block.loops[self.loop_no]: old_values = self.parent_block[myitem] old_values.append(packet.__getattribute__(myitem)) self.parent_block[myitem] = old_values @ Return order of items - this is just a copy of the list of datanames making up this loop. <>= def GetItemOrder(self): """Return a list of datanames in this `LoopBlock` in the order that they will be printed""" return self.parent_block.loops[self.loop_no][:] @ Move an item to a different position in the loop. This only affects the printout order. We allow different capitalisation and have to absorb the possibility of nested loops in the order list, and being passed a loop reference in the [[itemname]] argument. <>= def ChangeItemOrder(self,itemname,newpos): """Change the position at which `itemname` appears when printing out to `newpos`.""" self.parent_block.loops[self.loop_no].remove(itemname.lower()) self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower()) @ Get co-looped names. Sometimes we just want names, and will get the values ourselves on a need-to-know basis. <>= def GetLoopNames(self,keyname): if keyname in self: return self.keys() for aloop in self.loops: try: return aloop.GetLoopNames(keyname) except KeyError: pass raise KeyError('Item does not exist') @ Adding to a loop. We find the loop containing the dataname that we have been passed, and then append all of the (key,values) pairs that we are passed in [[data]], which is a dictionary. We expect that the data have been sorted out for us, unlike when data are passed in [[AddLoopItem]], when there can be both unlooped and looped data in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance. <>= def AddToLoop(self,dataname,loopdata): thisloop = self.GetLoop(dataname) for itemname,itemvalue in loopdata.items(): thisloop[itemname] = itemvalue @ \section{Star Block class} A Star Block is no longer simply a LoopBlock. Historically it was distinguished by holding save frames, but this has been removed. 
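Continuing the sketch above, the 'view' relationship between a LoopBlock and
its parent block can be seen directly:

    lb = cb.GetLoop("_atom_site_label")     # cb as in the previous sketch
    lb["_atom_site_label"] = ["N1", "N2"]   # write through the view
    print(cb["_atom_site_label"])           # ['N1', 'N2'] in the parent too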
Development note: in the original implementation, a StarBlock was just a special type of LoopBlock. In our new implementation, a LoopBlock is a simple structure that is created to access loops in a certain way. The other difference between LoopBlocks and StarBlocks is that the latter can have a dictionary attached, whereas inner LoopBlocks should not. <>= class StarBlock(object): <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> <> @ Initialising a StarBlock. If given non-zero data to initialise the block with, we either copy (if it is a dictionary) or else initialise each key-value pair separately (if tuples). We take care to include our special "loop" key if it is not in the supplied dictionary, but apart from this we make no check of the actual conformance of the dictionary items. To maximise efficiency, we store all keys as lower case, and keep a table of key vs the actual supplied capitalisation for printout. The [[overwrite]] argument allows values to be silently replaced, as per a normal python dictionary. However, when reading in from a file, we want to detect duplicated values, so we set this to false. As DDLm introduces the unicode character set, we need to indicate which character set we are prepared to accept. We store the data in [[self.block]]. Each entry in this table is a tuple with first element the string value, and second element the corresponding calculated or actual value. We use a tuple to emphasise that both values need to be changed together. Formatting hints are used on output to suggest column positions for looped datanames and delimiters. In practice these are used only for dictionaries where fine-tuned layout is helpful for human readers. We provide a simple function to change the maximum name length, so that we can read in a StarBlock and then enforce that the names are a maximum length as required by CIF. Values calculated with a dictionary are cached by setting self.cache_vals to True. 
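A minimal construction sketch illustrating the [[overwrite]] behaviour
described below (the dataname is hypothetical):

    from CifFile.StarFile import StarBlock, StarError

    sb = StarBlock(overwrite=False, maxnamelength=75)
    sb["_cell_length_a"] = "5.959"
    try:
        sb["_cell_length_a"] = "6.000"   # second assignment now fails
    except StarError:
        print("duplicate dataname rejected")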
<>= def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True, characterset='ascii',maxnamelength=-1): self.block = {} #the actual data storage (lower case keys) self.loops = {} #each loop is indexed by a number and contains a list of datanames self.item_order = [] #lower case, loops referenced by integer self.formatting_hints = {} self.true_case = {} #transform lower case to supplied case self.provide_value = False #prefer string version always self.dictionary = None #DDLm dictionary self.popout = False #used during load iteration self.curitem = -1 #used during iteration self.cache_vals = True #store all calculated values self.maxoutlength = maxoutlength self.setmaxnamelength(maxnamelength) #to enforce CIF limit of 75 characters self.set_characterset(characterset) #to check input names self.wraplength = wraplength self.overwrite = overwrite self.string_delimiters = ["'",'"',"\n;"] #universal CIF set self.list_delimiter = " " #CIF2 default self.wrapper = textwrap.TextWrapper() if isinstance(data,(tuple,list)): for item in data: self.AddLoopItem(item) elif isinstance(data,StarBlock): self.block = data.block.copy() self.item_order = data.item_order[:] self.true_case = data.true_case.copy() # loops as well self.loops = data.loops.copy() def setmaxnamelength(self,maxlength): """Set the maximum allowable dataname length (-1 for no check)""" self.maxnamelength = maxlength if maxlength > 0: bad_names = [a for a in self.keys() if len(a)>self.maxnamelength] if len(bad_names)>0: raise StarError('Datanames too long: ' + repr( bad_names )) def set_characterset(self,characterset): """Set the characterset for checking datanames: may be `ascii` or `unicode`""" self.characterset = characterset if characterset == 'ascii': self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) elif characterset == 'unicode': if sys.maxunicode < 1114111: self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M) else: self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M) @ Adding emulation of a mapping type. We add any of the other functions we would like to emulate. [[__len__]] returns the number of items in this block, either in a loop or not. So it is not the simple length of the dictionary. A Star Block can hold save frames in the outermost loop. From version 4.05 we do not allow save frames to be set from within the block; rather, an enclosing block collection should be created (e.g. a Star File) and the save frame added to that block collection with the 'enclosing' StarBlock set as its parent. We catch the saves key and print an error message to show deprecation. <>= def __str__(self): return self.printsection() def __setitem__(self,key,value): if key == "saves": raise StarError("""Setting the saves key is deprecated. Add the save block to an enclosing block collection (e.g. CIF or STAR file) with this block as child""") self.AddItem(key,value) def __getitem__(self,key): if key == "saves": raise StarError("""The saves key is deprecated. Access the save block from the enclosing block collection (e.g. 
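A quick sketch of the mapping emulation, showing the caseless key handling:

    from CifFile.StarFile import StarBlock

    sb = StarBlock()
    sb["_Symmetry_Cell_Setting"] = "monoclinic"
    print("_symmetry_cell_setting" in sb)   # True: membership is caseless
    print(len(sb))                          # 1
    print(sb.get("_missing_name", "n/a"))   # dict-style default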
CIF or STAR file object)""") try: rawitem,is_value = self.GetFullItemValue(key) except KeyError: if self.dictionary: # send the dictionary the required key and a pointer to us try: new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False) except StarDerivationFailure: #try now with defaults included try: new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True) except StarDerivationFailure as s: print("In StarBlock.__getitem__, " + repr(s)) raise KeyError('No such item: %s' % key) print('Set %s to derived value %s' % (key, repr(new_value))) return new_value else: raise KeyError('No such item: %s' % key) # we now have an item, we can try to convert it to a number if that is appropriate # note numpy values are never stored but are converted to lists if not self.dictionary or not key in self.dictionary: return rawitem print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem ))) if is_value: if self.provide_value: return rawitem else: print('Turning %s into string' % repr( rawitem )) return self.convert_to_string(key) else: # a string if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \ (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)): return self.dictionary.change_type(key,rawitem) elif self.provide_value: # catch the question marks do_calculate = False if isinstance(rawitem,(list,tuple)): known = [a for a in rawitem if a != '?'] if len(known) == 0: #all questions do_calculate = True elif rawitem == '?': do_calculate = True if do_calculate: # remove old value del self[key] try: new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False) except StarDerivationFailure as s: try: new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True) except StarDerivationFailure as s: print("Could not turn %s into a value:" + repr(s)) return rawitem else: print('Set %s to derived value %s' % (key, repr( new_value ))) return new_value return rawitem #can't do anything def __delitem__(self,key): self.RemoveItem(key) def __len__(self): blen = len(self.block) return blen def __nonzero__(self): if self.__len__() > 0: return 1 return 0 # keys returns all internal keys def keys(self): return list(self.block.keys()) #always lower case def values(self): return [self[a] for a in self.keys()] def items(self): return list(zip(self.keys(),self.values())) def __contains__(self,key): if isinstance(key,(unicode,str)) and key.lower() in self.keys(): return True return False def has_key(self,key): return key in self def has_key_or_alias(self,key): """Check if a dataname or alias is available in the block""" initial_test = key in self if initial_test: return True elif self.dictionary: aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)] if len(aliases)>0: return True return False def get(self,key,default=None): if key in self: retval = self.__getitem__(key) else: retval = default return retval def clear(self): self.block = {} self.loops = {} self.item_order = [] self.true_case = {} # doesn't appear to work def copy(self): newcopy = StarBlock() newcopy.block = self.block.copy() newcopy.loops = [] newcopy.item_order = self.item_order[:] newcopy.true_case = self.true_case.copy() newcopy.loops = self.loops.copy() # return self.copy.im_class(newcopy) #catch inheritance return newcopy def update(self,adict): for key in adict.keys(): 
self.AddItem(key,adict[key]) @ This method is used when printing out, which is why it takes both names and numbers. <>= def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos @ This routine moves around the order of objects in the printout. We can only move an item within the loop in which it appears. <>= def ChangeItemOrder(self,itemname,newpos): """Move the printout order of `itemname` to `newpos`. If `itemname` is in a loop, `newpos` refers to the order within the loop.""" if isinstance(itemname,(unicode,str)): true_name = itemname.lower() else: true_name = itemname loopno = self.FindLoop(true_name) if loopno < 0: #top level self.item_order.remove(true_name) self.item_order.insert(newpos,true_name) else: self.loops[loopno].remove(true_name) self.loops[loopno].insert(newpos,true_name) <>= def GetItemOrder(self): """Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index""" return self.item_order[:] @ Adding a data item. We check for consistency, by making sure the new item is not in the block already. If it is, we replace it (consistent with the meaning of square brackets in Python), unless [[self.overwrite]] is False, in which case an error is raised. We skip checking of data values if the [[precheck]] value is true- this is typically set if the item is being read from a file, and so is already checked, or will be checked in bulk at the end. Note that all strings are stored internally as unicode. <>= def AddItem(self,key,value,precheck=False): """Add dataname `key` to block with value `value`. `value` may be a single value, a list or a tuple. If `precheck` is False (the default), all values will be checked and converted to unicode strings as necessary. If `precheck` is True, this checking is bypassed. 
No checking is necessary when values are read from a CIF file as they are already in correct form.""" if not isinstance(key,(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( key )) key = unicode(key) #everything is unicode internally if not precheck: self.check_data_name(key,self.maxnamelength) # make sure no nasty characters # check for overwriting if key in self: if not self.overwrite: raise StarError( 'Attempt to insert duplicate item name %s' % key) if not precheck: #need to sanitise regval,empty_val = self.regularise_data(value) pure_string = check_stringiness(regval) self.check_item_value(regval) else: regval,empty_val = value,None pure_string = True # update ancillary information first lower_key = key.lower() if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order self.item_order.append(lower_key) # always remove from our case table in case the case is different try: del self.true_case[lower_key] except KeyError: pass self.true_case[lower_key] = key if pure_string: self.block.update({lower_key:[regval,empty_val]}) else: self.block.update({lower_key:[empty_val,regval]}) @ This is the original routine for adding a loop item, left in for consistency with old versions. Do not use. <>= def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): """*Deprecated*. Use `AddItem` followed by `CreateLoop` if necessary.""" # print "Received data %s" % `incomingdata` # we accept tuples, strings, lists and dicts!! # Direct insertion: we have a string-valued key, with an array # of values -> single-item into our loop if isinstance(incomingdata[0],(tuple,list)): # a whole loop keyvallist = zip(incomingdata[0],incomingdata[1]) for key,value in keyvallist: self.AddItem(key,value) self.CreateLoop(incomingdata[0]) elif not isinstance(incomingdata[0],(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] )) else: self.AddItem(incomingdata[0],incomingdata[1]) @ Checking the data names. The CIF 1.1 standard restricts characters in a data name to ASCII 33-126 and there should be a leading underscore. Items are allowed to have the blank characters as well, i.e. ascii 09,10,13 and 32. Data items may be lists, which we need to detect before checking. We assume that the item has been regularised before this check is called. The CIF2 standard allows all of Unicode, with certain blocks disallowed. The removal of the disallowed characters takes place on file read. We have the name length as a separate call as file reading will automatically produce datanames with the correct syntax, so during file reading we do not require any checking, but we do still need to check name length. 
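A sketch of what the checks below accept and reject (with the default ascii
characterset; datanames are hypothetical):

    from CifFile.StarFile import StarBlock, StarError

    sb = StarBlock()
    sb.check_data_name("_a.valid_name")          # passes silently
    try:
        sb.check_data_name("no_leading_underscore")
    except StarError:
        print("rejected: datanames must begin with _")
    try:
        sb.check_data_name("_much_too_long", maxlength=5)
    except StarError:
        print("rejected: longer than 5 characters")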
<>=
    def check_data_name(self,dataname,maxlength=-1):
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print('Checking %s for unicode characterset conformance' % dataname)
            if len ([a for a in dataname if ord(a) < 33]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code point 127-159)')
            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            # the noncharacters in the supplementary planes all end in FFFE or FFFF
            if len ([a for a in dataname if ord(a) > 0xFFFF and (ord(a) & 0xFFFE) == 0xFFFE]) > 0:
                print('%s fails' % dataname)
                for a in dataname: print('%x' % ord(a),end="")
                print()
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (U+xFFFE and/or U+xFFFF)')

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

<>=
    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
            test_item = [item]   #single item list
        def check_one (it):
            if isinstance(it,unicode):
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    print("Fail value check: %s" % it)
                    raise StarError('Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                        raise StarError('Data item "' + repr( it ) + u'"... contains forbidden characters')
        [check_one(a) for a in test_item]

@ Regularising data. We want the copy.deepcopy operation to work, so we cannot
have any arrays passed into the master dictionary.  We make sure everything
goes in either as a single item or as a dict/list/tuple.  We provide an empty
datavalue with the same structure as the returned value so that the
value/string alternate is correctly initialised/reset.  Note that all string
data should be Unicode.  To maintain compatibility with Python 2 we apply
Unicode to any string data.

<>=
    def regularise_data(self,dataitem):
        """Place dataitem into a list if necessary"""
        from numbers import Number
        if isinstance(dataitem,str):
            return unicode(dataitem),None
        if isinstance(dataitem,(Number,unicode,StarList,StarDict)):
            return dataitem,None   #assume StarList/StarDict contain unicode if necessary
        if isinstance(dataitem,(tuple,list)):
            v,s = zip(*list([self.regularise_data(a) for a in dataitem]))
            return list(v),list(s)
            #return dataitem,[None]*len(dataitem)
        # so try to make into a list
        try:
            regval = list(dataitem)
        except TypeError as value:
            raise StarError( str(dataitem) + ' is wrong type for data value\n' )
        v,s = zip(*list([self.regularise_data(a) for a in regval]))
        return list(v),list(s)

@ Dimension of data.
This would ordinarily be the number of nested levels, and if we have a naked string, we have to return zero. We recursively burrow down to the lowest level. If a list is of zero length, we cannot burrow any further, so simply return one more than the current level. We return as well the length of the received packet. Note that we consider dataitems which are *not* tuples or lists to be primitive. This includes StarLists (which are a single data item) and numpy arrays. Unfortunately this means we have to use the ungainly check involving the __class__ property, as StarLists and Tuples are subclasses of list and tuple and will therefore count as instances of them. In the context of DDLm it is probably more elegant to define a special class for looped data rather than for primitive lists as data items. This is a method of the module, rather than belonging to any particular class. <>= def get_dim(dataitem,current=0,packlen=0): zerotypes = [int, float, str] if type(dataitem) in zerotypes: return current, packlen if not dataitem.__class__ == ().__class__ and \ not dataitem.__class__ == [].__class__: return current, packlen elif len(dataitem)>0: # print "Get_dim: %d: %s" % (current,`dataitem`) return get_dim(dataitem[0],current+1,len(dataitem)) else: return current+1,0 @ Numpy arrays are more difficult to check as they don't seem to implement automatic Python-style iteration (at least matrices don't). So we have to pick up this case while attempting to make dependence on Numpy optional. <>= def check_stringiness(data): """Check that the contents of data are all strings""" if not hasattr(data,'dtype'): #so not Numpy from numbers import Number if isinstance(data,Number): return False elif isinstance(data,(unicode,str)): return True elif data is None:return False #should be data are None :) else: for one_item in data: if not check_stringiness(one_item): return False return True #all must be strings else: #numerical python import numpy if data.ndim == 0: #a bare value if data.dtype.kind in ['S','U']: return True else: return False else: for one_item in numpy.nditer(data): print('numpy data: ' + repr( one_item )) if not check_stringiness(one_item): return False return True @ Removing a data item. We delete the item, and if it is looped, and nothing is left in the loop, we remove the loop. [[RemoveLoopItem]] is here for compatibility only. <>= def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey) def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname) @ Returning an item value. Note that a looped block has little meaning without all the items in the loop. Routine [[GetLoop]] is better in this case. This is a real time-intensive loop, so we initially assume that the key we have been passed is the right key (i.e. case is the same) and only check for case if this fails. We define an alternative call that returns both the stored value and whether or not it is a non-string value. This saves other routines performing the same check. 
But any StarLists are considered to be unready for use as values as they may in fact be Arrays or Matrices and therefore require their type to be changed. Note that if the value is '?', or a list of '?', we could delete the dataitem altogether, however that would lead to inconsistencies with previous calls to has_key, keys() etc. <>= def GetItemValue(self,itemname): """Return value of `itemname`. If `itemname` is looped, a list of all values will be returned.""" return self.GetFullItemValue(itemname)[0] def GetFullItemValue(self,itemname): """Return the value associated with `itemname`, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and `StarList` objects.""" try: s,v = self.block[itemname.lower()] except KeyError: raise KeyError('Itemname %s not in datablock' % itemname) # prefer string value unless all are None # are we a looped value? if not isinstance(s,(tuple,list)) or isinstance(s,StarList): if not_none(s): return s,False #a string value else: return v,not isinstance(v,StarList) #a StarList is not calculation-ready elif not_none(s): return s,False #a list of string values else: if len(v)>0: return v,not isinstance(v[0],StarList) return v,True @ A StarBlock allows dealing with loops on a columnar level. For row-based operations, a LoopBlock can be created with GetLoop and iterated over. <>= <> <> <> <> <> <> <> <> <> <> <> <> @ Creating loops. In the latest version of PyCIFRW, a loop is simply a collection of datanames that together make up the loop. It is indexed by a number, which goes into the item_order array to produce the loop when printing out. No check of dataname existence is done, so that a loop can be created before the datanames are provided. In order to iterate over loop packets, a LoopBlock needs to be created subsequently. When we create the loop, we remove the datanames from the item order list to prevent them being output twice, and we also remove them from any other loop. Thus, at any point in time, a dataname belongs to only one loop, but can be switched to another loop trivially. <>= def CreateLoop(self,datanames,order=-1,length_check=True): """Create a loop in the datablock. `datanames` is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If `order` is given, the loop will appear at this position in the block when printing out. 
        A loop counts as a single position."""

        if length_check:
            # check lengths: these datanames should exist
            listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
            if len(listed_values) == len(datanames):
                len_set = set([len(self[a]) for a in datanames])
                if len(len_set)>1:
                    raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
            elif len(listed_values) != 0:
                raise ValueError('Request to loop datanames where some are single values and some are not')
        # store as lower case
        lc_datanames = [d.lower() for d in datanames]
        # remove these datanames from all other loops
        [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
        # remove empty loops
        empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
        for a in empty_loops:
            self.item_order.remove(a)
            del self.loops[a]
        if len(self.loops)>0:
            loopno = max(self.loops.keys()) + 1
        else:
            loopno = 1
        self.loops[loopno] = list(lc_datanames)
        if order >= 0:
            self.item_order.insert(order,loopno)
        else:
            self.item_order.append(loopno)
        # remove these datanames from item ordering
        self.item_order = [a for a in self.item_order if a not in lc_datanames]

@ Removing a loop. The looped names are not removed, but will cause chaos on
output unless they are placed into a different loop or deleted.

<>=
    def remove_loop(self,oldloop):
        """Remove the loop indexed by [[oldloop]]. Datanames remain in the structure
        and should be removed separately if necessary"""
        # print("Removing %s: item_order %s" % (repr(oldloop),self.item_order))
        self.item_order.remove(oldloop)
        del self.loops[oldloop]   #self.loops is a dict keyed by loop number

@ Adding a dataname that has already been set to a loop. While relatively
trivial, we still need to check that it does not exist in any other loops, and
remove this dataname from the item order if it is present.  We always use the
canonical lower-case form.  Also, the access to self[oldname] may trigger a
round of evaluation, which we wish to avoid, so we make sure to switch off
calculations in this case.

<>=
    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised. If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of
        values associated with all other columns of the new loop or a
        `StarLengthError` will be raised."""
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length
        old_provides = self.provide_value
        self.provide_value = False
        loop_len = len(self[oldname])
        self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise StarLengthError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass

@ Loops. We should distinguish two loop structures: the loop structures
provided by the syntax, and the loop structures defined by the dictionary
('semantic' loops).  The members of these loops do not coincide for 'joined'
categories, where datanames may appear in either separate loops, or within one
loop.
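Before turning to semantic loops, a sketch of building a syntactic loop with
the methods just defined (hypothetical datanames):

    from CifFile.StarFile import StarBlock

    sb = StarBlock()
    sb["_site_id"] = ["a", "b"]
    sb["_site_x"] = ["0.1", "0.2"]
    sb.CreateLoop(["_site_id", "_site_x"])   # the two columns now loop together
    sb["_site_y"] = ["0.3", "0.4"]
    sb.AddLoopName("_site_id", "_site_y")    # attach a further column
    print(sb.GetLoopNames("_site_y"))        # ['_site_id', '_site_x', '_site_y']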
Until we have a dictionary, we have no way to find the semantic loops. The first function below returns the particular loop block containing the specified dataname, so that we can manipulate its contents directly, and therefore refers to a syntactic loop. <>= def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. `keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname) <>= def FindLoop(self,keyname): """Find the loop that contains `keyname` and return its numerical index or -1 if not present. The numerical index can be used to refer to the loop in other routines.""" loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]] if len(loop_no)>0: return loop_no[0] else: return -1 @ Get co-looped names. Sometimes we just want names, and will get the values ourselves on a need-to-know basis. <>= def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname) @ Adding to a loop. We find the loop containing the dataname that we have been passed, and then append all of the (key,values) pairs that we are passed in [[data]], which is a dictionary. We expect that the data have been sorted out for us, unlike when data are passed in [[AddLoopItem]], when there can be both unlooped and looped data in one set. The dataname passed to this routine is simply a convenient way to refer to the loop, and has no other significance. <>= def AddToLoop(self,dataname,loopdata): """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. Add multiple columns to the loop containing `dataname`. `loopdata` is a collection of (key,value) pairs, where `key` is the new dataname and `value` is a list of values for that dataname""" self.update(loopdata) for one_name in loopdata: self.AddLoopName(dataname,one_name) @ The draft DDLm specification uses square brackets next to a pre-specified identifier to mean "the packet of this category for which the key equals this item". We implement a function which fullfils this role for use in the pythonised dREL script. At this StarFile level we have no idea as to which data name is the key, so that is passed to us from the dictionary processing layer. Note we assume a single key rather than multiple keys for this call, and let the calling layer handle multiple or missing packets. We guarantee to return a single packet, or else raise a ValueError. <>= def GetKeyedPacket(self,keyname,keyvalue,no_case=False): """Return the loop packet (a `StarPacket` object) where `keyname` has value `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. `ValueError` is raised if no packet is found or more than one packet is found.""" my_loop = self.GetLoop(keyname) #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block)) #print('Packet check on:' + keyname) #[print(repr(getattr(a,keyname))) for a in my_loop] if no_case: one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()] else: one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue] if len(one_pack)!=1: raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack))) print("Keyed packet: %s" % one_pack[0]) return one_pack[0] @ The current version of DDLm allows compound keys. We implement a routine to return a single packet corresponding to the values of the specified datanames. 
<>=
    def GetCompoundKeyedPacket(self,keydict):
        """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}` pairs
        in `keydict` take the appropriate values. Ignore case for a given `key` if `caseless` is
        True.  `ValueError` is raised if no packet is found or more than one packet is found."""
        #print("Looking for %s" % repr(keydict))
        keynames = list(keydict.keys())
        my_loop = self.GetLoop(keynames[0])
        for one_key in keynames:
            keyval,no_case = keydict[one_key]
            if no_case:
                my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()])
            else:
                my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval])
        if len(my_loop)!=1:
            raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop)))
        print("Compound keyed packet: %s" % my_loop[0])
        return my_loop[0]

@ Semantic loops. These are loops defined by a dictionary, as opposed to the
syntax.  dREL requires us to be able to extract a packet by key, and then
attributes of this packet are the individual objects that are found in that
category, regardless of whether they co-occur in one loop or child loops.  We
use the dictionary ``cat_key_table'' to give us a list of keys for each
category.  We find the corresponding loops, extract any packets meeting the key
requirements, and merge these packets.

A packet for dREL use will need to be able to derive further values using the
dictionary, e.g. when an attribute of that packet is requested.  In order to do
this derivation, we need to store the key names and values, so that the
__getattr__ method of the packet can properly derive the needed non-key values.

With a deriving dictionary we run the danger that we will generate keys for a
child category for which no other values are defined.  Such keys are pointless
as the only information we have is that they come from the parent category, and
so they can only be copies of the parent key, and therefore the child category
is identical to the parent category as it has the same keys.  We therefore do
not generate keys of child categories; if child category items are present,
then the key should already be present.  On the other hand, if the child
category keys are present but the parent keys are missing, then we in principle
know that the child keys are a subset of the parent keys, but we cannot use the
key to derive any values, as the keys are opaque.

The final DDLm specification allowed compound keys for categories.  When
combined with child categories, this means that a child key may be absent but
its parent key may be present and is considered equivalent.

<>=
    def GetMultiKeyedSemanticPacket(self,keydict,cat_id):
        """Return a complete packet for category `cat_id` where the keyvalues are
        provided as a dictionary of key:(value,caseless) pairs.
        This routine will understand any joined loops, so if separate loops in the
        datafile belong to the same category hierarchy (e.g. `_atom_site` and
        `_atom_site_aniso`), the returned `StarPacket` object will contain datanames
        from the requested category and any children."""
        #if len(keyvalues)==1:   #simplification
        #    return self.GetKeyedSemanticPacket(keydict[1][0],cat_id)
        target_keys = self.dictionary.cat_key_table[cat_id]
        # update the dictionary passed to us with all equivalents, for
        # simplicity.
parallel_keys = list(zip(*target_keys)) #transpose print('Parallel keys:' + repr(parallel_keys)) print('Keydict:' + repr(keydict)) start_keys = list(keydict.keys()) for one_name in start_keys: key_set = [a for a in parallel_keys if one_name in a] for one_key in key_set: keydict[one_key] = keydict[one_name] # target_keys is a list of lists, each of which is a compound key p = StarPacket() # a little function to return the dataname for a key def find_key(key): for one_key in self.dictionary.key_equivs.get(key,[])+[key]: if self.has_key(one_key): return one_key return None for one_set in target_keys: #loop down the categories true_keys = [find_key(k) for k in one_set] true_keys = [k for k in true_keys if k is not None] if len(true_keys)==len(one_set): truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) try: extra_packet = self.GetCompoundKeyedPacket(truekeydict) except KeyError: #one or more are missing continue #should try harder? except ValueError: continue else: continue print('Merging packet for keys ' + repr(one_set)) p.merge_packet(extra_packet) # the following attributes used to calculate missing values p.key = true_keys p.cif_dictionary = self.dictionary p.fulldata = self return p @ Plain single key. This is the older routine where we assume that we only have a single key per category. We still have to put the single key into a list as the __getattr__ method of the StarPacket will assume that it has been passed a list of keys. <>= def GetKeyedSemanticPacket(self,keyvalue,cat_id): """Return a complete packet for category `cat_id` where the category key for the category equals `keyvalue`. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from both categories.""" target_keys = self.dictionary.cat_key_table[cat_id] target_keys = [k[0] for k in target_keys] #one only in each list p = StarPacket() # set case-sensitivity flag lcase = False if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: lcase = True for cat_key in target_keys: try: extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except KeyError: #missing key try: test_key = self[cat_key] #generate key if possible print('Test key is %s' % repr( test_key )) if test_key is not None and\ not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): print('Getting packet for key %s' % repr( keyvalue )) extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except: #cannot be generated continue except ValueError: #none/more than one, assume none continue #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) p.merge_packet(extra_packet) # the following attributes used to calculate missing values for keyname in target_keys: if hasattr(p,keyname): p.key = [keyname] break if not hasattr(p,"key"): raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) p.cif_dictionary = self.dictionary p.fulldata = self return p @ We might also want to remove a packet by key. We operate on the data in place, and need access to the low-level information as we have to remove both the string and value elements. <>= def RemoveKeyedPacket(self,keyname,keyvalue): """Remove the packet for which dataname `keyname` takes value `keyvalue`. 
Only the first such occurrence is removed.""" packet_coord = list(self[keyname]).index(keyvalue) loopnames = self.GetLoopNames(keyname) for dataname in loopnames: self.block[dataname][0] = list(self.block[dataname][0]) del self.block[dataname][0][packet_coord] self.block[dataname][1] = list(self.block[dataname][1]) del self.block[dataname][1][packet_coord] @ \section{Output} The philosophy of outputting strings is to create a StringIO object, and pass this between all the routines. As there are specific rules about when a new line can occur (especially concerning semicolon-delimited strings) we subclass StringIO and fiddle with the write method. The [[grammar]] attribute is consulted to determine what output grammar to use. <>= <> <> <> <> <> <> <> <> <> <> <> @ We adjust the write method to intelligently output lines, taking care with CIF/STAR rules for output. We allow the caller to specify: (1) a line break prior to output (e.g. for a new dataname) (2) a tab stepsize, in which case we try to pad out to this value (3) that we can do a line break if we wish (4) moving to a nested indent level, starting from the current position (5) Whether or not to align the next item with the tab stops (6) The column that this item should start at. If we are past this column, it is ignored. We never insert newlines inside supplied strings. Tabs are applied after any requested line breaks, and both are applied before the next item is output. If the character is flagged as a delimiter, it is only output if the previous character is not a delimiter or if the next character will be a line break. After adding any line breaks and/or tab stops, we recognise the following situations: (1) The supplied string does not overflow the line: we output, and update the length of the current line (2) The supplied string does overflow the line. (i) If we are allowed to break, we output a linefeed, and then the string. (ii) Otherwise, we output the string (3) The supplied string contains linefeeds: we update the current line length according to the number of characters from the beginning of the line. <>= class CIFStringIO(StringIO): def __init__(self,target_width=80,**kwargs): StringIO.__init__(self,**kwargs) self.currentpos = 0 self.target_width = target_width self.tabwidth = -1 self.indentlist = [0] self.last_char = "" def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False, delimiter=False,startcol=-1): """Write a string with correct linebreak, tabs and indents""" # do we need to break? 
if delimiter: if len(outstring)>1: raise ValueError('Delimiter %s is longer than one character' % repr( outstring )) output_delimiter = True if mustbreak: #insert a new line and indent temp_string = '\n' + ' ' * self.indentlist[-1] StringIO.write(self,temp_string) self.currentpos = self.indentlist[-1] self.last_char = temp_string[-1] if self.currentpos+len(outstring)>self.target_width: #try to break if not delimiter and outstring[0]!='\n': #ie ; if canbreak: temp_string = '\n' + ' ' * self.indentlist[-1] StringIO.write(self,temp_string) self.currentpos = self.indentlist[-1] self.last_char = temp_string[-1] else: #assume a break will be forced on next value output_delimiter = False #the line break becomes the delimiter #try to match requested column if startcol > 0: if self.currentpos < startcol: StringIO.write(self,(startcol - self.currentpos)* ' ') self.currentpos = startcol self.last_char = ' ' else: print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos)) startcol = -1 #so that tabbing works as a backup #handle tabs if self.tabwidth >0 and do_tab and startcol < 0: next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop) if self.currentpos < next_stop: StringIO.write(self,(next_stop-self.currentpos)*' ') self.currentpos = next_stop self.last_char = ' ' #calculate indentation after tabs and col setting applied if newindent: #indent by current amount if self.indentlist[-1] == 0: #first time self.indentlist.append(self.currentpos) # print 'Indentlist: ' + `self.indentlist` else: self.indentlist.append(self.indentlist[-1]+2) elif unindent: if len(self.indentlist)>1: self.indentlist.pop() else: print('Warning: cannot unindent any further') #check that we still need a delimiter if self.last_char in [' ','\n','\t']: output_delimiter = False #now output the string - every invocation comes through here if (delimiter and output_delimiter) or not delimiter: StringIO.write(self,outstring) last_line_break = outstring.rfind('\n') if last_line_break >=0: self.currentpos = len(outstring)-last_line_break else: self.currentpos = self.currentpos + len(outstring) #remember the last character if len(outstring)>0: self.last_char = outstring[-1] def set_tab(self,tabwidth): """Set the tab stop position""" self.tabwidth = tabwidth @ For non-default output lengths, we include a function which will set the internal attribute that controls maximum line length. As this is a per-block value, this function is most likely called by the StarFile object rather than directly. Two values control output line formatting: [[self.wraplength]] and [[self.maxoutlength]]. [[self.wraplength]] is the value at which the line will be wrapped normally, but long strings will not force an internal wrap inside the string; [[self.maxoutlength]] is the absolute maximum length. <>= def SetOutputLength(self,wraplength=80,maxoutlength=2048): """Set the maximum output line length (`maxoutlength`) and the line length to wrap at (`wraplength`). The wrap length is a target only and may not always be possible.""" if wraplength > maxoutlength: raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) self.wraplength = wraplength self.maxoutlength = maxoutlength @ Setting up the output grammar. 
The output grammar determines the list delimiters for CIF2/STAR2, and the available delimiters for 1.0/1.1/2.0, as well as the allowed characters <>= def set_grammar(self,new_grammar): self.string_delimiters = ["'",'"',"\n;",None] if new_grammar in ['STAR2','2.0']: self.string_delimiters += ['"""',"'''"] if new_grammar == '2.0': self.list_delimiter = " " elif new_grammar == 'STAR2': self.list_delimiter = ", " elif new_grammar not in ['1.0','1.1']: raise StarError('Request to set unknown grammar %s' % new_grammar) @ Printing a section. We allow an optional order list to be given, in case the caller wants to order things in some nice way. By default, we use the item_order attribute. Naturally, looped items are grouped together according to their order in the order list. Note that we must be careful to add spaces between data items, especially when formatting string loop data, where our string addition could get quite hairy. As we are doing so much concatenation, we use a stringIO buffer to speed it up. As an alternative, we may have formatting hints, perhaps from a template that we have input through 'process_template'. The formatting hints specify a desired column and delimiter, and an order of output. We can always satisfy the output order, but may have to fiddle with columns and delimiters depending on the datavalue contents. The [[finish_at]] and [[start_from]] arguments cause output to stop/start when one of the datanames in the arguments is found. We attempt some nice formatting by printing non-packet items with an apparent tab stop at 40 characters. And of course, we stop providing values. <>= def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''): self.provide_value = False # first make an ordering self.create_ordering(finish_at,start_from) #create self.output_order # now do it... if not instring: outstring = CIFStringIO(target_width=80) # the returned string else: outstring = instring # print block delimiter outstring.write(blockstart,canbreak=True) while len(self.output_order)>0: #print "Remaining to output " + `self.output_order` itemname = self.output_order.pop(0) if not isinstance(itemname,int): #no loop item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()] if len(item_spec)>0: item_spec = item_spec[0] col_pos = item_spec.get('column',-1) name_pos = item_spec.get('name_pos',-1) else: col_pos = -1 item_spec = {} name_pos = -1 if col_pos < 0: col_pos = 40 outstring.set_tab(col_pos) itemvalue = self[itemname] outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos) outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) #space after itemname self.format_value(itemvalue,outstring,hints=item_spec) else:# we are asked to print a loop block outstring.set_tab(10) #guess this is OK? loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop'] if loop_spec: loop_indent = max(loop_spec[0],0) else: loop_indent = indent outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent) self.format_names(outstring,indent+2,loop_no=itemname) self.format_packets(outstring,indent+2,loop_no=itemname) else: returnstring = outstring.getvalue() outstring.close() return returnstring @ Formatting a data value. Data values may be stored as strings, numbers or compound values. We call this routine recursively to format data values. 
We use [[compound]] to flag that we are an embedded compound value, so that we do not insert a line break before the top-level compound delimiter. If hints is supplied, it is a dictionary containing an entry 'delimiter' that requests a particular delimiter. <>= def format_value(self,itemvalue,stringsink,compound=False,hints={}): """Format a Star data value""" global have_numpy delimiter = hints.get('delimiter',None) startcol = hints.get('column',-1) if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed raise StarError("Non-unicode value {0} found in block".format(itemvalue)) if isinstance(itemvalue,unicode): #need to sanitize stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol) elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy stringsink.set_tab(0) stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol) if len(itemvalue)>0: self.format_value(itemvalue[0],stringsink) for listval in itemvalue[1:]: # print 'Formatting %s' % `listval` stringsink.write(self.list_delimiter,do_tab=False) self.format_value(listval,stringsink,compound=True) stringsink.write(']',unindent=True) elif isinstance(itemvalue,dict): stringsink.set_tab(0) stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol) #start a new line inside items = list(itemvalue.items()) if len(items)>0: stringsink.write("'"+items[0][0]+"'"+':',canbreak=True) self.format_value(items[0][1],stringsink) for key,value in items[1:]: stringsink.write(self.list_delimiter) stringsink.write("'"+key+"'"+":",canbreak=True) self.format_value(value,stringsink) #never break between key and value stringsink.write('}',unindent=True) elif isinstance(itemvalue,(float,int)) or \ (have_numpy and isinstance(itemvalue,(numpy.number))): #TODO - handle uncertainties stringsink.write(str(itemvalue),canbreak=True,startcol=startcol) #numbers else: raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue )) @ Formatting a loop section. We are passed an indent and destination string, and are expected to append a list of item names to the string indented by the indicated number of spaces. If we have loops, we add those in too. <>= def format_names(self,outstring,indent=0,loop_no=-1): """Print datanames from `loop_no` one per line""" temp_order = self.loops[loop_no][:] #copy format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order]) while len(temp_order)>0: itemname = temp_order.pop(0) req_indent = format_hints.get(itemname,{}).get('name_pos',indent) outstring.write(' ' * req_indent,do_tab=False) outstring.write(self.true_case[itemname],do_tab=False) outstring.write("\n",do_tab=False) @ Formatting a loop packet. Our final packet will involve collecting the ith value of each item in our particular loop. Note that we have to be careful with indentation, as the ; digraph must be recognised. 
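For concreteness, a loop over two datanames with two packets should be rendered along the following lines (exact spacing depends on the tab stops and any formatting hints, so this layout is indicative only):

    loop_
      _atom_site_label
      _atom_site_type_symbol
      C1  C
      O1  O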
<>= def format_packets(self,outstring,indent=0,loop_no=-1): alldata = [self[a] for a in self.loops[loop_no]] loopnames = self.loops[loop_no] #print 'Alldata: %s' % `alldata` packet_data = list(zip(*alldata)) #print 'Packet data: %s' % `packet_data` #create a dictionary for quick lookup of formatting requirements format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames]) for position in range(len(packet_data)): if position > 0: outstring.write("\n") #new line each packet except first for point in range(len(packet_data[position])): datapoint = packet_data[position][point] format_hint = format_hints.get(loopnames[point],{}) packstring = self.format_packet_item(datapoint,indent,outstring,format_hint) outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) @ Formatting a single packet item. <>= def format_packet_item(self,pack_item,indent,outstring,format_hint): # print 'Formatting %s' % `pack_item` # temporary check for any non-unicode items if isinstance(pack_item,str) and not isinstance(pack_item,unicode): raise StarError("Item {0!r} is not unicode".format(pack_item)) if isinstance(pack_item,unicode): delimiter = format_hint.get('delimiter',None) startcol = format_hint.get('column',-1) outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol) else: self.format_value(pack_item,outstring,hints = format_hint) @ Formatting a string. We make sure that the length of the item value is less than [[self.maxoutlength]], or else we should split them, and so on. We check the value for terminators and impossible apostrophes and length, before deciding whether to print it and the item on a single line. We try to respect carriage returns in the string, if the caller has tried to do the formatting for us. If we are not putting apostrophes around a string, we make the first character a space, to avoid problems if the first character of a line is a semicolon. The STAR specification states that embedded quotes are allowed so long as they are not followed by a space. So if we find any quotes followed by spaces we output a semicolon-terminated string to avoid too much messing around. This routine is called very often and could be improved. We have to catch empty strings as well, which are legal. Another gotcha concerns 'embedded' strings; if the datavalue begins with a quote, it will be output verbatim (and misunderstood) unless spaces elsewhere force quotation. Note that non-delimited strings may not start with a reserved word ('data','save','global'). The caller is allowed to request a particular delimiter, with 'None' corresponding to no delimiter and the choices being apostrophe, double quote, or semicolon. CIF2-style triple quotes are not currently supported. The 'indent' argument allows the routine to enforce indentation of multi-line strings by the specified amount. Note that this will technically change the datavalue contents by adding spaces, although for datavalues intended only for human consumption this is irrelevant. 'lbprotocol' allows use of the line-breaking protocol from CIF1.1 to express long lines, and 'pref_protocol' allows use of the CIF2 text-prefix protocol. 
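The delimiter rules are easiest to see by example. A rough interactive sketch, assuming a fresh block with the CIF 1.1 grammar selected (which admits undelimited strings):

    b = StarBlock()
    b.set_grammar('1.1')                   # adds None (no delimiter) to the allowed set
    b._formatstring(u'simple')             # -> simple (no delimiter needed)
    b._formatstring(u'two words')          # -> 'two words'
    b._formatstring(u"don't")              # -> "don't" (embedded apostrophe rules out single quotes)
    b._formatstring(u'data_block')         # -> 'data_block' (reserved word prefix forces quoting)
    b._formatstring(u'has \'one\' and "two"')  # -> semicolon-delimited text field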
<>= def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}): if hints.get("reformat",False) and "\n" in instring: instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"]) allowed_delimiters = set(self.string_delimiters) if len(instring)==0: allowed_delimiters.difference_update([None]) if len(instring) > (self.maxoutlength-2) or '\n' in instring: allowed_delimiters.intersection_update(["\n;","'''",'"""']) if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring: allowed_delimiters.difference_update([None]) if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'): allowed_delimiters.difference_update([None]) if len(instring)>5 and instring[:6].lower()=='global': allowed_delimiters.difference_update([None]) if '"' in instring: allowed_delimiters.difference_update(['"',None]) if "'" in instring: allowed_delimiters.difference_update(["'",None]) out_delimiter = "\n;" #default (most conservative) if delimiter in allowed_delimiters: out_delimiter = delimiter elif "'" in allowed_delimiters: out_delimiter = "'" elif '"' in allowed_delimiters: out_delimiter = '"' if out_delimiter in ['"',"'",'"""',"'''"]: return out_delimiter + instring + out_delimiter elif out_delimiter is None: return instring # we are left with semicolon strings # use our protocols: maxlinelength = max([len(a) for a in instring.split('\n')]) if maxlinelength > self.maxoutlength: protocol_string = apply_line_folding(instring) else: protocol_string = instring # now check for embedded delimiters if "\n;" in protocol_string: prefix = "CIF:" while prefix in protocol_string: prefix = prefix + ":" protocol_string = apply_line_prefix(protocol_string,prefix+"> ") return "\n;" + protocol_string + "\n;" @ Converting a value to a string. The canonical version of a value is its string representation. This is different to its output format, which will have delimiters and various conventions applied (see below). <>= def convert_to_string(self,dataname): """Convert values held in dataname value fork to string version""" v,is_value = self.GetFullItemValue(dataname) if not is_value: return v if check_stringiness(v): return v #already strings # TODO...something else return v @ Wrapping a string If our formatting hints dictionary allows us to reformat a string, *and* the string does not contain at least three spaces in a row (implying that it is already formatted), we insert appropriate spaces and line feeds. <>= def do_wrapping(self,instring,indent=3): """Wrap the provided string""" if " " in instring: #already formatted return instring self.wrapper.initial_indent = ' '*indent self.wrapper.subsequent_indent = ' '*indent # remove leading and trailing space instring = instring.strip() # split into paragraphs paras = instring.split("\n\n") wrapped_paras = [self.wrapper.fill(p) for p in paras] return "\n".join(wrapped_paras) @ \subsection{Line folding protocol} The line folding protocol allows lines to be broken by appending a backslash as the last character of a line. It is signalled by a backslash as the first character of the line following an opening semicolon. We use it to introduce line breaks where appropriate. We search for whitespace between [[minwraplength]] and [[maxwraplength]], and if none is forthcoming we wrap at maxlength-1 (-1 to allow for the backslash). 
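A round-trip sketch of the protocol, assuming the folding behaviour described above (the 100-character line is an arbitrary choice):

    long_value = "x" * 100                    # one line, longer than maxwraplength
    folded = apply_line_folding(long_value)
    print(folded.split('\n')[0])              # -> a lone backslash: the folding header
    assert remove_line_folding(folded) == long_value   # unfolding restores the original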
<>= def apply_line_folding(instring,minwraplength=60,maxwraplength=80): """Insert line folding characters into instring between min/max wraplength""" # first check that we need to do this lines = instring.split('\n') line_len = [len(l) for l in lines] if max(line_len) < maxwraplength and re.match(r"\\[ \v\t\f]*" + "\n",instring) is None: return instring outstring = "\\\n" #header for l in lines: if len(l) < maxwraplength: outstring = outstring + l if len(l) > 0 and l[-1]=='\\': #who'da thunk it? A line ending with a backslash outstring = outstring + "\\\n" # outstring = outstring + "\n" # put back the split character else: current_bit = l while len(current_bit) > maxwraplength: space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:]) if space_pos is not None and space_pos.start() < maxwraplength - minwraplength: outstring = outstring + current_bit[:minwraplength+space_pos.start()] + "\\\n" current_bit = current_bit[minwraplength+space_pos.start():] else: #no suitable whitespace, wrap just before the maximum outstring = outstring + current_bit[:maxwraplength-1] + "\\\n" current_bit = current_bit[maxwraplength-1:] outstring = outstring + current_bit + "\n" return outstring[:-1] #drop the extra final newline @ Removing line folding. A folded string announces itself with a backslash, optional whitespace and a line feed at the very start; to unfold, each backslash (with optional trailing whitespace) that ends a line is removed together with the following line break. <>= def remove_line_folding(instring): """Remove line folding from instring""" if re.match(r"\\[ \v\t\f]*" +"\n",instring) is not None: return re.sub(r"\\[ \v\t\f]*$" + "\n?","",instring,flags=re.M) else: return instring @ \subsection{Line indenting} CIF2 introduces a line indenting protocol for embedding arbitrary text strings in a semicolon-delimited string. If the first line ends in one or two backslashes, the text before the first backslash defines an indent that should appear at the beginning of all subsequent lines. For brevity, two backslashes are used to signal that line folding should be performed after indenting. Alternatively, the line folding signal will simply correspond to a second 'header' line in the indented text consisting of the indent followed by a backslash, optional whitespace, and a line feed. <>= def apply_line_prefix(instring,prefix): """Prefix every line in instring with prefix""" if prefix[0] != ";" and "\\" not in prefix: header = re.match(r"(\\[ \v\t\f]*" +"\n)",instring) if header is not None: print('Found line folded string for prefixing...') not_header = instring[header.end():] outstring = prefix + "\\\\\n" + prefix else: print('No folding in input string...') not_header = instring outstring = prefix + "\\\n" + prefix outstring = outstring + not_header.replace("\n","\n"+prefix) return outstring raise StarError("Requested prefix starts with semicolon or contains a backslash: " + prefix) @ Line indents are signalled by one or two backslashes at the end of the first line. If this is detected, the text before the backslash is removed from every line. We do not use regular expressions for the replacement in case the prefix contains significant characters. <>= def remove_line_prefix(instring): """Remove prefix from every line if present""" prefix_match = re.match("(?P<prefix>[^;\\\n][^\n\\\\]+)(?P<folding>\\\\{1,2}[ \t\v\f]*\n)",instring) if prefix_match is not None: prefix_text = prefix_match.group('prefix') print('Found prefix %s' % prefix_text) prefix_end = prefix_match.end('folding') # keep any line folding instructions if prefix_match.group('folding')[:2]=='\\\\': #two backslashes outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n") return "\\" + outstring #keep line folding first line else: outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n") return outstring[1:] #drop first line ending, no longer necessary else: return instring @ \subsection{Templating} A ``template'' is a CifFile containing a single block, where the datanames are laid out in the way that the user desires.
The layout elements that are picked up from the template CifFile are: (1) order (2) column position of datavalues (only the first row of a loop block counts) (3) delimiters (4) column position of datanames. Within loops all items will be indented as for the final name in the loop header. The information that is gleaned is converted to entries in the formatting_hints table, which are then consulted when writing out. Note that the order from formatting_hints will override the item_order information. Additionally, if a semicolon-delimited value has a tab or a sequence of 2 or more spaces after a line ending, it is assumed to be free text and the text values will be neatly formatted with the same indentation as found after the first line ending in the value. Constraints on the template: (1) There should only ever be one dataname on each line (2) loop_ and datablock tokens should appear as the only non-blank characters on their lines (3) Comments are flagged by a '#' as the first character (4) Blank lines are acceptable (5) Datavalues should use only alphanumeric characters (6) Semicolon-delimited strings are not recognised in loops <>= def process_template(template_file): """Process a template datafile to formatting instructions""" template_as_cif = StarFile(template_file,grammar="2.0").first_block() if isinstance(template_file,(unicode,str)): template_string = open(template_file).read() else: #a StringIO object template_file.seek(0) #reset template_string = template_file.read() #template_as_lines = template_string.split("\n") #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#'] #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_'] #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0]) form_hints = [] #ordered array of hint dictionaries find_indent = "^ +" for item in template_as_cif.item_order: #order of input if not isinstance(item,int): #not nested hint_dict = {"dataname":item} # find the line in the file start_pos = re.search("(^[ \t]*(?P<name>" + item + ")[ \t\n]+)(?P<spec>([\S]+)|(^;))",template_string,re.I|re.M) if start_pos.group("spec") != None: spec_pos = start_pos.start("spec")-start_pos.start(0) spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3] if spec_char[0] in '\'";': hint_dict.update({"delimiter":spec_char[0]}) if spec_char == '"""' or spec_char == "'''": hint_dict.update({"delimiter":spec_char}) if spec_char[0] != ";": #so we need to work out the column number hint_dict.update({"column":spec_pos}) else: #need to put in the carriage return hint_dict.update({"delimiter":"\n;"}) # can we format the text?
text_val = template_as_cif[item] hint_dict["reformat"] = "\n\t" in text_val or "\n " in text_val if hint_dict["reformat"]: #find the indentation p = re.search(find_indent,text_val,re.M) if p.group() is not None: hint_dict["reformat_indent"]=p.end() - p.start() if start_pos.group('name') != None: name_pos = start_pos.start('name') - start_pos.start(0) hint_dict.update({"name_pos":name_pos}) #print '%s: %s' % (item,`hint_dict`) form_hints.append(hint_dict) else: #loop block testnames = template_as_cif.loops[item] total_items = len(template_as_cif.loops[item]) testname = testnames[0] #find the loop spec line in the file loop_regex = "(^[ \t]*(?P<loop>loop_)[ \t\n\r]+(?P<name>" + testname + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P<packet>(.(?!_loop|_[\S]+))*))" % (total_items - 1) loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S) loop_so_far = loop_line.end() packet_text = loop_line.group('packet') loop_indent = loop_line.start('loop') - loop_line.start(0) form_hints.append({"dataname":'loop','name_pos':loop_indent}) packet_regex = "[ \t]*(?P<all>(?P<sqqq>'''([^\n\r\f']*)''')|(?P<sq>'([^\n\r\f']*)'+)|(?P<dq>\"([^\n\r\"]*)\"+)|(?P<none>[^\s]+))" packet_pos = re.finditer(packet_regex,packet_text) line_end_pos = re.finditer("^",packet_text,re.M) next_end = next(line_end_pos).end() last_end = next_end for loopname in testnames: #find the name in the file for name pos name_regex = "(^[ \t]*(?P<name>" + loopname + "))" name_match = re.search(name_regex,template_string,re.I|re.M|re.S) loop_name_indent = name_match.start('name')-name_match.start(0) hint_dict = {"dataname":loopname,"name_pos":loop_name_indent} #find the value thismatch = next(packet_pos) while thismatch.start('all') > next_end: try: last_end = next_end next_end = next(line_end_pos).start() print('next end %d' % next_end) except StopIteration: break print('Start %d, last_end %d' % (thismatch.start('all'),last_end)) col_pos = thismatch.start('all') - last_end + 1 if thismatch.group('none') is None: if thismatch.group('sqqq') is not None: hint_dict.update({'delimiter':"'''"}) else: hint_dict.update({'delimiter':thismatch.groups()[0][0]}) hint_dict.update({'column':col_pos}) print('%s: %s' % (loopname,repr( hint_dict ))) form_hints.append(hint_dict) return form_hints @ Creating a proper ordering for output from the template information. When we output, we expect the ordering to consist of a sequence of datanames or loop references. Our templated ordering is essentially a list of datanames, so we now have to find which loop each dataname corresponds to and adjust that loop's ordering accordingly. For dictionary use we allow only a segment of the file to be output by specifying a finish_at/start_from dataname. For consistency, we default to outputting nothing if start_from is not found, and outputting everything if finish_at is not found.
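As an illustration of what the template machinery produces, consider a small in-memory template. The datanames here are arbitrary, and attaching the result to the block's [[formatting_hints]] attribute is shown as one plausible wiring, not a fixed API:

    template = StringIO(u"data_template\n" +
                        u"_cell_length_a      5.959\n" +
                        u"loop_\n" +
                        u"  _atom_site_label\n" +
                        u"  _atom_site_fract_x\n" +
                        u"   C1   0.1234\n")
    hints = process_template(template)
    # hints is an ordered list of hint dictionaries, e.g.
    # {'dataname': '_cell_length_a', 'name_pos': 0, 'column': 20}
    # (column values depend on the exact spacing in the template)
    myblock.formatting_hints = hints   # myblock: the StarBlock about to be printed

The routine below then folds these hints into the block's output order.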
<>= def create_ordering(self,finish_at,start_from): """Create a canonical ordering that includes loops using our formatting hints dictionary""" requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop']) new_order = [] for item in requested_order: if isinstance(item,unicode) and item.lower() in self.item_order: new_order.append(item.lower()) elif item in self: #in a loop somewhere target_loop = self.FindLoop(item) if target_loop not in new_order: new_order.append(target_loop) # adjust loop name order loopnames = self.loops[target_loop] loop_order = [i for i in requested_order if i in loopnames] unordered = [i for i in loopnames if i not in loop_order] self.loops[target_loop] = loop_order + unordered extras = list([i for i in self.item_order if i not in new_order]) self.output_order = new_order + extras # now handle partial output if start_from != '': if start_from in requested_order: sfi = requested_order.index(start_from) loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0] candidates = list([k for k in self.output_order if k in requested_order[sfi:]]) cand_pos = len(new_order) if len(candidates)>0: cand_pos = self.output_order.index(candidates[0]) if len(loop_order)>0: cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) if cand_pos < len(self.output_order): print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from)) self.output_order = self.output_order[cand_pos:] else: print('Start is beyond end of output list') self.output_order = [] elif start_from in extras: self.output_order = self.output_order[self.output_order.index(start_from):] else: self.output_order = [] if finish_at != '': if finish_at in requested_order: fai = requested_order.index(finish_at) loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0]) candidates = list([k for k in self.output_order if k in requested_order[fai:]]) cand_pos = len(new_order) if len(candidates)>0: cand_pos = self.output_order.index(candidates[0]) if len(loop_order)>0: cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) if cand_pos < len(self.output_order): print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at)) self.output_order = self.output_order[:cand_pos] else: print('All of block output') elif finish_at in extras: self.output_order = self.output_order[:self.output_order.index(finish_at)] #print('Final order: ' + repr(self.output_order)) @ Merging. Normally merging of dictionaries is done at the data file level, i.e. a whole block is replaced or added. However, in 'overlay' mode, individual keys are added/replaced, which is a block level operation. Looped item overlaps are tricky. We distinguish two cases: at least one key in common, and all keys in common. The latter implies addition of rows only. The former implies deletion of all co-occurring looped items (as they will otherwise have data of different lengths) and therefore either completely replacing the previous item, or adding the new data to the end, including the other co-looped items. But this would mean that we were passed a loop block with different data lengths in the new object, which is illegal, so we can only add to the end if the new dictionary contains a subset of the attributes in the current dictionary.
Therefore we have the following rules (1) Identical attributes in new and old -> append (2) New contains subset of old -> append values for common items and delete extra looped items (3) Old contains subset of new -> new completely replaces old The [[match_att]] keyword is used when old and new blocks have been matched based on an internal attribute (usually _name or _item.name). This attribute should not become looped in overlay mode, obviously, so we need to have a record of it just in case. The rel_keys keyword contains a list of datanames which act as unique keys (in a database sense) inside loop structures. If any keys match in separate datablocks, the row will not be added, but simply replaced. <>= def merge(self,new_block,mode="strict",match_att=[],match_function=None, rel_keys = []): if mode == 'strict': for key in new_block.keys(): if key in self and key not in match_att: raise StarError( "Identical keys %s in strict merge mode" % key) elif key not in match_att: #a new dataname self[key] = new_block[key] # we get here if there are no keys in common, so we can now copy # the loops and not worry about overlaps for one_loop in new_block.loops.values(): self.CreateLoop(one_loop) # we have lost case information self.true_case.update(new_block.true_case) elif mode == 'replace': newkeys = list(new_block.keys()) for ma in match_att: try: newkeys.remove(ma) #don't touch the special ones except ValueError: pass for key in new_block.keys(): if isinstance(key,unicode): self[key] = new_block[key] # creating the loop will remove items from other loops for one_loop in new_block.loops.values(): self.CreateLoop(one_loop) # we have lost case information self.true_case.update(new_block.true_case) elif mode == 'overlay': print('Overlay mode, current overwrite is %s' % self.overwrite) raise StarError('Overlay block merge mode not implemented') save_overwrite = self.overwrite self.overwrite = True for attribute in new_block.keys(): if attribute in match_att: continue #ignore this one new_value = new_block[attribute] #non-looped items if new_block.FindLoop(attribute)<0: #not looped self[attribute] = new_value my_loops = self.loops.values() perfect_overlaps = [a for a in new_block.loops if a in my_loops] for po in perfect_overlaps: loop_keys = [a for a in po if a in rel_keys] #do we have a key? try: newkeypos = map(lambda a:newkeys.index(a),loop_keys) newkeypos = newkeypos[0] #one key per loop for now loop_keys = loop_keys[0] except (ValueError,IndexError): newkeypos = [] overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data new_data = map(lambda a:new_block[a],overlaps) #new packet data packet_data = transpose(overlap_data) new_p_data = transpose(new_data) # remove any packets for which the keys match between old and new; we # make the arbitrary choice that the old data stays if newkeypos: # get matching values in new list print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos]))) key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos]) # filter out any new data with these key values new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data) if new_p_data: new_data = transpose(new_p_data) else: new_data = [] # wipe out the old data and enter the new stuff byebyeloop = self.GetLoop(overlaps[0]) # print("Removing '%r' with overlaps '%r'" % (byebyeloop, overlaps)) # Note that if, in the original dictionary, overlaps are not # looped, GetLoop will return the block itself. So we check # for this case... 
if byebyeloop != self: self.remove_loop(byebyeloop) self.AddLoopItem((overlaps,overlap_data)) #adding old packets for pd in new_p_data: #adding new packets if pd not in packet_data: for i in range(len(overlaps)): #don't do this at home; we are appending #to something in place self[overlaps[i]].append(pd[i]) self.overwrite = save_overwrite <>= class StarError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nStar Format error: '+ self.value class StarLengthError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nStar length error: ' + self.value class StarDerivationError(Exception): def __init__(self,fail_name): self.fail_name = fail_name def __str__(self): return "Derivation of %s failed, None returned" % self.fail_name # # This is subclassed from AttributeError in order to allow hasattr # to work. # class StarDerivationFailure(AttributeError): def __init__(self,fail_name): self.fail_name = fail_name def __str__(self): return "Derivation of %s failed" % self.fail_name @ \section{Utility functions} These functions do not depend on knowing the internals of the various classes and are therefore kept outside of the class definitions to allow general use. <>= <> <> <> <> <> <> <> @ Listify - used to allow uniform treatment of datanames - otherwise sequence functions might operate on a string instead of a list. <>= def listify(item): if isinstance(item,unicode): return [item] else: return item #Transpose the list of lists passed to us def transpose(base_list): new_lofl = [] full_length = len(base_list) opt_range = range(full_length) for i in range(len(base_list[0])): new_packet = [] for j in opt_range: new_packet.append(base_list[j][i]) new_lofl.append(new_packet) return new_lofl # This routine optimised to return as quickly as possible # as it is called a lot. def not_none(itemlist): """Return true only if no values of None are present""" if itemlist is None: return False if not isinstance(itemlist,(tuple,list)): return True for x in itemlist: if not not_none(x): return False return True <>= <> <> <> @ When loading values, we want to iterate over the items until a "stop_" token is found - this is communicated via the "popout" attribute changing to True. We save the __iter__ method for iterating over packets. Also, when a new packet is begun, all subloops should be extended correspondingly. We are in a special situation where we do not enforce length matching, as we assume that things will be loaded in as we go. Each yield returns a list which should be appended to with a unitary item. So, as the number of packets increases, we need to make sure that the lowest level lists are extended as needed with empty lists. 
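A rough sketch of the intended calling pattern during loading; the token source is hypothetical and the details are illustrative only:

    filler = outer_block.load_iter()     # outer_block: the LoopBlock being populated
    for token in token_stream:           # hypothetical stream of parsed datavalues
        if token == 'stop_':
            outer_block.popout = True    # ends the current packet loop at the next yield
            continue
        block,column = next(filler)      # column is the list awaiting the next value
        column.append(token)             # one unitary item appended per yield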
<>= def load_iter(self,coords=[]): count = 0 #to create packet index while not self.popout: # ok, we have a new packet: append a list to our subloops for aloop in self.loops: aloop.new_enclosing_packet() for iname in self.item_order: if isinstance(iname,LoopBlock): #into a nested loop for subitems in iname.load_iter(coords=coords+[count]): # print 'Yielding %s' % `subitems` yield subitems # print 'End of internal loop' else: if self.dimension == 0: # print 'Yielding %s' % `self[iname]` yield self,self[iname] else: backval = self.block[iname] for i in range(len(coords)): # print 'backval, coords: %s, %s' % (`backval`,`coords`) backval = backval[coords[i]] yield self,backval count = count + 1 # count packets self.popout = False # reinitialise # print 'Finished iterating' yield self,'###Blank###' #this value should never be used # an experimental fast iterator for level-1 loops (ie CIF) def fast_load_iter(self): targets = map(lambda a:self.block[a],self.item_order) while targets: for target in targets: yield self,target # Add another list of the required shape to take into account a new outer packet def new_enclosing_packet(self): if self.dimension > 1: #otherwise have a top-level list for iname in self.keys(): #includes lower levels target_list = self[iname] for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... target_list = target_list[-1] target_list.append([]) # print '%s now %s' % (iname,`self[iname]`) @ We recursively expand out all values in nested loops and return a simple dictionary type. Although it only seems to make sense to call this from a dimension 0 LoopBlock, if we are not a level 0 LoopBlock, we drill down until we get a simple value to return, then start looping. We want to build up a return dictionary by adding keys from the deeper loops, but if we simply use the dictionary update method, we will find that we have stale keys from previous inner loops. Therefore, we keep our values as (key,value) tuples which we turn into a Star packet at the last moment. This is now updated to return StarPackets, which are like lists except that they also have attributes set. 
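Seen from the top level, the effect is that every packet in a possibly nested loop structure can be visited in a single flat pass. A minimal sketch, with an illustrative dataname:

    for packet in top_block.recursive_iter():   # StarPacket instances
        print(packet._atom_site_label)          # each dataname becomes an attribute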
<>= def recursive_iter(self,dict_so_far={},coord=[]): # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension) my_length = 0 top_items = self.block.items() top_values = self.block.values() #same order as items drill_values = self.block.values() for dimup in range(0,self.dimension): #look higher in the tree if len(drill_values)>0: #this block has values drill_values=drill_values[0] #drill in else: raise StarError("Malformed loop packet %s" % repr( top_items[0] )) my_length = len(drill_values[0]) #length of 'string' entry if self.dimension == 0: #top level for aloop in self.loops: for apacket in aloop.recursive_iter(): # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) ) prep_yield = StarPacket(top_values+apacket.values()) #straight list for name,value in top_items + apacket.items(): setattr(prep_yield,name,value) yield prep_yield else: #in some loop for i in range(my_length): kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys()) kvvals = map(lambda a:a[1],kvpairs) #just values # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs )) if self.loops: for aloop in self.loops: for apacket in aloop.recursive_iter(coord=coord+[i]): # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) ) prep_yield = StarPacket(kvvals+apacket.values()) for name,value in kvpairs + apacket.items(): setattr(prep_yield,name,value) yield prep_yield else: # we're at the bottom of the tree # print "Recursive yielding %s" % repr( dict(kvpairs) ) prep_yield = StarPacket(kvvals) for name,value in kvpairs: setattr(prep_yield,name,value) yield prep_yield # small function to use the coordinates. def coord_to_group(self,dataname,coords): if not isinstance(dataname,unicode): return dataname # flag inner loop processing newm = self[dataname] # newm must be a list or tuple for c in coords: # print "Coord_to_group: %s ->" % (repr( newm )), newm = newm[c] # print repr( newm ) return newm @ Return a series of LoopBlocks with the appropriate packet chosen. This does not loop over interior blocks, so called at the top level it just returns the whole star block. <>= def flat_iterator(self): my_length = 0 top_keys = self.block.keys() if len(top_keys)>0: my_length = len(self.block[top_keys[0]]) for pack_no in range(my_length): yield(self.collapse(pack_no)) <>= #No documentation flags pycifrw-4.4/src/StarFile.py000066400000000000000000003443131345362224200157310ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import __copyright = """ PYCIFRW License Agreement (Python License, Version 2) ----------------------------------------------------- 1. This LICENSE AGREEMENT is between the Australian Nuclear Science and Technology Organisation ("ANSTO"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("PyCIFRW") in source or binary form and its associated documentation. 2. 
Subject to the terms and conditions of this License Agreement, ANSTO hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use PyCIFRW alone or in any derivative version, provided, however, that this License Agreement and ANSTO's notice of copyright, i.e., "Copyright (c) 2001-2014 ANSTO; All Rights Reserved" are retained in PyCIFRW alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates PyCIFRW or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to PyCIFRW. 4. ANSTO is making PyCIFRW available to Licensee on an "AS IS" basis. ANSTO MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, ANSTO MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYCIFRW WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. ANSTO SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYCIFRW FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYCIFRW, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between ANSTO and Licensee. This License Agreement does not grant permission to use ANSTO trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using PyCIFRW, Licensee agrees to be bound by the terms and conditions of this License Agreement. 
""" import sys # Python 2,3 compatibility try: from urllib import urlopen # for arbitrary opening from urlparse import urlparse, urlunparse except: from urllib.request import urlopen from urllib.parse import urlparse,urlunparse import re,os import textwrap try: from StringIO import StringIO #not cStringIO as we cannot subclass except ImportError: from io import StringIO if isinstance(u"abc",str): #Python 3 unicode = str try: import numpy have_numpy = True except ImportError: have_numpy = False class StarList(list): def __getitem__(self,args): if isinstance(args,(int,slice)): return super(StarList,self).__getitem__(args) elif isinstance(args,tuple) and len(args)>1: #extended comma notation return super(StarList,self).__getitem__(args[0]).__getitem__(args[1:]) else: return super(StarList,self).__getitem__(args[0]) def __str__(self): return "SL("+super(StarList,self).__str__() + ")" class StarDict(dict): pass class LoopBlock(object): def __init__(self,parent_block,dataname): self.loop_no = parent_block.FindLoop(dataname) if self.loop_no < 0: raise KeyError('%s is not in a loop structure' % dataname) self.parent_block = parent_block def keys(self): return self.parent_block.loops[self.loop_no] def values(self): return [self.parent_block[a] for a in self.keys()] #Avoid iterator even though that is Python3-esque def items(self): return list(zip(self.keys(),self.values())) def __getitem__(self,dataname): if isinstance(dataname,int): #a packet request return self.GetPacket(dataname) if dataname in self.keys(): return self.parent_block[dataname] else: raise KeyError('%s not in loop block' % dataname) def __setitem__(self,dataname,value): self.parent_block[dataname] = value self.parent_block.AddLoopName(self.keys()[0],dataname) def __contains__(self,key): return key in self.parent_block.loops[self.loop_no] def has_key(self,key): return key in self def __iter__(self): packet_list = zip(*self.values()) names = self.keys() for p in packet_list: r = StarPacket(p) for n in range(len(names)): setattr(r,names[n].lower(),r[n]) yield r # for compatibility def __getattr__(self,attname): return getattr(self.parent_block,attname) def load_iter(self,coords=[]): count = 0 #to create packet index while not self.popout: # ok, we have a new packet: append a list to our subloops for aloop in self.loops: aloop.new_enclosing_packet() for iname in self.item_order: if isinstance(iname,LoopBlock): #into a nested loop for subitems in iname.load_iter(coords=coords+[count]): # print 'Yielding %s' % `subitems` yield subitems # print 'End of internal loop' else: if self.dimension == 0: # print 'Yielding %s' % `self[iname]` yield self,self[iname] else: backval = self.block[iname] for i in range(len(coords)): # print 'backval, coords: %s, %s' % (`backval`,`coords`) backval = backval[coords[i]] yield self,backval count = count + 1 # count packets self.popout = False # reinitialise # print 'Finished iterating' yield self,'###Blank###' #this value should never be used # an experimental fast iterator for level-1 loops (ie CIF) def fast_load_iter(self): targets = map(lambda a:self.block[a],self.item_order) while targets: for target in targets: yield self,target # Add another list of the required shape to take into account a new outer packet def new_enclosing_packet(self): if self.dimension > 1: #otherwise have a top-level list for iname in self.keys(): #includes lower levels target_list = self[iname] for i in range(3,self.dimension): #dim 2 upwards are lists of lists of... 
target_list = target_list[-1] target_list.append([]) # print '%s now %s' % (iname,`self[iname]`) def recursive_iter(self,dict_so_far={},coord=[]): # print "Recursive iter: coord %s, keys %s, dim %d" % (`coord`,`self.block.keys()`,self.dimension) my_length = 0 top_items = self.block.items() top_values = self.block.values() #same order as items drill_values = self.block.values() for dimup in range(0,self.dimension): #look higher in the tree if len(drill_values)>0: #this block has values drill_values=drill_values[0] #drill in else: raise StarError("Malformed loop packet %s" % repr( top_items[0] )) my_length = len(drill_values[0]) #length of 'string' entry if self.dimension == 0: #top level for aloop in self.loops: for apacket in aloop.recursive_iter(): # print "Recursive yielding %s" % repr( dict(top_items + apacket.items()) ) prep_yield = StarPacket(top_values+apacket.values()) #straight list for name,value in top_items + apacket.items(): setattr(prep_yield,name,value) yield prep_yield else: #in some loop for i in range(my_length): kvpairs = map(lambda a:(a,self.coord_to_group(a,coord)[i]),self.block.keys()) kvvals = map(lambda a:a[1],kvpairs) #just values # print "Recursive kvpairs at %d: %s" % (i,repr( kvpairs )) if self.loops: for aloop in self.loops: for apacket in aloop.recursive_iter(coord=coord+[i]): # print "Recursive yielding %s" % repr( dict(kvpairs + apacket.items()) ) prep_yield = StarPacket(kvvals+apacket.values()) for name,value in kvpairs + apacket.items(): setattr(prep_yield,name,value) yield prep_yield else: # we're at the bottom of the tree # print "Recursive yielding %s" % repr( dict(kvpairs) ) prep_yield = StarPacket(kvvals) for name,value in kvpairs: setattr(prep_yield,name,value) yield prep_yield # small function to use the coordinates. def coord_to_group(self,dataname,coords): if not isinstance(dataname,unicode): return dataname # flag inner loop processing newm = self[dataname] # newm must be a list or tuple for c in coords: # print "Coord_to_group: %s ->" % (repr( newm )), newm = newm[c] # print repr( newm ) return newm def flat_iterator(self): my_length = 0 top_keys = self.block.keys() if len(top_keys)>0: my_length = len(self.block[top_keys[0]]) for pack_no in range(my_length): yield(self.collapse(pack_no)) def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey) def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname) def GetLoop(self,keyname): """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`. 
`keyname` is only significant as a way to specify the loop.""" return LoopBlock(self,keyname) def GetPacket(self,index): thispack = StarPacket([]) for myitem in self.parent_block.loops[self.loop_no]: thispack.append(self[myitem][index]) setattr(thispack,myitem,thispack[-1]) return thispack def AddPacket(self,packet): for myitem in self.parent_block.loops[self.loop_no]: old_values = self.parent_block[myitem] old_values.append(packet.__getattribute__(myitem)) self.parent_block[myitem] = old_values def GetItemOrder(self): """Return a list of datanames in this `LoopBlock` in the order that they will be printed""" return self.parent_block.loops[self.loop_no][:] def ChangeItemOrder(self,itemname,newpos): """Change the position at which `itemname` appears when printing out to `newpos`.""" self.parent_block.loops[self.loop_no].remove(itemname.lower()) self.parent_block.loops[self.loop_no].insert(newpos,itemname.lower()) def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos def GetLoopNames(self,keyname): if keyname in self: return self.keys() for aloop in self.loops: try: return aloop.GetLoopNames(keyname) except KeyError: pass raise KeyError('Item does not exist') def GetLoopNames(self,keyname): """Return all datanames appearing together with `keyname`""" loop_no = self.FindLoop(keyname) if loop_no >= 0: return self.loops[loop_no] else: raise KeyError('%s is not in any loop' % keyname) def AddToLoop(self,dataname,loopdata): thisloop = self.GetLoop(dataname) for itemname,itemvalue in loopdata.items(): thisloop[itemname] = itemvalue def AddToLoop(self,dataname,loopdata): """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`. Add multiple columns to the loop containing `dataname`. 
`loopdata` is a collection of (key,value) pairs, where `key` is the new dataname and `value` is a list of values for that dataname""" self.update(loopdata) for one_name in loopdata: self.AddLoopName(dataname,one_name) class StarBlock(object): def __init__(self,data = (), maxoutlength=2048, wraplength=80, overwrite=True, characterset='ascii',maxnamelength=-1): self.block = {} #the actual data storage (lower case keys) self.loops = {} #each loop is indexed by a number and contains a list of datanames self.item_order = [] #lower case, loops referenced by integer self.formatting_hints = {} self.true_case = {} #transform lower case to supplied case self.provide_value = False #prefer string version always self.dictionary = None #DDLm dictionary self.popout = False #used during load iteration self.curitem = -1 #used during iteration self.cache_vals = True #store all calculated values self.maxoutlength = maxoutlength self.setmaxnamelength(maxnamelength) #to enforce CIF limit of 75 characters self.set_characterset(characterset) #to check input names self.wraplength = wraplength self.overwrite = overwrite self.string_delimiters = ["'",'"',"\n;"] #universal CIF set self.list_delimiter = " " #CIF2 default self.wrapper = textwrap.TextWrapper() if isinstance(data,(tuple,list)): for item in data: self.AddLoopItem(item) elif isinstance(data,StarBlock): self.block = data.block.copy() self.item_order = data.item_order[:] self.true_case = data.true_case.copy() # loops as well self.loops = data.loops.copy() def setmaxnamelength(self,maxlength): """Set the maximum allowable dataname length (-1 for no check)""" self.maxnamelength = maxlength if maxlength > 0: bad_names = [a for a in self.keys() if len(a)>self.maxnamelength] if len(bad_names)>0: raise StarError('Datanames too long: ' + repr( bad_names )) def set_characterset(self,characterset): """Set the characterset for checking datanames: may be `ascii` or `unicode`""" self.characterset = characterset if characterset == 'ascii': self.char_check = re.compile("[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+",re.M) elif characterset == 'unicode': if sys.maxunicode < 1114111: self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M) else: self.char_check = re.compile(u"[][ \n\r\t!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M) def __str__(self): return self.printsection() def __setitem__(self,key,value): if key == "saves": raise StarError("""Setting the saves key is deprecated. Add the save block to an enclosing block collection (e.g. CIF or STAR file) with this block as child""") self.AddItem(key,value) def __getitem__(self,key): if key == "saves": raise StarError("""The saves key is deprecated. Access the save block from the enclosing block collection (e.g. 
            CIF or STAR file object)""")
        try:
            rawitem,is_value = self.GetFullItemValue(key)
        except KeyError:
            if self.dictionary:
                # send the dictionary the required key and a pointer to us
                try:
                    new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=False)
                except StarDerivationFailure:    #try now with defaults included
                    try:
                        new_value = self.dictionary.derive_item(key,self,store_value=self.cache_vals,allow_defaults=True)
                    except StarDerivationFailure as s:
                        print("In StarBlock.__getitem__, " + repr(s))
                        raise KeyError('No such item: %s' % key)
                print('Set %s to derived value %s' % (key, repr(new_value)))
                return new_value
            else:
                raise KeyError('No such item: %s' % key)
        # we now have an item, we can try to convert it to a number if that is appropriate
        # note numpy values are never stored but are converted to lists
        if not self.dictionary or not key in self.dictionary:
            return rawitem
        print('%s: is_value %s provide_value %s value %s' % (key,repr( is_value ),repr( self.provide_value ),repr( rawitem )))
        if is_value:
            if self.provide_value: return rawitem
            else:
                print('Turning %s into string' % repr( rawitem ))
                return self.convert_to_string(key)
        else:    # a string
            if self.provide_value and ((not isinstance(rawitem,list) and rawitem != '?' and rawitem != ".") or \
                                       (isinstance(rawitem,list) and '?' not in rawitem and '.' not in rawitem)):
                return self.dictionary.change_type(key,rawitem)
            elif self.provide_value:  # catch the question marks
                do_calculate = False
                if isinstance(rawitem,(list,tuple)):
                    known = [a for a in rawitem if a != '?']
                    if len(known) == 0:   #all questions
                        do_calculate = True
                elif rawitem == '?':
                    do_calculate = True
                if do_calculate:
                    # remove old value
                    del self[key]
                    try:
                        new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=False)
                    except StarDerivationFailure:
                        try:
                            new_value = self.dictionary.derive_item(key,self,store_value=True,allow_defaults=True)
                        except StarDerivationFailure as s:
                            print("Could not turn %s into a value: %s" % (key,repr(s)))
                            return rawitem
                    print('Set %s to derived value %s' % (key, repr( new_value )))
                    return new_value
            return rawitem   #can't do anything

    def __delitem__(self,key):
        self.RemoveItem(key)

    def __len__(self):
        blen = len(self.block)
        return blen

    def __nonzero__(self):
        if self.__len__() > 0: return 1
        return 0

    # keys returns all internal keys
    def keys(self):
        return list(self.block.keys())    #always lower case

    def values(self):
        return [self[a] for a in self.keys()]

    def items(self):
        return list(zip(self.keys(),self.values()))

    def __contains__(self,key):
        if isinstance(key,(unicode,str)) and key.lower() in self.keys():
            return True
        return False

    def has_key(self,key):
        return key in self

    def has_key_or_alias(self,key):
        """Check if a dataname or alias is available in the block"""
        initial_test = key in self
        if initial_test: return True
        elif self.dictionary:
            aliases = [k for k in self.dictionary.alias_table.get(key,[]) if self.has_key(k)]
            if len(aliases)>0:
                return True
        return False

    def get(self,key,default=None):
        if key in self:
            retval = self.__getitem__(key)
        else:
            retval = default
        return retval

    def clear(self):
        self.block = {}
        self.loops = {}
        self.item_order = []
        self.true_case = {}

    # doesn't appear to work
    def copy(self):
        newcopy = StarBlock()
        newcopy.block = self.block.copy()
        newcopy.loops = []
        newcopy.item_order = self.item_order[:]
        newcopy.true_case = self.true_case.copy()
        newcopy.loops = self.loops.copy()
        # return self.copy.im_class(newcopy)   #catch inheritance
        return newcopy

    def update(self,adict):
        for key in adict.keys():
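            # delegate to AddItem so that datanames are validated and the
            # original case of each name is recorded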
self.AddItem(key,adict[key]) def GetItemPosition(self,itemname): """A utility function to get the numerical order in the printout of `itemname`. An item has coordinate `(loop_no,pos)` with the top level having a `loop_no` of -1. If an integer is passed to the routine then it will return the position of the loop referenced by that number.""" if isinstance(itemname,int): # return loop position return (-1, self.item_order.index(itemname)) if not itemname in self: raise ValueError('No such dataname %s' % itemname) testname = itemname.lower() if testname in self.item_order: return (-1,self.item_order.index(testname)) loop_no = self.FindLoop(testname) loop_pos = self.loops[loop_no].index(testname) return loop_no,loop_pos def ChangeItemOrder(self,itemname,newpos): """Move the printout order of `itemname` to `newpos`. If `itemname` is in a loop, `newpos` refers to the order within the loop.""" if isinstance(itemname,(unicode,str)): true_name = itemname.lower() else: true_name = itemname loopno = self.FindLoop(true_name) if loopno < 0: #top level self.item_order.remove(true_name) self.item_order.insert(newpos,true_name) else: self.loops[loopno].remove(true_name) self.loops[loopno].insert(newpos,true_name) def GetItemOrder(self): """Return a list of datanames in the order in which they will be printed. Loops are referred to by numerical index""" return self.item_order[:] def AddItem(self,key,value,precheck=False): """Add dataname `key` to block with value `value`. `value` may be a single value, a list or a tuple. If `precheck` is False (the default), all values will be checked and converted to unicode strings as necessary. If `precheck` is True, this checking is bypassed. No checking is necessary when values are read from a CIF file as they are already in correct form.""" if not isinstance(key,(unicode,str)): raise TypeError('Star datanames are strings only (got %s)' % repr( key )) key = unicode(key) #everything is unicode internally if not precheck: self.check_data_name(key,self.maxnamelength) # make sure no nasty characters # check for overwriting if key in self: if not self.overwrite: raise StarError( 'Attempt to insert duplicate item name %s' % key) if not precheck: #need to sanitise regval,empty_val = self.regularise_data(value) pure_string = check_stringiness(regval) self.check_item_value(regval) else: regval,empty_val = value,None pure_string = True # update ancillary information first lower_key = key.lower() if not lower_key in self and self.FindLoop(lower_key)<0: #need to add to order self.item_order.append(lower_key) # always remove from our case table in case the case is different try: del self.true_case[lower_key] except KeyError: pass self.true_case[lower_key] = key if pure_string: self.block.update({lower_key:[regval,empty_val]}) else: self.block.update({lower_key:[empty_val,regval]}) def AddLoopItem(self,incomingdata,precheck=False,maxlength=-1): """*Deprecated*. Use `AddItem` followed by `CreateLoop` if necessary.""" # print "Received data %s" % `incomingdata` # we accept tuples, strings, lists and dicts!! 
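        # A minimal usage sketch (illustrative only; 'sb' is a hypothetical
        # StarBlock instance).  The modern spelling of the deprecated call
        #   sb.AddLoopItem((('_a','_b'),([1,2],[3,4])))
        # is
        #   sb.AddItem('_a',[1,2])
        #   sb.AddItem('_b',[3,4])
        #   sb.CreateLoop(['_a','_b'])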
        # Direct insertion: we have a string-valued key, with an array
        # of values -> single-item into our loop
        if isinstance(incomingdata[0],(tuple,list)):   # a whole loop
            keyvallist = zip(incomingdata[0],incomingdata[1])
            for key,value in keyvallist:
                self.AddItem(key,value)
            self.CreateLoop(incomingdata[0])
        elif not isinstance(incomingdata[0],(unicode,str)):
            raise TypeError('Star datanames are strings only (got %s)' % repr( incomingdata[0] ))
        else:
            self.AddItem(incomingdata[0],incomingdata[1])

    def check_data_name(self,dataname,maxlength=-1):
        if maxlength > 0:
            self.check_name_length(dataname,maxlength)
        if dataname[0]!='_':
            raise StarError( 'Dataname ' + dataname + ' does not begin with _')
        if self.characterset=='ascii':
            if len ([a for a in dataname if ord(a) < 33 or ord(a) > 126]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters')
        else:
            # print 'Checking %s for unicode characterset conformance' % dataname
            if len ([a for a in dataname if ord(a) < 33]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (below code point 33)')
            if len ([a for a in dataname if ord(a) > 126 and ord(a) < 160]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains forbidden characters (between code points 127-159)')
            if len ([a for a in dataname if ord(a) > 0xD7FF and ord(a) < 0xE000]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+D800 and U+E000)')
            if len ([a for a in dataname if ord(a) > 0xFDCF and ord(a) < 0xFDF0]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (between U+FDD0 and U+FDEF)')
            if len ([a for a in dataname if ord(a) == 0xFFFE or ord(a) == 0xFFFF]) > 0:
                raise StarError( 'Dataname ' + dataname + ' contains unsupported characters (U+FFFE and/or U+FFFF)')
            # a supplementary-plane noncharacter has its low 16 bits equal to FFFE or FFFF
            if len ([a for a in dataname if ord(a) > 0x10000 and (ord(a) & 0xFFFE) == 0xFFFE]) > 0:
                print('%s fails' % dataname)
                for a in dataname: print('%x' % ord(a),end="")
                print()
                raise StarError( u'Dataname ' + dataname + u' contains unsupported characters (code points ending in FFFE or FFFF)')

    def check_name_length(self,dataname,maxlength):
        if len(dataname)>maxlength:
            raise StarError( 'Dataname %s exceeds maximum length %d' % (dataname,maxlength))
        return

    def check_item_value(self,item):
        test_item = item
        if not isinstance(item,(list,dict,tuple)):
            test_item = [item]         #single item list
        def check_one (it):
            if isinstance(it,unicode):
                if it=='': return
                me = self.char_check.match(it)
                if not me:
                    print("Fail value check: %s" % it)
                    raise StarError('Bad character in %s' % it)
                else:
                    if me.span() != (0,len(it)):
                        print("Fail value check, match only %d-%d in string %s" % (me.span()[0],me.span()[1],repr( it )))
                        raise StarError('Data item "' + repr( it ) + u'"...
contains forbidden characters') [check_one(a) for a in test_item] def regularise_data(self,dataitem): """Place dataitem into a list if necessary""" from numbers import Number if isinstance(dataitem,str): return unicode(dataitem),None if isinstance(dataitem,(Number,unicode,StarList,StarDict)): return dataitem,None #assume StarList/StarDict contain unicode if necessary if isinstance(dataitem,(tuple,list)): v,s = zip(*list([self.regularise_data(a) for a in dataitem])) return list(v),list(s) #return dataitem,[None]*len(dataitem) # so try to make into a list try: regval = list(dataitem) except TypeError as value: raise StarError( str(dataitem) + ' is wrong type for data value\n' ) v,s = zip(*list([self.regularise_data(a) for a in regval])) return list(v),list(s) def RemoveItem(self,itemname): """Remove `itemname` from the block.""" # first check any loops loop_no = self.FindLoop(itemname) testkey = itemname.lower() if testkey in self: del self.block[testkey] del self.true_case[testkey] # now remove from loop if loop_no >= 0: self.loops[loop_no].remove(testkey) if len(self.loops[loop_no])==0: del self.loops[loop_no] self.item_order.remove(loop_no) else: #will appear in order list self.item_order.remove(testkey) def RemoveLoopItem(self,itemname): """*Deprecated*. Use `RemoveItem` instead""" self.RemoveItem(itemname) def GetItemValue(self,itemname): """Return value of `itemname`. If `itemname` is looped, a list of all values will be returned.""" return self.GetFullItemValue(itemname)[0] def GetFullItemValue(self,itemname): """Return the value associated with `itemname`, and a boolean flagging whether (True) or not (False) it is in a form suitable for calculation. False is always returned for strings and `StarList` objects.""" try: s,v = self.block[itemname.lower()] except KeyError: raise KeyError('Itemname %s not in datablock' % itemname) # prefer string value unless all are None # are we a looped value? if not isinstance(s,(tuple,list)) or isinstance(s,StarList): if not_none(s): return s,False #a string value else: return v,not isinstance(v,StarList) #a StarList is not calculation-ready elif not_none(s): return s,False #a list of string values else: if len(v)>0: return v,not isinstance(v[0],StarList) return v,True def CreateLoop(self,datanames,order=-1,length_check=True): """Create a loop in the datablock. `datanames` is a list of datanames that together form a loop. If length_check is True, they should have been initialised in the block to have the same number of elements (possibly 0). If `order` is given, the loop will appear at this position in the block when printing out. 
        A loop counts as a single position."""

        if length_check:
            # check lengths: these datanames should exist
            listed_values = [a for a in datanames if isinstance(self[a],list) and not isinstance(self[a],StarList)]
            if len(listed_values) == len(datanames):
                len_set = set([len(self[a]) for a in datanames])
                if len(len_set)>1:
                    raise ValueError('Request to loop datanames %s with different lengths: %s' % (repr( datanames ),repr( len_set )))
            elif len(listed_values) != 0:
                raise ValueError('Request to loop datanames where some are single values and some are not')
        # store as lower case
        lc_datanames = [d.lower() for d in datanames]
        # remove these datanames from all other loops
        [self.loops[a].remove(b) for a in self.loops for b in lc_datanames if b in self.loops[a]]
        # remove empty loops
        empty_loops = [a for a in self.loops.keys() if len(self.loops[a])==0]
        for a in empty_loops:
            self.item_order.remove(a)
            del self.loops[a]
        if len(self.loops)>0:
            loopno = max(self.loops.keys()) + 1
        else:
            loopno = 1
        self.loops[loopno] = list(lc_datanames)
        if order >= 0:
            self.item_order.insert(order,loopno)
        else:
            self.item_order.append(loopno)
        # remove these datanames from item ordering
        self.item_order = [a for a in self.item_order if a not in lc_datanames]

    def AddLoopName(self,oldname, newname):
        """Add `newname` to the loop containing `oldname`. If it is already in the new loop, no
        error is raised. If `newname` is in a different loop, it is removed from that loop.
        The number of values associated with `newname` must match the number of values associated
        with all other columns of the new loop or a `ValueError` will be raised."""
        lower_newname = newname.lower()
        loop_no = self.FindLoop(oldname)
        if loop_no < 0:
            raise KeyError('%s not in loop' % oldname)
        if lower_newname in self.loops[loop_no]:
            return
        # check length
        old_provides = self.provide_value
        self.provide_value = False
        loop_len = len(self[oldname])
        self.provide_value = old_provides
        if len(self[newname]) != loop_len:
            raise StarLengthError('Mismatch of loop column lengths for %s: should be %d' % (newname,loop_len))
        # remove from any other loops
        [self.loops[a].remove(lower_newname) for a in self.loops if lower_newname in self.loops[a]]
        # and add to this loop
        self.loops[loop_no].append(lower_newname)
        # remove from item_order if present
        try:
            self.item_order.remove(lower_newname)
        except ValueError:
            pass

    def FindLoop(self,keyname):
        """Find the loop that contains `keyname` and return its numerical index or
        -1 if not present. The numerical index can be used to refer to the loop in
        other routines."""
        loop_no = [a for a in self.loops.keys() if keyname.lower() in self.loops[a]]
        if len(loop_no)>0:
            return loop_no[0]
        else:
            return -1

    def GetLoop(self,keyname):
        """Return a `StarFile.LoopBlock` object constructed from the loop containing `keyname`.
        `keyname` is only significant as a way to specify the loop."""
        return LoopBlock(self,keyname)

    def GetLoopNames(self,keyname):
        """Return all datanames appearing together with `keyname`"""
        loop_no = self.FindLoop(keyname)
        if loop_no >= 0:
            return self.loops[loop_no]
        else:
            raise KeyError('%s is not in any loop' % keyname)

    def AddToLoop(self,dataname,loopdata):
        """*Deprecated*. Use `AddItem` followed by calls to `AddLoopName`.

        Add multiple columns to the loop containing `dataname`. `loopdata` is a
        collection of (key,value) pairs, where `key` is the new dataname and
        `value` is a list of values for that dataname"""
        self.update(loopdata)
        for one_name in loopdata:
            self.AddLoopName(dataname,one_name)

    def RemoveKeyedPacket(self,keyname,keyvalue):
        """Remove the packet for which dataname `keyname` takes value `keyvalue`.
        Only the first such occurrence is removed."""
        packet_coord = list(self[keyname]).index(keyvalue)
        loopnames = self.GetLoopNames(keyname)
        for dataname in loopnames:
            self.block[dataname][0] = list(self.block[dataname][0])
            del self.block[dataname][0][packet_coord]
            self.block[dataname][1] = list(self.block[dataname][1])
            del self.block[dataname][1][packet_coord]

    def GetKeyedPacket(self,keyname,keyvalue,no_case=False):
        """Return the loop packet (a `StarPacket` object) where `keyname` has value
        `keyvalue`. Ignore case in `keyvalue` if `no_case` is True. `ValueError` is
        raised if no packet is found or more than one packet is found."""
        my_loop = self.GetLoop(keyname)
        #print("Looking for %s in %s" % (keyvalue, my_loop.parent_block))
        #print('Packet check on:' + keyname)
        #[print(repr(getattr(a,keyname))) for a in my_loop]
        if no_case:
            one_pack= [a for a in my_loop if getattr(a,keyname).lower()==keyvalue.lower()]
        else:
            one_pack= [a for a in my_loop if getattr(a,keyname)==keyvalue]
        if len(one_pack)!=1:
            raise ValueError("Bad packet key %s = %s: returned %d packets" % (keyname,keyvalue,len(one_pack)))
        print("Keyed packet: %s" % one_pack[0])
        return one_pack[0]

    def GetCompoundKeyedPacket(self,keydict):
        """Return the loop packet (a `StarPacket` object) where the `{key:(value,caseless)}`
        pairs in `keydict` take the appropriate values. Ignore case for a given `key` if
        `caseless` is True.
`ValueError` is raised if no packet is found or more than one packet is found.""" #print "Looking for %s in %s" % (keyvalue, self.parent_block[keyname]) keynames = list(keydict.keys()) my_loop = self.GetLoop(keynames[0]) for one_key in keynames: keyval,no_case = keydict[one_key] if no_case: my_loop = list([a for a in my_loop if str(getattr(a,one_key)).lower()==str(keyval).lower()]) else: my_loop = list([a for a in my_loop if getattr(a,one_key)==keyval]) if len(my_loop)!=1: raise ValueError("Bad packet keys %s: returned %d packets" % (repr(keydict),len(my_loop))) print("Compound keyed packet: %s" % my_loop[0]) return my_loop[0] def GetKeyedSemanticPacket(self,keyvalue,cat_id): """Return a complete packet for category `cat_id` where the category key for the category equals `keyvalue`. This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from both categories.""" target_keys = self.dictionary.cat_key_table[cat_id] target_keys = [k[0] for k in target_keys] #one only in each list p = StarPacket() # set case-sensitivity flag lcase = False if self.dictionary[target_keys[0]]['_type.contents'] in ['Code','Tag','Name']: lcase = True for cat_key in target_keys: try: extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except KeyError: #missing key try: test_key = self[cat_key] #generate key if possible print('Test key is %s' % repr( test_key )) if test_key is not None and\ not (isinstance(test_key,list) and (None in test_key or len(test_key)==0)): print('Getting packet for key %s' % repr( keyvalue )) extra_packet = self.GetKeyedPacket(cat_key,keyvalue,no_case=lcase) except: #cannot be generated continue except ValueError: #none/more than one, assume none continue #extra_packet = self.dictionary.generate_default_packet(cat_id,cat_key,keyvalue) p.merge_packet(extra_packet) # the following attributes used to calculate missing values for keyname in target_keys: if hasattr(p,keyname): p.key = [keyname] break if not hasattr(p,"key"): raise ValueError("No key found for %s, packet is %s" % (cat_id,str(p))) p.cif_dictionary = self.dictionary p.fulldata = self return p def GetMultiKeyedSemanticPacket(self,keydict,cat_id): """Return a complete packet for category `cat_id` where the keyvalues are provided as a dictionary of key:(value,caseless) pairs This routine will understand any joined loops, so if separate loops in the datafile belong to the same category hierarchy (e.g. `_atom_site` and `_atom_site_aniso`), the returned `StarPacket` object will contain datanames from the requested category and any children.""" #if len(keyvalues)==1: #simplification # return self.GetKeyedSemanticPacket(keydict[1][0],cat_id) target_keys = self.dictionary.cat_key_table[cat_id] # update the dictionary passed to us with all equivalents, for # simplicity. 
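        # target_keys is a list of compound keys, one per category in the
        # hierarchy; the transpose below lines up the datanames that play the
        # same key role in each category, so a value supplied under any one
        # name can be copied to its equivalents.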
parallel_keys = list(zip(*target_keys)) #transpose print('Parallel keys:' + repr(parallel_keys)) print('Keydict:' + repr(keydict)) start_keys = list(keydict.keys()) for one_name in start_keys: key_set = [a for a in parallel_keys if one_name in a] for one_key in key_set: keydict[one_key] = keydict[one_name] # target_keys is a list of lists, each of which is a compound key p = StarPacket() # a little function to return the dataname for a key def find_key(key): for one_key in self.dictionary.key_equivs.get(key,[])+[key]: if self.has_key(one_key): return one_key return None for one_set in target_keys: #loop down the categories true_keys = [find_key(k) for k in one_set] true_keys = [k for k in true_keys if k is not None] if len(true_keys)==len(one_set): truekeydict = dict([(t,keydict[k]) for t,k in zip(true_keys,one_set)]) try: extra_packet = self.GetCompoundKeyedPacket(truekeydict) except KeyError: #one or more are missing continue #should try harder? except ValueError: continue else: continue print('Merging packet for keys ' + repr(one_set)) p.merge_packet(extra_packet) # the following attributes used to calculate missing values p.key = true_keys p.cif_dictionary = self.dictionary p.fulldata = self return p def set_grammar(self,new_grammar): self.string_delimiters = ["'",'"',"\n;",None] if new_grammar in ['STAR2','2.0']: self.string_delimiters += ['"""',"'''"] if new_grammar == '2.0': self.list_delimiter = " " elif new_grammar == 'STAR2': self.list_delimiter = ", " elif new_grammar not in ['1.0','1.1']: raise StarError('Request to set unknown grammar %s' % new_grammar) def SetOutputLength(self,wraplength=80,maxoutlength=2048): """Set the maximum output line length (`maxoutlength`) and the line length to wrap at (`wraplength`). The wrap length is a target only and may not always be possible.""" if wraplength > maxoutlength: raise StarError("Wrap length (requested %d) must be <= Maximum line length (requested %d)" % (wraplength,maxoutlength)) self.wraplength = wraplength self.maxoutlength = maxoutlength def printsection(self,instring='',blockstart="",blockend="",indent=0,finish_at='',start_from=''): self.provide_value = False # first make an ordering self.create_ordering(finish_at,start_from) #create self.output_order # now do it... if not instring: outstring = CIFStringIO(target_width=80) # the returned string else: outstring = instring # print block delimiter outstring.write(blockstart,canbreak=True) while len(self.output_order)>0: #print "Remaining to output " + `self.output_order` itemname = self.output_order.pop(0) if not isinstance(itemname,int): #no loop item_spec = [i for i in self.formatting_hints if i['dataname'].lower()==itemname.lower()] if len(item_spec)>0: item_spec = item_spec[0] col_pos = item_spec.get('column',-1) name_pos = item_spec.get('name_pos',-1) else: col_pos = -1 item_spec = {} name_pos = -1 if col_pos < 0: col_pos = 40 outstring.set_tab(col_pos) itemvalue = self[itemname] outstring.write(self.true_case[itemname],mustbreak=True,do_tab=False,startcol=name_pos) outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) #space after itemname self.format_value(itemvalue,outstring,hints=item_spec) else:# we are asked to print a loop block outstring.set_tab(10) #guess this is OK? 
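                # check the template for a position requested for the 'loop_'
                # keyword itself; this controls the indentation of the loop header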
loop_spec = [i['name_pos'] for i in self.formatting_hints if i["dataname"]=='loop'] if loop_spec: loop_indent = max(loop_spec[0],0) else: loop_indent = indent outstring.write('loop_\n',mustbreak=True,do_tab=False,startcol=loop_indent) self.format_names(outstring,indent+2,loop_no=itemname) self.format_packets(outstring,indent+2,loop_no=itemname) else: returnstring = outstring.getvalue() outstring.close() return returnstring def format_names(self,outstring,indent=0,loop_no=-1): """Print datanames from `loop_no` one per line""" temp_order = self.loops[loop_no][:] #copy format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in temp_order]) while len(temp_order)>0: itemname = temp_order.pop(0) req_indent = format_hints.get(itemname,{}).get('name_pos',indent) outstring.write(' ' * req_indent,do_tab=False) outstring.write(self.true_case[itemname],do_tab=False) outstring.write("\n",do_tab=False) def format_packets(self,outstring,indent=0,loop_no=-1): alldata = [self[a] for a in self.loops[loop_no]] loopnames = self.loops[loop_no] #print 'Alldata: %s' % `alldata` packet_data = list(zip(*alldata)) #print 'Packet data: %s' % `packet_data` #create a dictionary for quick lookup of formatting requirements format_hints = dict([(i['dataname'],i) for i in self.formatting_hints if i['dataname'] in loopnames]) for position in range(len(packet_data)): if position > 0: outstring.write("\n") #new line each packet except first for point in range(len(packet_data[position])): datapoint = packet_data[position][point] format_hint = format_hints.get(loopnames[point],{}) packstring = self.format_packet_item(datapoint,indent,outstring,format_hint) outstring.write(' ',canbreak=True,do_tab=False,delimiter=True) def format_packet_item(self,pack_item,indent,outstring,format_hint): # print 'Formatting %s' % `pack_item` # temporary check for any non-unicode items if isinstance(pack_item,str) and not isinstance(pack_item,unicode): raise StarError("Item {0!r} is not unicode".format(pack_item)) if isinstance(pack_item,unicode): delimiter = format_hint.get('delimiter',None) startcol = format_hint.get('column',-1) outstring.write(self._formatstring(pack_item,delimiter=delimiter),startcol=startcol) else: self.format_value(pack_item,outstring,hints = format_hint) def _formatstring(self,instring,delimiter=None,standard='CIF1',indent=0,hints={}): if hints.get("reformat",False) and "\n" in instring: instring = "\n"+self.do_wrapping(instring,hints["reformat_indent"]) allowed_delimiters = set(self.string_delimiters) if len(instring)==0: allowed_delimiters.difference_update([None]) if len(instring) > (self.maxoutlength-2) or '\n' in instring: allowed_delimiters.intersection_update(["\n;","'''",'"""']) if ' ' in instring or '\t' in instring or '\v' in instring or (len(instring)>0 and instring[0] in '_$#;([{') or ',' in instring: allowed_delimiters.difference_update([None]) if len(instring)>3 and (instring[:4].lower()=='data' or instring[:4].lower()=='save'): allowed_delimiters.difference_update([None]) if len(instring)>5 and instring[:6].lower()=='global': allowed_delimiters.difference_update([None]) if '"' in instring: allowed_delimiters.difference_update(['"',None]) if "'" in instring: allowed_delimiters.difference_update(["'",None]) out_delimiter = "\n;" #default (most conservative) if delimiter in allowed_delimiters: out_delimiter = delimiter elif "'" in allowed_delimiters: out_delimiter = "'" elif '"' in allowed_delimiters: out_delimiter = '"' if out_delimiter in ['"',"'",'"""',"'''"]: return 
out_delimiter + instring + out_delimiter elif out_delimiter is None: return instring # we are left with semicolon strings # use our protocols: maxlinelength = max([len(a) for a in instring.split('\n')]) if maxlinelength > self.maxoutlength: protocol_string = apply_line_folding(instring) else: protocol_string = instring # now check for embedded delimiters if "\n;" in protocol_string: prefix = "CIF:" while prefix in protocol_string: prefix = prefix + ":" protocol_string = apply_line_prefix(protocol_string,prefix+"> ") return "\n;" + protocol_string + "\n;" def format_value(self,itemvalue,stringsink,compound=False,hints={}): """Format a Star data value""" global have_numpy delimiter = hints.get('delimiter',None) startcol = hints.get('column',-1) if isinstance(itemvalue,str) and not isinstance(itemvalue,unicode): #not allowed raise StarError("Non-unicode value {0} found in block".format(itemvalue)) if isinstance(itemvalue,unicode): #need to sanitize stringsink.write(self._formatstring(itemvalue,delimiter=delimiter,hints=hints),canbreak = True,startcol=startcol) elif isinstance(itemvalue,(list)) or (hasattr(itemvalue,'dtype') and hasattr(itemvalue,'__iter__')): #numpy stringsink.set_tab(0) stringsink.write('[',canbreak=True,newindent=True,mustbreak=compound,startcol=startcol) if len(itemvalue)>0: self.format_value(itemvalue[0],stringsink) for listval in itemvalue[1:]: # print 'Formatting %s' % `listval` stringsink.write(self.list_delimiter,do_tab=False) self.format_value(listval,stringsink,compound=True) stringsink.write(']',unindent=True) elif isinstance(itemvalue,dict): stringsink.set_tab(0) stringsink.write('{',newindent=True,mustbreak=compound,startcol=startcol) #start a new line inside items = list(itemvalue.items()) if len(items)>0: stringsink.write("'"+items[0][0]+"'"+':',canbreak=True) self.format_value(items[0][1],stringsink) for key,value in items[1:]: stringsink.write(self.list_delimiter) stringsink.write("'"+key+"'"+":",canbreak=True) self.format_value(value,stringsink) #never break between key and value stringsink.write('}',unindent=True) elif isinstance(itemvalue,(float,int)) or \ (have_numpy and isinstance(itemvalue,(numpy.number))): #TODO - handle uncertainties stringsink.write(str(itemvalue),canbreak=True,startcol=startcol) #numbers else: raise ValueError('Value in unexpected format for output: %s' % repr( itemvalue )) def create_ordering(self,finish_at,start_from): """Create a canonical ordering that includes loops using our formatting hints dictionary""" requested_order = list([i['dataname'] for i in self.formatting_hints if i['dataname']!='loop']) new_order = [] for item in requested_order: if isinstance(item,unicode) and item.lower() in self.item_order: new_order.append(item.lower()) elif item in self: #in a loop somewhere target_loop = self.FindLoop(item) if target_loop not in new_order: new_order.append(target_loop) # adjust loop name order loopnames = self.loops[target_loop] loop_order = [i for i in requested_order if i in loopnames] unordered = [i for i in loopnames if i not in loop_order] self.loops[target_loop] = loop_order + unordered extras = list([i for i in self.item_order if i not in new_order]) self.output_order = new_order + extras # now handle partial output if start_from != '': if start_from in requested_order: sfi = requested_order.index(start_from) loop_order = [self.FindLoop(k) for k in requested_order[sfi:] if self.FindLoop(k)>0] candidates = list([k for k in self.output_order if k in requested_order[sfi:]]) cand_pos = len(new_order) if len(candidates)>0: 
cand_pos = self.output_order.index(candidates[0]) if len(loop_order)>0: cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) if cand_pos < len(self.output_order): print('Output starts from %s, requested %s' % (self.output_order[cand_pos],start_from)) self.output_order = self.output_order[cand_pos:] else: print('Start is beyond end of output list') self.output_order = [] elif start_from in extras: self.output_order = self.output_order[self.output_order.index(start_from):] else: self.output_order = [] if finish_at != '': if finish_at in requested_order: fai = requested_order.index(finish_at) loop_order = list([self.FindLoop(k) for k in requested_order[fai:] if self.FindLoop(k)>0]) candidates = list([k for k in self.output_order if k in requested_order[fai:]]) cand_pos = len(new_order) if len(candidates)>0: cand_pos = self.output_order.index(candidates[0]) if len(loop_order)>0: cand_pos = min(cand_pos,self.output_order.index(loop_order[0])) if cand_pos < len(self.output_order): print('Output finishes before %s, requested before %s' % (self.output_order[cand_pos],finish_at)) self.output_order = self.output_order[:cand_pos] else: print('All of block output') elif finish_at in extras: self.output_order = self.output_order[:self.output_order.index(finish_at)] #print('Final order: ' + repr(self.output_order)) def convert_to_string(self,dataname): """Convert values held in dataname value fork to string version""" v,is_value = self.GetFullItemValue(dataname) if not is_value: return v if check_stringiness(v): return v #already strings # TODO...something else return v def do_wrapping(self,instring,indent=3): """Wrap the provided string""" if " " in instring: #already formatted return instring self.wrapper.initial_indent = ' '*indent self.wrapper.subsequent_indent = ' '*indent # remove leading and trailing space instring = instring.strip() # split into paragraphs paras = instring.split("\n\n") wrapped_paras = [self.wrapper.fill(p) for p in paras] return "\n".join(wrapped_paras) def merge(self,new_block,mode="strict",match_att=[],match_function=None, rel_keys = []): if mode == 'strict': for key in new_block.keys(): if key in self and key not in match_att: raise StarError( "Identical keys %s in strict merge mode" % key) elif key not in match_att: #a new dataname self[key] = new_block[key] # we get here if there are no keys in common, so we can now copy # the loops and not worry about overlaps for one_loop in new_block.loops.values(): self.CreateLoop(one_loop) # we have lost case information self.true_case.update(new_block.true_case) elif mode == 'replace': newkeys = list(new_block.keys()) for ma in match_att: try: newkeys.remove(ma) #don't touch the special ones except ValueError: pass for key in new_block.keys(): if isinstance(key,unicode): self[key] = new_block[key] # creating the loop will remove items from other loops for one_loop in new_block.loops.values(): self.CreateLoop(one_loop) # we have lost case information self.true_case.update(new_block.true_case) elif mode == 'overlay': print('Overlay mode, current overwrite is %s' % self.overwrite) raise StarError('Overlay block merge mode not implemented') save_overwrite = self.overwrite self.overwrite = True for attribute in new_block.keys(): if attribute in match_att: continue #ignore this one new_value = new_block[attribute] #non-looped items if new_block.FindLoop(attribute)<0: #not looped self[attribute] = new_value my_loops = self.loops.values() perfect_overlaps = [a for a in new_block.loops if a in my_loops] for po in perfect_overlaps: 
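            # reconcile the two versions of this loop: packets whose key values
            # already appear in the old block are kept as-is, and genuinely new
            # packets are appended afterwards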
loop_keys = [a for a in po if a in rel_keys] #do we have a key? try: newkeypos = map(lambda a:newkeys.index(a),loop_keys) newkeypos = newkeypos[0] #one key per loop for now loop_keys = loop_keys[0] except (ValueError,IndexError): newkeypos = [] overlap_data = map(lambda a:listify(self[a]),overlaps) #old packet data new_data = map(lambda a:new_block[a],overlaps) #new packet data packet_data = transpose(overlap_data) new_p_data = transpose(new_data) # remove any packets for which the keys match between old and new; we # make the arbitrary choice that the old data stays if newkeypos: # get matching values in new list print("Old, new data:\n%s\n%s" % (repr(overlap_data[newkeypos]),repr(new_data[newkeypos]))) key_matches = filter(lambda a:a in overlap_data[newkeypos],new_data[newkeypos]) # filter out any new data with these key values new_p_data = filter(lambda a:a[newkeypos] not in key_matches,new_p_data) if new_p_data: new_data = transpose(new_p_data) else: new_data = [] # wipe out the old data and enter the new stuff byebyeloop = self.GetLoop(overlaps[0]) # print("Removing '%r' with overlaps '%r'" % (byebyeloop, overlaps)) # Note that if, in the original dictionary, overlaps are not # looped, GetLoop will return the block itself. So we check # for this case... if byebyeloop != self: self.remove_loop(byebyeloop) self.AddLoopItem((overlaps,overlap_data)) #adding old packets for pd in new_p_data: #adding new packets if pd not in packet_data: for i in range(len(overlaps)): #don't do this at home; we are appending #to something in place self[overlaps[i]].append(pd[i]) self.overwrite = save_overwrite def assign_dictionary(self,dic): if not dic.diclang=="DDLm": print("Warning: ignoring dictionary %s" % dic.my_uri) return self.dictionary = dic def unassign_dictionary(self): """Remove dictionary-dependent behaviour""" self.dictionary = None class StarPacket(list): def merge_packet(self,incoming): """Merge contents of incoming packet with this packet""" new_attrs = [a for a in dir(incoming) if a[0] == '_' and a[1] != "_"] self.extend(incoming) for na in new_attrs: setattr(self,na,getattr(incoming,na)) def __getattr__(self,att_name): """Derive a missing attribute""" if att_name.lower() in self.__dict__: return getattr(self,att_name.lower()) if att_name in ('cif_dictionary','fulldata','key'): raise AttributeError('Programming error: can only assign value of %s' % att_name) d = self.cif_dictionary c = self.fulldata k = self.key assert isinstance(k,list) d.derive_item(att_name,c,store_value=True) # # now pick out the new value # self.key is a list of the key values keydict = dict([(v,(getattr(self,v),True)) for v in k]) full_pack = c.GetCompoundKeyedPacket(keydict) return getattr(full_pack,att_name) class BlockCollection(object): """A container for StarBlock objects. The constructor takes one non-keyword argument `datasource` to set the initial data. If `datasource` is a Python dictionary, the values must be `StarBlock` objects and the keys will be blocknames in the new object. Keyword arguments: standard: `CIF` or `Dic`. `CIF` enforces 75-character blocknames, and will print block contents before that block's save frame. blocktype: The type of blocks held in this container. Normally `StarBlock` or `CifBlock`. characterset: `ascii` or `unicode`. Blocknames and datanames appearing within blocks are restricted to the appropriate characterset. Note that only characters in the basic multilingual plane are accepted. This restriction will be lifted when PyCIFRW is ported to Python3. 
scoping: `instance` or `dictionary`: `instance` implies that save frames are hidden from save frames lower in the hierarchy or in sibling hierarchies. `dictionary` makes all save frames visible everywhere within a data block. This setting is only relevant for STAR2 dictionaries and STAR2 data files, as save frames are currently not used in plain CIF data files. """ def __init__(self,datasource=None,standard='CIF',blocktype = StarBlock, characterset='ascii',scoping='instance',**kwargs): import collections self.dictionary = {} self.standard = standard self.lower_keys = set() # short_cuts self.renamed = {} self.PC = collections.namedtuple('PC',['block_id','parent']) self.child_table = {} self.visible_keys = [] # for efficiency self.block_input_order = [] # to output in same order self.scoping = scoping #will trigger setting of child table self.blocktype = blocktype self.master_template = {} #for outputting self.set_grammar('2.0') self.set_characterset(characterset) if isinstance(datasource,BlockCollection): self.merge_fast(datasource) self.scoping = scoping #reset visibility elif isinstance(datasource,dict): for key,value in datasource.items(): self[key]= value self.header_comment = '' def set_grammar(self,new_grammar): """Set the syntax and grammar for output to `new_grammar`""" if new_grammar not in ['1.1','1.0','2.0','STAR2']: raise StarError('Unrecognised output grammar %s' % new_grammar) self.grammar = new_grammar def set_characterset(self,characterset): """Set the allowed characters for datanames and datablocks: may be `ascii` or `unicode`. If datanames have already been added to any datablocks, they are not checked.""" self.characterset = characterset for one_block in self.lower_keys: self[one_block].set_characterset(characterset) def unlock(self): """Allow overwriting of all blocks in this collection""" for a in self.lower_keys: self[a].overwrite=True def lock(self): """Disallow overwriting for all blocks in this collection""" for a in self.lower_keys: self[a].overwrite = False def __str__(self): return self.WriteOut() def __setitem__(self,key,value): self.NewBlock(key,value,parent=None) def __getitem__(self,key): if isinstance(key,(unicode,str)): lowerkey = key.lower() if lowerkey in self.lower_keys: return self.dictionary[lowerkey] #print 'Visible keys:' + `self.visible_keys` #print 'All keys' + `self.lower_keys` #print 'Child table' + `self.child_table` raise KeyError('No such item %s' % key) # we have to get an ordered list of the current keys, # as we'll have to delete one of them anyway. 
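    # A minimal sketch of block access and removal (illustrative only;
    # 'bc' is a hypothetical BlockCollection):
    #   bc['new_block'] = StarBlock()   # add a block
    #   b = bc['NEW_BLOCK']             # lookup ignores case
    #   del bc['new_block']             # children are deleted as well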
# Deletion will delete any key regardless of visibility def __delitem__(self,key): dummy = self[key] #raise error if not present lowerkey = key.lower() # get rid of all children recursively as well children = [a[0] for a in self.child_table.items() if a[1].parent == lowerkey] for child in children: del self[child] #recursive call del self.dictionary[lowerkey] del self.child_table[lowerkey] try: self.visible_keys.remove(lowerkey) except KeyError: pass self.lower_keys.remove(lowerkey) self.block_input_order.remove(lowerkey) def __len__(self): return len(self.visible_keys) def __contains__(self,item): """Support the 'in' operator""" if not isinstance(item,(unicode,str)): return False if item.lower() in self.visible_keys: return True return False # We iterate over all visible def __iter__(self): for one_block in self.keys(): yield self[one_block] # TODO: handle different case def keys(self): return self.visible_keys # Note that has_key does not exist in 3.5 def has_key(self,key): return key in self def get(self,key,default=None): if key in self: # take account of case return self.__getitem__(key) else: return default def clear(self): self.dictionary.clear() self.lower_keys = set() self.child_table = {} self.visible_keys = [] self.block_input_order = [] def copy(self): newcopy = self.dictionary.copy() #all blocks for k,v in self.dictionary.items(): newcopy[k] = v.copy() newcopy = BlockCollection(newcopy) newcopy.child_table = self.child_table.copy() newcopy.lower_keys = self.lower_keys.copy() newcopy.block_input_order = self.block_input_order.copy() newcopy.characterset = self.characterset newcopy.SetTemplate(self.master_template.copy()) newcopy.scoping = self.scoping #this sets visible keys return newcopy def update(self,adict): for key in adict.keys(): self[key] = adict[key] def items(self): return [(a,self[a]) for a in self.keys()] def first_block(self): """Return the 'first' block. This is not necessarily the first block in the file.""" if self.keys(): return self[self.keys()[0]] def NewBlock(self,blockname,blockcontents=None,fix=True,parent=None): """Add a new block named `blockname` with contents `blockcontents`. If `fix` is True, `blockname` will have spaces and tabs replaced by underscores. `parent` allows a parent block to be set so that block hierarchies can be created. Depending on the output standard, these blocks will be printed out as nested save frames or ignored.""" if blockcontents is None: blockcontents = self.blocktype() if self.standard == "CIF": blockcontents.setmaxnamelength(75) if len(blockname)>75: raise StarError('Blockname %s is longer than 75 characters' % blockname) if fix: newblockname = re.sub('[ \t]','_',blockname) else: newblockname = blockname new_lowerbn = newblockname.lower() if new_lowerbn in self.lower_keys: #already there if self.standard is not None: toplevelnames = [a[0] for a in self.child_table.items() if a[1].parent==None] if parent is None and new_lowerbn not in toplevelnames: #can give a new key to this one while new_lowerbn in self.lower_keys: new_lowerbn = new_lowerbn + '+' elif parent is not None and new_lowerbn in toplevelnames: #can fix a different one replace_name = new_lowerbn while replace_name in self.lower_keys: replace_name = replace_name + '+' self._rekey(new_lowerbn,replace_name) # now continue on to add in the new block if parent.lower() == new_lowerbn: #the new block's requested parent just got renamed!! 
parent = replace_name else: raise StarError( "Attempt to replace existing block " + blockname) else: del self[new_lowerbn] self.dictionary.update({new_lowerbn:blockcontents}) self.lower_keys.add(new_lowerbn) self.block_input_order.append(new_lowerbn) if parent is None: self.child_table[new_lowerbn]=self.PC(newblockname,None) self.visible_keys.append(new_lowerbn) else: if parent.lower() in self.lower_keys: if self.scoping == 'instance': self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) else: self.child_table[new_lowerbn]=self.PC(newblockname,parent.lower()) self.visible_keys.append(new_lowerbn) else: print('Warning:Parent block %s does not exist for child %s' % (parent,newblockname)) self[new_lowerbn].set_grammar(self.grammar) self[new_lowerbn].set_characterset(self.characterset) self[new_lowerbn].formatting_hints = self.master_template return new_lowerbn #in case calling routine wants to know def _rekey(self,oldname,newname,block_id=''): """The block with key [[oldname]] gets [[newname]] as a new key, but the printed name does not change unless [[block_id]] is given. Prefer [[rename]] for a safe version.""" move_block = self[oldname] #old block is_visible = oldname in self.visible_keys move_block_info = self.child_table[oldname] #old info move_block_children = [a for a in self.child_table.items() if a[1].parent==oldname] # now rewrite the necessary bits self.child_table.update(dict([(a[0],self.PC(a[1].block_id,newname)) for a in move_block_children])) oldpos = self.block_input_order.index(oldname) del self[oldname] #do this after updating child table so we don't delete children self.dictionary.update({newname:move_block}) self.lower_keys.add(newname) #print 'Block input order was: ' + `self.block_input_order` self.block_input_order[oldpos:oldpos]=[newname] if block_id == '': self.child_table.update({newname:move_block_info}) else: self.child_table.update({newname:self.PC(block_id,move_block_info.parent)}) if is_visible: self.visible_keys += [newname] def rename(self,oldname,newname): """Rename datablock from [[oldname]] to [[newname]]. Both key and printed name are changed. No conformance checks are conducted.""" realoldname = oldname.lower() realnewname = newname.lower() if realnewname in self.lower_keys: raise StarError('Cannot change blockname %s to %s as %s already present' % (oldname,newname,newname)) if realoldname not in self.lower_keys: raise KeyError('Cannot find old block %s' % realoldname) self._rekey(realoldname,realnewname,block_id=newname) def makebc(self,namelist,scoping='dictionary'): """Make a block collection from a list of block names""" newbc = BlockCollection() block_lower = [n.lower() for n in namelist] proto_child_table = [a for a in self.child_table.items() if a[0] in block_lower] newbc.child_table = dict(proto_child_table) new_top_level = [(a[0],self.PC(a[1].block_id,None)) for a in newbc.child_table.items() if a[1].parent not in block_lower] newbc.child_table.update(dict(new_top_level)) newbc.lower_keys = set([a[0] for a in proto_child_table]) newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table) newbc.scoping = scoping newbc.block_input_order = block_lower return newbc def merge_fast(self,new_bc,parent=None): """Do a fast merge. WARNING: this may change one or more of its frame headers in order to remove duplicate frames. 
Please keep a handle to the block object instead of the text of the header.""" if self.standard is None: mode = 'replace' else: mode = 'strict' overlap_flag = not self.lower_keys.isdisjoint(new_bc.lower_keys) if parent is not None: parent_name = [a[0] for a in self.dictionary.items() if a[1] == parent] if len(parent_name)==0 or len(parent_name)>1: raise StarError("Unable to find unique parent block name: have %s" % str(parent_name)) parent_name = parent_name[0] else: parent_name = None #an error will be thrown if we treat as a string if overlap_flag and mode != 'replace': double_keys = self.lower_keys.intersection(new_bc.lower_keys) for dup_key in double_keys: our_parent = self.child_table[dup_key].parent their_parent = new_bc.child_table[dup_key].parent if (our_parent is None and their_parent is not None and parent is None) or\ parent is not None: #rename our block start_key = dup_key while start_key in self.lower_keys: start_key = start_key+'+' self._rekey(dup_key,start_key) if parent_name.lower() == dup_key: #we just renamed the prospective parent! parent_name = start_key elif our_parent is not None and their_parent is None and parent is None: start_key = dup_key while start_key in new_bc.lower_keys: start_key = start_key+'+' new_bc._rekey(dup_key,start_key) else: raise StarError("In strict merge mode:duplicate keys %s" % dup_key) self.dictionary.update(new_bc.dictionary) self.lower_keys.update(new_bc.lower_keys) self.visible_keys += (list(new_bc.lower_keys)) self.block_input_order += new_bc.block_input_order #print('Block input order now:' + repr(self.block_input_order)) self.child_table.update(new_bc.child_table) if parent_name is not None: #redo the child_table entries reparent_list = [(a[0],a[1].block_id) for a in new_bc.child_table.items() if a[1].parent==None] reparent_dict = [(a[0],self.PC(a[1],parent_name.lower())) for a in reparent_list] self.child_table.update(dict(reparent_dict)) def merge(self,new_bc,mode=None,parent=None,single_block=[], idblock="",match_att=[],match_function=None): if mode is None: if self.standard is None: mode = 'replace' else: mode = 'strict' if single_block: self[single_block[0]].merge(new_bc[single_block[1]],mode, match_att=match_att, match_function=match_function) return None base_keys = [a[1].block_id for a in self.child_table.items()] block_to_item = base_keys #default new_keys = [a[1].block_id for a in new_bc.child_table.items()] #get list of incoming blocks if match_att: #make a blockname -> item name map if match_function: block_to_item = [match_function(self[a]) for a in self.keys()] else: block_to_item = [self[a].get(match_att[0],None) for a in self.keys()] #print `block_to_item` for key in new_keys: #run over incoming blocknames if key == idblock: continue #skip dictionary id basekey = key #default value if len(match_att)>0: attval = new_bc[key].get(match_att[0],0) #0 if ignoring matching else: attval = 0 for ii in range(len(block_to_item)): #do this way to get looped names thisatt = block_to_item[ii] #keyname in old block #print "Looking for %s in %s" % (attval,thisatt) if attval == thisatt or \ (isinstance(thisatt,list) and attval in thisatt): basekey = base_keys.pop(ii) block_to_item.remove(thisatt) break if not basekey in self or mode=="replace": new_parent = new_bc.get_parent(key) if parent is not None and new_parent is None: new_parent = parent self.NewBlock(basekey,new_bc[key],parent=new_parent) #add the block else: if mode=="strict": raise StarError( "In strict merge mode: block %s in old and block %s in new files" % (basekey,key)) 
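                    # overlay mode falls through to a block-level merge of the
                    # two same-named blocks instead of raising an error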
elif mode=="overlay": # print "Merging block %s with %s" % (basekey,key) self[basekey].merge(new_bc[key],mode,match_att=match_att) else: raise StarError( "Merge called with unknown mode %s" % mode) def checknamelengths(self,target_block,maxlength=-1): if maxlength < 0: return else: toolong = [a for a in target_block.keys() if len(a)>maxlength] outstring = "" if toolong: outstring = "\n".join(toolong) raise StarError( 'Following data names too long:' + outstring) def get_all(self,item_name): raw_values = [self[a].get(item_name) for a in self.keys()] raw_values = [a for a in raw_values if a != None] ret_vals = [] for rv in raw_values: if isinstance(rv,list): for rvv in rv: if rvv not in ret_vals: ret_vals.append(rvv) else: if rv not in ret_vals: ret_vals.append(rv) return ret_vals def __setattr__(self,attr_name,newval): if attr_name == 'scoping': if newval not in ('dictionary','instance'): raise StarError("Star file may only have 'dictionary' or 'instance' scoping, not %s" % newval) if newval == 'dictionary': self.visible_keys = [a for a in self.lower_keys] else: #only top-level datablocks visible self.visible_keys = [a[0] for a in self.child_table.items() if a[1].parent==None] object.__setattr__(self,attr_name,newval) def get_parent(self,blockname): """Return the name of the block enclosing [[blockname]] in canonical form (lower case)""" possibles = (a for a in self.child_table.items() if a[0] == blockname.lower()) try: first = next(possibles) #get first one except: raise StarError('no parent for %s' % blockname) try: second = next(possibles) except StopIteration: return first[1].parent raise StarError('More than one parent for %s' % blockname) def get_roots(self): """Get the top-level blocks""" return [a for a in self.child_table.items() if a[1].parent==None] def get_children(self,blockname,include_parent=False,scoping='dictionary'): """Get all children of [[blockname]] as a block collection. 
If [[include_parent]] is True, the parent block will also be included in the block collection as the root.""" newbc = BlockCollection() block_lower = blockname.lower() proto_child_table = [a for a in self.child_table.items() if self.is_child_of_parent(block_lower,a[1].block_id)] newbc.child_table = dict(proto_child_table) if not include_parent: newbc.child_table.update(dict([(a[0],self.PC(a[1].block_id,None)) for a in proto_child_table if a[1].parent == block_lower])) newbc.lower_keys = set([a[0] for a in proto_child_table]) newbc.dictionary = dict((a[0],self.dictionary[a[0]]) for a in proto_child_table) if include_parent: newbc.child_table.update({block_lower:self.PC(self.child_table[block_lower].block_id,None)}) newbc.lower_keys.add(block_lower) newbc.dictionary.update({block_lower:self.dictionary[block_lower]}) newbc.scoping = scoping return newbc def get_immediate_children(self,parentname): """Get the next level of children of the given block as a list, without nested levels""" child_handles = [a for a in self.child_table.items() if a[1].parent == parentname.lower()] return child_handles # This takes time def get_child_list(self,parentname): """Get a list of all child categories in alphabetical order""" child_handles = [a[0] for a in self.child_table.items() if self.is_child_of_parent(parentname.lower(),a[0])] child_handles.sort() return child_handles def is_child_of_parent(self,parentname,blockname): """Return `True` if `blockname` is a child of `parentname`""" checkname = parentname.lower() more_children = [a[0] for a in self.child_table.items() if a[1].parent == checkname] if blockname.lower() in more_children: return True else: for one_child in more_children: if self.is_child_of_parent(one_child,blockname): return True return False def set_parent(self,parentname,childname): """Set the parent block""" # first check that both blocks exist if parentname.lower() not in self.lower_keys: raise KeyError('Parent block %s does not exist' % parentname) if childname.lower() not in self.lower_keys: raise KeyError('Child block %s does not exist' % childname) old_entry = self.child_table[childname.lower()] self.child_table[childname.lower()]=self.PC(old_entry.block_id, parentname.lower()) self.scoping = self.scoping #reset visibility def SetTemplate(self,template_file): """Use `template_file` as a template for all block output""" self.master_template = process_template(template_file) for b in self.dictionary.values(): b.formatting_hints = self.master_template def WriteOut(self,comment='',wraplength=80,maxoutlength=0,blockorder=None,saves_after=None): """Return the contents of this file as a string, wrapping if possible at `wraplength` characters and restricting maximum line length to `maxoutlength`. Delimiters and save frame nesting are controlled by `self.grammar`. If `blockorder` is provided, blocks are output in this order unless nested save frames have been requested (STAR2). The default block order is the order in which blocks were input. `saves_after` inserts all save frames after the given dataname, which allows less important items to appear later. 
Useful in conjunction with a template for dictionary files.""" if maxoutlength != 0: self.SetOutputLength(maxoutlength) if not comment: comment = self.header_comment outstring = StringIO() if self.grammar == "2.0" and comment[0:10] != r"#\#CIF_2.0": outstring.write(r"#\#CIF_2.0" + "\n") outstring.write(comment) # prepare all blocks for b in self.dictionary.values(): b.set_grammar(self.grammar) b.formatting_hints = self.master_template b.SetOutputLength(wraplength,self.maxoutlength) # loop over top-level # monitor output all_names = list(self.child_table.keys()) #i.e. lower case if blockorder is None: blockorder = self.block_input_order top_block_names = [(a,self.child_table[a].block_id) for a in blockorder if self.child_table[a].parent is None] for blockref,blockname in top_block_names: print('Writing %s, ' % blockname + repr(self[blockref])) outstring.write('\n' + 'data_' +blockname+'\n') all_names.remove(blockref) if self.standard == 'Dic': #put contents before save frames outstring.write(self[blockref].printsection(finish_at='_dictionary_valid.application')) if self.grammar == 'STAR2': #nested save frames child_refs = self.get_immediate_children(blockref) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,4) outstring.write('\n' + 'save_'+ '\n') elif self.grammar in ('1.0','1.1','2.0'): #non-nested save frames child_refs = [a for a in blockorder if self.is_child_of_parent(blockref,a)] for child_ref in child_refs: child_name = self.child_table[child_ref].block_id outstring.write('\n\n' + 'save_' + child_name + '\n') outstring.write(str(self[child_ref])) outstring.write('\n\n' + 'save_' + '\n') all_names.remove(child_ref.lower()) else: raise StarError('Grammar %s is not recognised for output' % self.grammar) if self.standard != 'Dic': #put contents after save frames outstring.write(str(self[blockref])) else: outstring.write(self[blockref].printsection(start_from='_dictionary_valid.application')) returnstring = outstring.getvalue() outstring.close() if len(all_names)>0: print('WARNING: following blocks not output: %s' % repr(all_names)) else: print('All blocks output.') return returnstring def block_to_string_nested(self,block_ref,block_id,outstring,indentlevel=0): """Output a complete datablock indexed by [[block_ref]] and named [[block_id]], including children, and syntactically nesting save frames""" child_refs = self.get_immediate_children(block_ref) self[block_ref].set_grammar(self.grammar) if self.standard == 'Dic': outstring.write(str(self[block_ref])) for child_ref,child_info in child_refs: child_name = child_info.block_id outstring.write('\n' + 'save_' + child_name + '\n') self.block_to_string_nested(child_ref,child_name,outstring,indentlevel) outstring.write('\n' + ' '*indentlevel + 'save_' + '\n') if self.standard != 'Dic': outstring.write(str(self[block_ref])) class StarFile(BlockCollection): def __init__(self,datasource=None,maxinlength=-1,maxoutlength=0, scoping='instance',grammar='1.1',scantype='standard', permissive=False,**kwargs): super(StarFile,self).__init__(datasource=datasource,**kwargs) self.my_uri = getattr(datasource,'my_uri','') if maxoutlength == 0: self.maxoutlength = 2048 else: self.maxoutlength = maxoutlength self.scoping = scoping if isinstance(datasource,(unicode,str)) or hasattr(datasource,"read"): ReadStar(datasource,prepared=self,grammar=grammar,scantype=scantype, maxlength = maxinlength,permissive=permissive) 
self.header_comment = \ """#\\#STAR ########################################################################## # STAR Format file # Produced by PySTARRW module # # This is a STAR file. STAR is a superset of the CIF file type. For # more information, please refer to International Tables for Crystallography, # Volume G, Chapter 2.1 # ########################################################################## """ def set_uri(self,my_uri): self.my_uri = my_uri class CIFStringIO(StringIO): def __init__(self,target_width=80,**kwargs): StringIO.__init__(self,**kwargs) self.currentpos = 0 self.target_width = target_width self.tabwidth = -1 self.indentlist = [0] self.last_char = "" def write(self,outstring,canbreak=False,mustbreak=False,do_tab=True,newindent=False,unindent=False, delimiter=False,startcol=-1): """Write a string with correct linebreak, tabs and indents""" # do we need to break? if delimiter: if len(outstring)>1: raise ValueError('Delimiter %s is longer than one character' % repr( outstring )) output_delimiter = True if mustbreak: #insert a new line and indent temp_string = '\n' + ' ' * self.indentlist[-1] StringIO.write(self,temp_string) self.currentpos = self.indentlist[-1] self.last_char = temp_string[-1] if self.currentpos+len(outstring)>self.target_width: #try to break if not delimiter and outstring[0]!='\n': #ie ; if canbreak: temp_string = '\n' + ' ' * self.indentlist[-1] StringIO.write(self,temp_string) self.currentpos = self.indentlist[-1] self.last_char = temp_string[-1] else: #assume a break will be forced on next value output_delimiter = False #the line break becomes the delimiter #try to match requested column if startcol > 0: if self.currentpos < startcol: StringIO.write(self,(startcol - self.currentpos)* ' ') self.currentpos = startcol self.last_char = ' ' else: print('Could not format %s at column %d as already at %d' % (outstring,startcol,self.currentpos)) startcol = -1 #so that tabbing works as a backup #handle tabs if self.tabwidth >0 and do_tab and startcol < 0: next_stop = ((self.currentpos//self.tabwidth)+1)*self.tabwidth #print 'Currentpos %d: Next tab stop at %d' % (self.currentpos,next_stop) if self.currentpos < next_stop: StringIO.write(self,(next_stop-self.currentpos)*' ') self.currentpos = next_stop self.last_char = ' ' #calculate indentation after tabs and col setting applied if newindent: #indent by current amount if self.indentlist[-1] == 0: #first time self.indentlist.append(self.currentpos) # print 'Indentlist: ' + `self.indentlist` else: self.indentlist.append(self.indentlist[-1]+2) elif unindent: if len(self.indentlist)>1: self.indentlist.pop() else: print('Warning: cannot unindent any further') #check that we still need a delimiter if self.last_char in [' ','\n','\t']: output_delimiter = False #now output the string - every invocation comes through here if (delimiter and output_delimiter) or not delimiter: StringIO.write(self,outstring) last_line_break = outstring.rfind('\n') if last_line_break >=0: self.currentpos = len(outstring)-last_line_break else: self.currentpos = self.currentpos + len(outstring) #remember the last character if len(outstring)>0: self.last_char = outstring[-1] def set_tab(self,tabwidth): """Set the tab stop position""" self.tabwidth = tabwidth class StarError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nStar Format error: '+ self.value class StarLengthError(Exception): def __init__(self,value): self.value = value def __str__(self): return '\nStar length error: ' + self.value class 
StarDerivationError(Exception): def __init__(self,fail_name): self.fail_name = fail_name def __str__(self): return "Derivation of %s failed, None returned" % self.fail_name # # This is subclassed from AttributeError in order to allow hasattr # to work. # class StarDerivationFailure(AttributeError): def __init__(self,fail_name): self.fail_name = fail_name def __str__(self): return "Derivation of %s failed" % self.fail_name def ReadStar(filename,prepared = None, maxlength=-1, scantype='standard',grammar='STAR2',CBF=False, permissive=False): """ Read in a STAR file, returning the contents in the `prepared` object. * `filename` may be a URL, a file path on the local system, or any object with a `read` method. * `prepared` provides a `StarFile` or `CifFile` object that the contents of `filename` will be added to. * `maxlength` is the maximum allowable line length in the input file. This has been set at 2048 characters for CIF but is unlimited (-1) for STAR files. * `grammar` chooses the STAR grammar variant. `1.0` is the original 1992 CIF/STAR grammar and `1.1` is identical except for the exclusion of square brackets as the first characters in undelimited datanames. `2.0` will read files in the CIF2.0 standard, and `STAR2` will read files according to the STAR2 publication. If grammar is `None` or `auto`, autodetection will be attempted in the order `2.0`, `1.1` and `1.0`. This will always succeed for conformant CIF2.0 files. Note that (nested) save frames are read in all grammar variations and then flagged afterwards if they do not match the requested grammar. * `scantype` can be `standard` or `flex`. `standard` provides pure Python parsing at the cost of a factor of 10 or so in speed. `flex` will tokenise the input CIF file using fast C routines. Note that running PyCIFRW in Jython uses native Java regular expressions to provide a speedup regardless of this argument. * `CBF` flags that the input file is in Crystallographic Binary File format. The binary block is excised from the input data stream before parsing and is not available in the returned object. * `permissive` allows non UTF8 encodings (currently only latin1) in the input file. These are a violation of the standard. """ # save desired scoping save_scoping = prepared.scoping from . import YappsStarParser_1_1 as Y11 from . import YappsStarParser_1_0 as Y10 from . import YappsStarParser_2_0 as Y20 from . import YappsStarParser_STAR2 as YST if prepared is None: prepared = StarFile() if grammar == "auto" or grammar is None: try_list = [('2.0',Y20),('1.1',Y11),('1.0',Y10)] elif grammar == '1.0': try_list = [('1.0',Y10)] elif grammar == '1.1': try_list = [('1.1',Y11)] elif grammar == '2.0': try_list = [('2.0',Y20)] elif grammar == 'STAR2': try_list = [('STAR2',YST)] else: raise AttributeError('Unknown STAR/CIF grammar requested, %s' % repr( grammar )) if isinstance(filename,(unicode,str)): # create an absolute URL relpath = urlparse(filename) if relpath.scheme == "": if not os.path.isabs(filename): fullpath = os.path.join(os.getcwd(),filename) else: fullpath = filename newrel = list(relpath) newrel[0] = "file" newrel[2] = fullpath my_uri = urlunparse(newrel) else: my_uri = urlunparse(relpath) # print("Full URL is: " + my_uri) filestream = urlopen(my_uri) try: text = filestream.read().decode('utf-8-sig') except UnicodeDecodeError: if permissive: text = filestream.read().decode('latin1') print("WARNING: %s violates standard (latin1 encoding instead of UTF8)." 
% filename) else: raise StarError("%s: bad encoding (must be utf8 or ascii)" % filename) filestream.close() else: filestream = filename #already opened for us text = filestream.read() if not isinstance(text,unicode): try: text = text.decode('utf-8-sig') #CIF is always ascii/utf8 except UnicodeDecodeError: if permissive: text = filestream.read().decode('latin1') print("WARNING: text violates CIF standard (latin1 encoding instead of UTF8)") else: raise StarError("Bad input encoding (must be utf8 or ascii)") my_uri = "" if not text: # empty file, return empty block return prepared.set_uri(my_uri) # filter out non-ASCII characters in CBF files if required. We assume # that the binary is enclosed in a fixed string that occurs # nowhere else. if CBF: text_bits = text.split("-BINARY-FORMAT-SECTION-") text = text_bits[0] for section in range(2,len(text_bits),2): text = text+" (binary omitted)"+text_bits[section] # we recognise ctrl-Z as end of file endoffile = text.find(chr(26)) if endoffile >= 0: text = text[:endoffile] split = text.split('\n') if maxlength > 0: toolong = [a for a in split if len(a)>maxlength] if toolong: pos = split.index(toolong[0]) raise StarError( 'Line %d contains more than %d characters' % (pos+1,maxlength)) # honour the header string if text[:10] != "#\#CIF_2.0" and ('2.0',Y20) in try_list: try_list.remove(('2.0',Y20),) if not try_list: raise StarError('File %s missing CIF2.0 header' % (filename)) for grammar_name,Y in try_list: if scantype == 'standard' or grammar_name in ['2.0','STAR2']: parser = Y.StarParser(Y.StarParserScanner(text)) else: parser = Y.StarParser(Y.yappsrt.Scanner(None,[],text,scantype='flex')) # handle encoding switch if grammar_name in ['2.0','STAR2']: prepared.set_characterset('unicode') else: prepared.set_characterset('ascii') proto_star = None try: proto_star = getattr(parser,"input")(prepared) except Y.yappsrt.YappsSyntaxError as e: input = parser._scanner.input Y.yappsrt.print_error(input, e, parser._scanner) except Y.yappsrt.NoMoreTokens: print('Could not complete parsing; stopped around here:',file=sys.stderr) print(parser._scanner,file=sys.stderr) except ValueError: print('Unexpected error:') import traceback traceback.print_exc() if proto_star is not None: proto_star.set_grammar(grammar_name) #remember for output break if proto_star is None: errorstring = 'Syntax error in input file: last value parsed was %s' % Y.lastval errorstring = errorstring + '\nParser status: %s' % repr( parser._scanner ) raise StarError( errorstring) # set visibility correctly proto_star.scoping = 'dictionary' proto_star.set_uri(my_uri) proto_star.scoping = save_scoping return proto_star def get_dim(dataitem,current=0,packlen=0): zerotypes = [int, float, str] if type(dataitem) in zerotypes: return current, packlen if not dataitem.__class__ == ().__class__ and \ not dataitem.__class__ == [].__class__: return current, packlen elif len(dataitem)>0: # print "Get_dim: %d: %s" % (current,`dataitem`) return get_dim(dataitem[0],current+1,len(dataitem)) else: return current+1,0 def apply_line_folding(instring,minwraplength=60,maxwraplength=80): """Insert line folding characters into instring between min/max wraplength""" # first check that we need to do this lines = instring.split('\n') line_len = [len(l) for l in lines] if max(line_len) < maxwraplength and re.match("\\[ \v\t\f]*\n",instring) is None: return instring outstring = "\\\n" #header for l in lines: if len(l) < maxwraplength: outstring = outstring + l if len(l) > 0 and l[-1]=='\\': #who'da thunk it? 
A line ending with a backslash outstring = outstring + "\\\n" # outstring = outstring + "\n" # put back the split character else: current_bit = l while len(current_bit) > maxwraplength: space_pos = re.search('[ \v\f\t]+',current_bit[minwraplength:]) if space_pos is not None and space_pos.start()[^;\\\n][^\n\\\\]+)(?P\\\\{1,2}[ \t\v\f]*\n)",instring) if prefix_match is not None: prefix_text = prefix_match.group('prefix') print('Found prefix %s' % prefix_text) prefix_end = prefix_match.end('folding') # keep any line folding instructions if prefix_match.group('folding')[:2]=='\\\\': #two backslashes outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n") return "\\" + outstring #keep line folding first line else: outstring = instring[prefix_match.end('folding')-1:].replace("\n"+prefix_text,"\n") return outstring[1:] #drop first line ending, no longer necessary else: return instring def listify(item): if isinstance(item,unicode): return [item] else: return item #Transpose the list of lists passed to us def transpose(base_list): new_lofl = [] full_length = len(base_list) opt_range = range(full_length) for i in range(len(base_list[0])): new_packet = [] for j in opt_range: new_packet.append(base_list[j][i]) new_lofl.append(new_packet) return new_lofl # This routine optimised to return as quickly as possible # as it is called a lot. def not_none(itemlist): """Return true only if no values of None are present""" if itemlist is None: return False if not isinstance(itemlist,(tuple,list)): return True for x in itemlist: if not not_none(x): return False return True def check_stringiness(data): """Check that the contents of data are all strings""" if not hasattr(data,'dtype'): #so not Numpy from numbers import Number if isinstance(data,Number): return False elif isinstance(data,(unicode,str)): return True elif data is None:return False #should be data are None :) else: for one_item in data: if not check_stringiness(one_item): return False return True #all must be strings else: #numerical python import numpy if data.ndim == 0: #a bare value if data.dtype.kind in ['S','U']: return True else: return False else: for one_item in numpy.nditer(data): print('numpy data: ' + repr( one_item )) if not check_stringiness(one_item): return False return True def process_template(template_file): """Process a template datafile to formatting instructions""" template_as_cif = StarFile(template_file,grammar="2.0").first_block() if isinstance(template_file,(unicode,str)): template_string = open(template_file).read() else: #a StringIO object template_file.seek(0) #reset template_string = template_file.read() #template_as_lines = template_string.split("\n") #template_as_lines = [l for l in template_as_lines if len(l)>0 and l[0]!='#'] #template_as_lines = [l for l in template_as_lines if l.split()[0] != 'loop_'] #template_full_lines = dict([(l.split()[0],l) for l in template_as_lines if len(l.split())>0]) form_hints = [] #ordered array of hint dictionaries find_indent = "^ +" for item in template_as_cif.item_order: #order of input if not isinstance(item,int): #not nested hint_dict = {"dataname":item} # find the line in the file start_pos = re.search("(^[ \t]*(?P" + item + ")[ \t\n]+)(?P([\S]+)|(^;))",template_string,re.I|re.M) if start_pos.group("spec") != None: spec_pos = start_pos.start("spec")-start_pos.start(0) spec_char = template_string[start_pos.start("spec"):start_pos.start("spec")+3] if spec_char[0] in '\'";': hint_dict.update({"delimiter":spec_char[0]}) if spec_char == '"""' or spec_char 
== "'''": hint_dict.update({"delimiter":spec_char}) if spec_char[0] != ";": #so we need to work out the column number hint_dict.update({"column":spec_pos}) else: #need to put in the carriage return hint_dict.update({"delimiter":"\n;"}) # can we format the text? text_val = template_as_cif[item] hint_dict["reformat"] = "\n\t" in text_val or "\n " in text_val if hint_dict["reformat"]: #find the indentation p = re.search(find_indent,text_val,re.M) if p.group() is not None: hint_dict["reformat_indent"]=p.end() - p.start() if start_pos.group('name') != None: name_pos = start_pos.start('name') - start_pos.start(0) hint_dict.update({"name_pos":name_pos}) #print '%s: %s' % (item,`hint_dict`) form_hints.append(hint_dict) else: #loop block testnames = template_as_cif.loops[item] total_items = len(template_as_cif.loops[item]) testname = testnames[0] #find the loop spec line in the file loop_regex = "(^[ \t]*(?Ploop_)[ \t\n\r]+(?P" + testname + ")([ \t\n\r]+_[\S]+){%d}[ \t]*$(?P(.(?!_loop|_[\S]+))*))" % (total_items - 1) loop_line = re.search(loop_regex,template_string,re.I|re.M|re.S) loop_so_far = loop_line.end() packet_text = loop_line.group('packet') loop_indent = loop_line.start('loop') - loop_line.start(0) form_hints.append({"dataname":'loop','name_pos':loop_indent}) packet_regex = "[ \t]*(?P(?P'''([^\n\r\f']*)''')|(?P'([^\n\r\f']*)'+)|(?P\"([^\n\r\"]*)\"+)|(?P[^\s]+))" packet_pos = re.finditer(packet_regex,packet_text) line_end_pos = re.finditer("^",packet_text,re.M) next_end = next(line_end_pos).end() last_end = next_end for loopname in testnames: #find the name in the file for name pos name_regex = "(^[ \t]*(?P" + loopname + "))" name_match = re.search(name_regex,template_string,re.I|re.M|re.S) loop_name_indent = name_match.start('name')-name_match.start(0) hint_dict = {"dataname":loopname,"name_pos":loop_name_indent} #find the value thismatch = next(packet_pos) while thismatch.start('all') > next_end: try: last_end = next_end next_end = next(line_end_pos).start() print('next end %d' % next_end) except StopIteration: break print('Start %d, last_end %d' % (thismatch.start('all'),last_end)) col_pos = thismatch.start('all') - last_end + 1 if thismatch.group('none') is None: if thismatch.group('sqqq') is not None: hint_dict.update({'delimiter':"'''"}) else: hint_dict.update({'delimiter':thismatch.groups()[0][0]}) hint_dict.update({'column':col_pos}) print('%s: %s' % (loopname,repr( hint_dict ))) form_hints.append(hint_dict) return form_hints #No documentation flags pycifrw-4.4/src/TypeContentsParser.html000066400000000000000000000070351345362224200203450ustar00rootroot00000000000000 TypeContentsParser.nw Noweb literate programming file for the DDLm _type.contents type specification using Yapps3.

<TypeContents_syntax>=
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

<Helper functions>
%%
parser TypeParser:
    <Regular expressions>
    <Grammar specification>
%%

Helper functions.

We have a monitor function which we can call to save the last parsed value (and print, if we are debugging). Unlike the full STAR parser, this grammar deals only in bare identifiers, so no delimiter-stripping or loop-matching helpers are needed here.

<Helper functions>= (<-U)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

<Regular expressions>= (<-U)
# first handle whitespace
ignore: "([ \t\n\r])"
# now the tokens
token container: "[A-Za-z]+\("
token identifier: "[A-Za-z]+" 
token c_c_b: "\)"
token o_c_b: "\("
token comma: "\,"
token END: '$'

The final returned value is a possibly nested list with string-valued entries, which can then be interpreted as simple types; a usage sketch follows the grammar below.

<Grammar specification>= (<-U)
# now the rules

rule input: ( (( 
            base_element         {{p = [base_element]}}
            (
            comma base_element         {{p.append(base_element)}} #
            )*
            END                  {{if len(p)==1: p = p[0]}} 
            )
            ))                   {{return p}}


     rule base_element:  (container  element_list c_c_b  {{return element_list}}
                          |
                          identifier )  {{return identifier}}

     rule element_list:  ( base_element         {{p = [base_element]}}
                         ( comma base_element   {{p.append(base_element)}}
                         ) *
                         )                     {{return p}}
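
To make the result concrete, here is a short usage sketch. It is untested and assumes that the generated parser is importable as CifFile.TypeContentsParser, and that the Yapps runtime's wrap_error_reporter simply invokes the named rule:

from CifFile.TypeContentsParser import parse

parse('input', 'Real')                              # -> 'Real'
parse('input', 'Matrix(Real,Real,Real)')            # -> ['Real', 'Real', 'Real']
parse('input', 'List(Real,List(Integer,Integer))')  # -> ['Real', ['Integer', 'Integer']]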

pycifrw-4.4/src/TypeContentsParser.nw000066400000000000000000000041341345362224200200220ustar00rootroot00000000000000@ Noweb literate programming file for the DDLm _type.contents type specification using Yapps3. <>= # To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import <> %% parser TypeParser: <> <> %% @ Helper functions. We have a monitor function which we can call to save the last parsed value (and print, if we are debugging). We also have functions for stripping off delimiters from strings. Finally, we match up our loops after reading them in. Note that we have function stripextras, which is only for semicolon strings, and stripstring, which is for getting rid of the inverted commas. <>= # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value @ <>= # first handle whitespace ignore: "([ \t\n\r])" # now the tokens token container: "[A-Za-z]+\(" token identifier: "[A-Za-z]+" token c_c_b: "\)" token o_c_b: "\(" token comma: "\," token END: '$' @ The final returned value is a possible-nested list with string-valued entries, which can then be interpreted as simple types. <>= # now the rules rule input: ( (( base_element {{p = [base_element]}} ( comma base_element {{p.append(base_element)}} # )* END {{if len(p)==1: p = p[0]}} ) )) {{return p}} rule base_element: (container element_list c_c_b {{return element_list}} | identifier ) {{return identifier}} rule element_list: ( base_element {{p = [base_element]}} ( comma base_element {{p.append(base_element)}} ) * ) {{return p}} pycifrw-4.4/src/TypeContentsParser.py000066400000000000000000000055311345362224200200300ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value # Begin -- grammar generated by Yapps import sys, re from . 
import yapps3_compiled_rt as yappsrt class TypeParserScanner(yappsrt.Scanner): def __init__(self, *args,**kwargs): patterns = [ ('([ \t\n\r])', '([ \t\n\r])'), ('container', '[A-Za-z]+\\('), ('identifier', '[A-Za-z]+'), ('c_c_b', '\\)'), ('o_c_b', '\\('), ('comma', '\\,'), ('END', '$'), ] yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r])'],*args,**kwargs) class TypeParser(yappsrt.Parser): Context = yappsrt.Context def input(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'input', []) base_element = self.base_element(_context) p = [base_element] while self._peek('END', 'comma') == 'comma': comma = self._scan('comma') base_element = self.base_element(_context) p.append(base_element) if self._peek() not in ['END', 'comma']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'END'])) END = self._scan('END') if len(p)==1: p = p[0] return p def base_element(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'base_element', []) _token = self._peek('container', 'identifier') if _token == 'container': container = self._scan('container') element_list = self.element_list(_context) c_c_b = self._scan('c_c_b') return element_list else: # == 'identifier' identifier = self._scan('identifier') return identifier def element_list(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'element_list', []) base_element = self.base_element(_context) p = [base_element] while self._peek('comma', 'c_c_b') == 'comma': comma = self._scan('comma') base_element = self.base_element(_context) p.append(base_element) if self._peek() not in ['comma', 'c_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['comma', 'c_c_b'])) return p def parse(rule, text): P = TypeParser(TypeParserScanner(text)) return yappsrt.wrap_error_reporter(P, rule) # End -- grammar generated by Yapps pycifrw-4.4/src/YappsStarParser.html000066400000000000000000001063441345362224200176370ustar00rootroot00000000000000 YappsStarParser.nw Noweb literate programming file for Star grammar and parser specification. We are using Amit Patel's excellent context-sensitive Yapps2 parser. ' This was chosen because it enables us to process long semicolon delimited strings without running into Python recursion limits. In the original kjParsing implementation, it was impossible to get the lexer to return a single line of text within the semicolon-delimited string as that re would have matched a single line of text anywhere in the file. The resulting very long match expression only worked for text strings less than about 9000 characters in length. For further information about Yapps2, see http://theory.stanford.edu/ amitp/Yapps/

Several standards are available, of which four are implemented: 1.0, 1.1, CIF2 and STAR2. CIF2 differs from STAR2 in that list elements are separated by whitespace rather than commas, and no nested save frames are allowed. Note that 1.0, 1.1 and CIF2/STAR2 differ in their treatment of unquoted data values beginning with brackets.

<1.0_syntax>=
<Python2-3 compatibility>
<Helper functions>
%%
parser StarParser:
    <Regular expressions 1.0>
    <Grammar specification 1.1>
%%

<1.1_syntax>=
<Python2-3 compatibility>
<Helper functions>
%%
parser StarParser:
    <Regular expressions 1.1>
    <Grammar specification 1.1>
%%

The following two recipes produce CIF2 and STAR2 syntax.

<CIF2_syntax>=
<Python2-3 compatibility>
<Helper functions>
%%
parser StarParser:
    <Regular expressions CIF2>
    <Grammar specification CIF2>
%%

The STAR2 syntax is assembled analogously.

<STAR2_syntax>=
<Python2-3 compatibility>
<Helper functions>
%%
parser StarParser:
    <Regular expressions STAR2>
    <Grammar specification STAR2>
%%

Helper functions.

We have a monitor function which we can call to save the last parsed value (and print, if we are debugging). We also have functions for stripping delimiters from strings, and we match up our loops after reading them in. Note that stripextras applies only to semicolon-delimited strings, while stripstring removes the surrounding inverted commas.

<Helper functions>= (<-U <-U <-U <-U)
# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value):
     from .StarFile import remove_line_folding, remove_line_prefix
     # we get rid of semicolons and leading/trailing terminators etc.
     import re
     jj = re.compile("[\n\r\f \t\v]*")
     semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;")
     cut = semis.match(value)
     if cut:        #we have a semicolon-delimited string
          nv = value[cut.end():len(value)-2]
          try:
             if nv[-1]=='\r': nv = nv[:-1]
          except IndexError:    #empty data value
             pass
          # apply protocols
          nv = remove_line_prefix(nv)
          nv = remove_line_folding(nv)
          return nv
     else:
          cut = jj.match(value)
          if cut:
               return stripstring(value[cut.end():])
          return value

# helper function to get rid of inverted commas etc.

def stripstring(value):
     if value:
         if value[0]== '\'' and value[-1]=='\'':
           return value[1:-1]
         if value[0]=='"' and value[-1]=='"':
           return value[1:-1]
     return value

# helper function to get rid of triple quotes
def striptriple(value):
    if value:
        if value[:3] == '"""' and value[-3:] == '"""':
            return value[3:-3]
        if value[:3] == "'''" and value[-3:] == "'''":
            return value[3:-3]
    return value

# helper function to populate a StarBlock given a list of names
# and values .
#
# Note that there may be an empty list at the very end of our itemlists,
# so we remove that if necessary.
#

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string =  'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid.  We need case insensitivity for
# names.

# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block.  This would only match a subset of the
# final items.   We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = [a.lower() for a in old_dict.keys()]  # a real list, not an iterator: it is tested repeatedly below
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]
#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow)
def pairwise(iterable):
    it = iter(iterable)
    while 1:
        try:
            yield next(it), next(it)
        except StopIteration:
            # PEP 479: a StopIteration escaping a generator becomes a
            # RuntimeError in Python 3.7+, so return explicitly instead
            return
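
A quick sanity sketch of the string helpers and pairwise. This is illustrative only and assumes the helpers are importable from one of the generated parser modules (for example CifFile.YappsStarParser_2_0):

from CifFile.YappsStarParser_2_0 import stripstring, striptriple, pairwise

stripstring("'a value'")               # -> 'a value'
striptriple('"""multi\nline"""')       # -> 'multi\nline'
dict(pairwise(['a', '1', 'b', '2']))   # -> {'a': '1', 'b': '2'}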

We can simplify the BNF specification of Nick Spadaccini. First of all, we do not need separate type I and type II strings, which are distinguished by the presence or absence of a line feed directly preceding them, and thus by whether a semicolon is allowed at the front. We take care of this by treating as whitespace all terminators except those followed by a semicolon, so that a line-terminator-semicolon sequence matches start_sc_line uniquely.
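
The effect of that lookahead can be checked directly. The following standalone sketch mirrors the first ignore pattern from the chunks below, using Python's re module:

import re
ignore = re.compile(r"([ \t\n\r](?!;))|[ \t]")
ignore.match("\nvalue")   # matches: an ordinary newline is plain whitespace
ignore.match("\n;text")   # None: a newline before ';' is left for start_sc_line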

We include reserved words and save frames. The other reserved words have no rules defined, so they will flag a syntax error. However, as Yapps is a context-sensitive parser, it will by default turn any word beginning with one of our reserved words into a data value if it occurs in the expected position, so we explicitly exclude anything starting with those words in the definition of data_value_1.

The syntax rules below correspond to the current STAR2 paper. Commas are not allowed in non-delimited data values so that they can be used to separate list items.

Note that we do not recognise characters outside the Unicode basic multilingual plane in datanames, data headings and save headings. This is due to a limitation of Python 2 unicode strings and will be removed when PyCIFRW is ported to Python 3.

<Regular expressions STAR2>= (<-U)
<STAR2-CIF2 common regular expressions block 1>
<STAR2-CIF2 common regular expressions block 2>
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$',_\{\}\[\]][^\s,\{\}\[\]]*)"

<STAR2-CIF2 common regular expressions block 1>= (<-U U->)
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK:  "(L|l)(O|o)(O|o)(P|p)_"        # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: u"(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: u"_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+" #_followed by stuff
token data_heading: u"(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token c_c_b: "\}"
token o_c_b: "\{"
token c_s_b: "\]"
token o_s_b: "\["
#token dat_val_nocomma_nosq: "([^\s\"#$,'_\(\{\[\]][^\s,\[\]]*)|'(('(?![\s,]))|([^\n\r\f']))*'+|\"((\"(?![\s,]))|([^\n\r\"]))*\"+"
token dat_val_internal_sq: "\[([^\s\[\]]*)\]"
# token dat_val_nocomma_nocurl: "([^\s\"#$,'_\(\{\[\]][^\s,}]*)|'(('(?![\s,]))|([^\n\r\f']))*'+|\"([^\n\r\"])*\"+"
# For tests of new DDLm syntax - no quotes or apostrophes in strings, no commas, braces or square brackets in undelimited data values
# This token for triple-quote delimited strings must come before single-quote delimited strings to avoid the opening quotes being
# interpreted as a single-quote delimited string
token triple_quote_data_value: "(?s)'''.*?'''|\"\"\".*?\"\"\""
token single_quote_data_value: "'([^\n\r\f'])*'+|\"([^\n\r\"])*\"+"

This shared block currently holds just the single END token, but we keep it as a separate chunk in case more common tokens are needed later.

<STAR2-CIF2 common regular expressions block 2>= (<-U U->)
token END: '$'

CIF 2.0 uses spaces instead of commas to separate list values, so commas are allowed in undelimited data values; a short parsing sketch follows the chunk below.

<Regular expressions CIF2>= (<-U)
<STAR2-CIF2 common regular expressions block 1>
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\{\}\[\]][^\s\{\}\[\]]*)"
<STAR2-CIF2 common regular expressions block 2>
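
The practical difference shows up when parsing a list. The following is a minimal, untested sketch; it assumes the usual PyCIFRW layout, in which StarFile can be imported from CifFile.StarFile and fed any object with a read method:

from io import StringIO
from CifFile.StarFile import StarFile

cif2 = StringIO(u"#\\#CIF_2.0\ndata_t\n_axis.vector [1 0 0]\n")
print(StarFile(cif2, grammar='2.0')['t']['_axis.vector'])    # ['1', '0', '0']

star2 = StringIO(u"data_t\n_axis.vector [1,0,0]\n")
print(StarFile(star2, grammar='STAR2')['t']['_axis.vector']) # ['1', '0', '0']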

CIF 1.1 does not allow unquoted data values to begin with a bracket character, but does not have bracket expressions as such.

<Regular expressions 1.1>= (<-U)
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK:  "(L|l)(O|o)(O|o)(P|p)_"        # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: "(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: "_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+" #_followed by stuff
token data_heading: "(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\{\[\]][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
token END: '$'

The original CIF specification allowed brackets to begin data values, even if not quoted. That is the only difference.

<Regular expressions 1.0>= (<-U)
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK:  "(L|l)(O|o)(O|o)(P|p)_"        # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: "(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: "_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+" #_followed by stuff
token data_heading: "(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
token END: '$'
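
To see the effect of the 1.0 rule, the following sketch (untested, with the same import assumptions as the example above) parses a bracket-initial undelimited value, which only grammar 1.0 accepts:

from io import StringIO
from CifFile.StarFile import StarFile

old = StringIO(u"data_t\n_v [abc]\n")
print(StarFile(old, grammar='1.0')['t']['_v'])  # -> '[abc]' (a single undelimited value)
# With grammar='1.1' the same text raises StarError, since '[' may not
# begin an undelimited data value.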

The final returned value is a StarFile, with each key a datablock name. The value attached to each key is an entire dictionary for that block. We bypass the standard __setitem__ methods to gain precision in checking for duplicate blocknames and skipping name checks.

Note in the following grammar that we have adjusted for some Yapps idiosyncrasies: in particular, a nested bracket expression needs to be distinguished from the top-level bracket expression, otherwise the context-sensitive parser will search within nested expressions for all those items that could follow the top-level bracket expression. The current version produces slightly misleading error messages, in that any type of closing bracket is reported as acceptable even though only a particular type will be accepted.

We also have to deal with Dimension-type lists, where square brackets may form part of the value (e.g. [4[5]]). This requires catching internal square brackets as well. The current grammar specification catches only this case, i.e. only the first element of the array may take the form xxx[yyy]; no other elements can have this form, and there can be no trailing characters. This could be allowed for other elements by trivially expanding the current description but, until further notice, I do not think it is useful to allow square brackets in list values.

<Grammar specification STAR2>= (<-U)
<CIF2-STAR2 common grammar>
<STAR2-specific grammar>

<Grammar specification CIF2>= (<-U)
<CIF2-STAR2 common grammar>
<CIF2-specific grammar>

CIF2 and STAR2 are almost identical in grammar

<CIF2-STAR2 common grammar>= (<-U <-U)
# now the rules

rule input<<prepared>>: ( ((
            dblock<<prepared>>         {{allblocks = prepared; allblocks.merge_fast(dblock)}}
            (
            dblock<<prepared>>         {{allblocks.merge_fast(dblock)}} #
            )*
            END
            )
            |
            (
            END                 {{allblocks = prepared}}
            )))                   {{allblocks.unlock(); return allblocks}}

     rule dblock<<prepared>>: ( data_heading {{heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading]}}# a data heading
                  (
                   dataseq<<stored_block>>   #because merging may have changed the heading  
                  |
                  save_frame<<prepared>>     {{thisbc.merge_fast(save_frame,parent=stored_block)}}
                  )*
                   )                      {{stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc))}} # but may be empty

     rule dataseq<<starblock>>:  data<<starblock>>
                       (
                       data<<starblock>>
                       )*

     rule data<<currentblock>>:        top_loop      {{makeloop(currentblock,top_loop)}}
                                        |
                                        datakvpair    {{currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False)}} #kv pair

     rule datakvpair: data_name data_value {{return [data_name,data_value]}} # name-value

     rule data_value: (data_value_1          {{thisval = data_value_1}}
                      |
                      delimited_data_value  {{thisval = delimited_data_value}}
                      |
                      sc_lines_of_text      {{thisval = stripextras(sc_lines_of_text)}}
                      |
                      bracket_expression    {{thisval = bracket_expression}}
                      )                     {{return monitor('data_value',thisval)}}

     rule delimited_data_value: (triple_quote_data_value      {{thisval = striptriple(triple_quote_data_value)}}
                                |
                                single_quote_data_value       {{thisval = stripstring(single_quote_data_value)}}
                                )                             {{return thisval}}

     rule sc_lines_of_text: start_sc_line   {{lines = StringIO();lines.write(start_sc_line)}}
                            (
                            sc_line_of_text {{lines.write(sc_line_of_text)}}
                            )*
                            end_sc_line     {{lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())}}

     rule bracket_expression:  square_bracket_expr   {{return square_bracket_expr}}
                            |
                              curly_bracket_expr    {{return curly_bracket_expr}}


# due to the inability of the parser to backtrack, we construct our loops in helper functions,
# and simply collect data during parsing proper.

     rule top_loop: LBLOCK loopfield loopvalues {{return loopfield,loopvalues}}

# OK: a loopfield is simply a sequence of data names

     rule loopfield: (            {{loop_seq=[] }}
                     (
                                  ( data_name  )  {{loop_seq.append(data_name)}}
                      )*
                      )                        {{return loop_seq}} # sequence of data names


     rule loopvalues: (
                       (data_value   ) {{dataloop=[data_value]}}
                       (
                       (data_value  ) {{dataloop.append(monitor('loopval',data_value))}}
                       )*
                       )              {{return dataloop}}

     rule save_frame<<prepared>>: save_heading   {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }} 
                      (
                      dataseq<<savebc[savehead]>>
                      |
                      save_frame<<prepared>>     {{savebc.merge_fast(save_frame,parent=stored_block)}}
                      )*
                      save_end           {{return monitor('save_frame',savebc)}}

STAR2 specifies nested save frames and comma-separated list and table elements, whereas CIF2 has space-separated elements.

<STAR2-specific grammar>= (<-U)
     rule save_frame<<prepared>>: save_heading   {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }} 
                      (
                      dataseq<<savebc[savehead]>>
                      |
                      save_frame<<prepared>>     {{savebc.merge_fast(save_frame,parent=stored_block)}}
                      )*
                      save_end           {{return monitor('save_frame',savebc)}}

     rule square_bracket_expr: o_s_b            {{this_list = []}}
                            (  data_value       {{this_list.append(data_value)}}
                              ( ","
                                data_value       {{this_list.append(data_value)}}
                              ) *
                            ) *
                               c_s_b                     {{return StarList(this_list)}}

     rule curly_bracket_expr: ( o_c_b                     {{table_as_list = []}}
                             ( delimited_data_value       {{table_as_list = [delimited_data_value]}}
                              ":"
                              data_value                 {{table_as_list.append(data_value)}}
                            ( ","
                              delimited_data_value       {{table_as_list.append(delimited_data_value)}}
                              ":"
                              data_value                 {{table_as_list.append(data_value)}}
                            ) *
                               ) *
                              c_c_b )                    {{return StarDict(pairwise(table_as_list))}}



<CIF2-specific grammar>= (<-U)
     rule save_frame<<prepared>>: save_heading   {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }} 
                      (
                      dataseq<<savebc[savehead]>>
                      )*
                      save_end           {{return monitor('save_frame',savebc)}}


      rule square_bracket_expr: o_s_b            {{this_list = []}}
                            (  data_value       {{this_list.append(data_value)}}
                              (
                                data_value       {{this_list.append(data_value)}}
                              ) *
                            ) *
                               c_s_b                     {{return StarList(this_list)}}

     rule curly_bracket_expr: ( o_c_b                    {{table_as_list = []}}
                            (  delimited_data_value      {{table_as_list = [delimited_data_value]}}
                              ":"
                              data_value                 {{table_as_list.append(data_value)}}
                            (
                              delimited_data_value       {{table_as_list.append(delimited_data_value)}}
                              ":"
                              data_value                 {{table_as_list.append(data_value)}}
                            ) *
                            ) *
                              c_c_b )                    {{return StarDict(pairwise(table_as_list))}}
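
As a sketch of what the table rule produces (untested, with the same import assumptions as the earlier examples): a CIF2 table with delimited keys and space-separated entries comes back as a StarDict of string values:

from io import StringIO
from CifFile.StarFile import StarFile

src = StringIO(u"#\\#CIF_2.0\ndata_t\n_cell.dims {'a':2.3 'b':4.1}\n")
print(StarFile(src, grammar='2.0')['t']['_cell.dims'])  # {'a': '2.3', 'b': '4.1'}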



The CIF 1.1 grammar specification does not include bracket expressions, but does exclude brackets from beginning unquoted data values. We pass through the argument prepared so we can deal with non-standard dictionary files that contain duplicate datablocks.

<Grammar specification 1.1>= (<-U <-U)
# now the rules

rule input<<prepared>>: ( ((
            dblock<<prepared>>         {{allblocks = prepared;allblocks.merge_fast(dblock)}}
            (
            dblock<<prepared>>         {{allblocks.merge_fast(dblock)}} #
            )*
            END
            )
            |
            (
            END                 {{allblocks = prepared}}
            )))                   {{allblocks.unlock();return allblocks}}

    rule dblock<<prepared>>: ( data_heading {{heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname]}}# a data heading
                  (
                   dataseq<<thisbc[heading]>>
                  |
                  save_frame<<prepared>>     {{thisbc.merge_fast(save_frame,parent=act_block)}}
                  )*
# A trick to force rechecking of all datanames, which was skipped by the precheck = True option below
                   )                      {{thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))}} # but may be empty

     rule dataseq<<starblock>>:  data<<starblock>>
                       (
                       data<<starblock>>
                       )*

     rule data<<currentblock>>:        top_loop      {{makeloop(currentblock,top_loop)}}
                                        |
                                        datakvpair    {{currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)}} #kv pair

     rule datakvpair: data_name data_value {{return [data_name,data_value]}} # name-value

     rule data_value: (data_value_1          {{thisval = stripstring(data_value_1)}}
                      |
                      sc_lines_of_text      {{thisval = stripextras(sc_lines_of_text)}}
                      )                     {{return monitor('data_value',thisval)}}

     rule sc_lines_of_text: start_sc_line   {{lines = StringIO();lines.write(start_sc_line)}}
                            (
                            sc_line_of_text {{lines.write(sc_line_of_text)}}
                            )*
                            end_sc_line     {{lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())}}

# due to the inability of the parser to backtrack, we construct our loops in helper functions,
# and simply collect data during parsing proper.

     rule top_loop: LBLOCK loopfield loopvalues {{return loopfield,loopvalues}}

# OK: a loopfield is simply a sequence of data names; the old nested
# dataname,loopfield forms terminated by stop_ are not supported here

     rule loopfield: (            {{toploop=[]}}
                     (
                                  ( data_name  )  {{toploop.append(data_name)}}
                      )*
                      )                        {{return toploop}} # sequence of data names


     rule loopvalues: (
                       (data_value   ) {{dataloop=[data_value]}}
                       (
                       (data_value  ) {{dataloop.append(monitor('loopval',data_value))}}
                       )*
                       )              {{return dataloop}}

     rule save_frame<<prepared>>: save_heading   {{savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname] }} 
                      (
                      dataseq<<savebc[savehead]>>
                      |
                      save_frame<<prepared>>     {{savebc.merge_fast(save_frame,parent=act_block)}}
                      )*
                      save_end           {{return monitor('save_frame',savebc)}}


Python 2/3 compatibility. We try to keep the code as portable across the 2-3 divide as we can.

<Python2-3 compatibility>= (<-U <-U <-U <-U)
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
pycifrw-4.4/src/YappsStarParser.nw000066400000000000000000000654001345362224200173140ustar00rootroot00000000000000@ Noweb literate programming file for Star grammar and parser specification. We are using Amit Patel's excellent context-sensitive Yapps2 parser. ' This was chosen because it enables us to process long semicolon delimited strings without running into Python recursion limits. In the original kjParsing implementation, it was impossible to get the lexer to return a single line of text within the semicolon-delimited string as that re would have matched a single line of text anywhere in the file. The resulting very long match expression only worked for text strings less than about 9000 characters in length. For further information about Yapps2, see http://theory.stanford.edu/~amitp/Yapps/ Several standards are available, of which four are implemented: 1.0, 1.1, CIF2 and STAR2. CIF2 differs from STAR2 in that lists have comma separators and no nested save frames are allowed. Note that 1.0,1.1 and CIF2/STAR2 differ in their treatment of unquoted data values beginning with brackets. <<1.0_syntax>>= <> <> %% parser StarParser: <> <> %% <<1.1_syntax>>= <> <> %% parser StarParser: <> <> %% @ The following two recipes produce CIF2 and STAR2 syntax. <>= <> <> %% parser StarParser: <> <> %% @ The STAR2 syntax <>= <> <> %% parser StarParser: <> <> %% @ Helper functions. We have a monitor function which we can call to save the last parsed value (and print, if we are debugging). We also have functions for stripping off delimiters from strings. Finally, we match up our loops after reading them in. Note that we have function stripextras, which is only for semicolon strings, and stripstring, which is for getting rid of the inverted commas. <>= # An alternative specification for the Cif Parser, based on Yapps2 # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value # Strip extras gets rid of leading and trailing whitespace, and # semicolons. def stripextras(value): from .StarFile import remove_line_folding, remove_line_prefix # we get rid of semicolons and leading/trailing terminators etc. import re jj = re.compile("[\n\r\f \t\v]*") semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;") cut = semis.match(value) if cut: #we have a semicolon-delimited string nv = value[cut.end():len(value)-2] try: if nv[-1]=='\r': nv = nv[:-1] except IndexError: #empty data value pass # apply protocols nv = remove_line_prefix(nv) nv = remove_line_folding(nv) return nv else: cut = jj.match(value) if cut: return stripstring(value[cut.end():]) return value # helper function to get rid of inverted commas etc. def stripstring(value): if value: if value[0]== '\'' and value[-1]=='\'': return value[1:-1] if value[0]=='"' and value[-1]=='"': return value[1:-1] return value # helper function to get rid of triple quotes def striptriple(value): if value: if value[:3] == '"""' and value[-3:] == '"""': return value[3:-3] if value[:3] == "'''" and value[-3:] == "'''": return value[3:-3] return value # helper function to populate a StarBlock given a list of names # and values . # # Note that there may be an empty list at the very end of our itemlists, # so we remove that if necessary. 
#
def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
        target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)   #will raise ValueError on problem
    except ValueError:
        error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        print(error_string, file=sys.stderr)
        raise ValueError(error_string)

# return an object with the appropriate amount of nesting
def make_empty(nestlevel):
    gd = []
    for i in range(1,nestlevel):
        gd = [gd]
    return gd

# this function updates a dictionary first checking for name collisions,
# which imply that the CIF is invalid. We need case insensitivity for
# names.
# Unfortunately we cannot check loop item contents against non-loop contents
# in a non-messy way during parsing, as we may not have easy access to previous
# key value pairs in the context of our call (unlike our built-in access to all
# previous loops).
# For this reason, we don't waste time checking looped items against non-looped
# names during parsing of a data block. This would only match a subset of the
# final items. We do check against ordinary items, however.
#
# Note the following situations:
# (1) new_dict is empty -> we have just added a loop; do no checking
# (2) new_dict is not empty -> we have some new key-value pairs
#
def cif_update(old_dict,new_dict,loops):
    old_keys = map(lambda a:a.lower(),old_dict.keys())
    if new_dict != {}:    # otherwise we have a new loop
        #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys()))
        for new_key in new_dict.keys():
            if new_key.lower() in old_keys:
                raise CifError("Duplicate dataname or blockname %s in input file" % new_key)
            old_dict[new_key] = new_dict[new_key]

#
# this takes two lines, so we couldn't fit it into a one line execution statement...
def order_update(order_array,new_name):
    order_array.append(new_name)
    return new_name

# and finally...turn a sequence into a python dict (thanks to Stackoverflow).
# StopIteration is caught explicitly so that an exhausted input ends the
# generator cleanly under PEP 479 (Python 3.7 and later).
def pairwise(iterable):
    it = iter(iterable)
    while True:
        try:
            yield next(it), next(it)
        except StopIteration:
            return

@ We can simplify the BNF specification of Nick Spadaccini. First of
all, we do not have to have type I and type II strings, which are
distinguished by the presence or absence of a line feed directly
preceding them, and thus by being allowed a semicolon at the front or
not. We take care of this by treating as whitespace all terminators
except for those with a following semicolon, so that a
carriage-return-semicolon sequence matches the start_sc_line uniquely.

We include reserved words and save frames. The other reserved words
have no rules defined, so will flag a syntax error. However, as yapps
is a context-sensitive parser, it will by default make any word found
starting with our reserved words into a data value if it occurs in the
expected position, so we explicitly exclude stuff starting with our
words in the definition of data_value_1.

The syntax rules below correspond to the current STAR2 paper. Commas
are not allowed in non-delimited data values so that they can be used
to separate list items. Note that we do not recognise characters
outside the Unicode basic multilingual plane in datanames, data
headings and save headings. This is due to a limitation of Python 2
unicode strings and will be removed when PyCIFRW is ported to Python 3.
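As a minimal illustration of the reserved-word exclusion mentioned
above (not part of the tangled grammar), the sketch below shows how a
negative lookahead rejects would-be data values that begin with a
reserved word; the simplified pattern is an assumption for
demonstration only, abbreviated from the full data_value_1 token:

    import re
    # reserved-word prefixes, written in the same case-insensitive style
    # as the grammar tokens (abbreviated to two of the four words)
    reserved = r"(?:(S|s)(A|a)(V|v)(E|e)_|(D|d)(A|a)(T|t)(A|a)_)"
    # refuse to match anything starting with a reserved word, then match
    # a bare (undelimited) value much as the STAR2 token does
    bare_value = re.compile(r"(?!" + reserved + r")[^\s\"#$',_{}\[\]][^\s,{}\[\]]*")
    assert bare_value.match("1.234(5)")                # ordinary value matches
    assert bare_value.match("data_something") is None  # reserved word rejected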
<>=
<>
<>
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$',_\{\}\[\]][^\s,\{\}\[\]]*)"

<>=
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK: "(L|l)(O|o)(O|o)(P|p)_"     # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: u"(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: u"_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+"    #_followed by stuff
token data_heading: u"(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD\U000F0000-\U000FFFFD\U00100000-\U0010FFFD-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token c_c_b: "\}"
token o_c_b: "\{"
token c_s_b: "\]"
token o_s_b: "\["
#token dat_val_nocomma_nosq: "([^\s\"#$,'_\(\{\[\]][^\s,\[\]]*)|'(('(?![\s,]))|([^\n\r\f']))*'+|\"((\"(?![\s,]))|([^\n\r\"]))*\"+"
token dat_val_internal_sq: "\[([^\s\[\]]*)\]"
# token dat_val_nocomma_nocurl: "([^\s\"#$,'_\(\{\[\]][^\s,}]*)|'(('(?![\s,]))|([^\n\r\f']))*'+|\"([^\n\r\"])*\"+"
# For tests of new DDLm syntax - no quotes or apostrophes in strings,
# no commas, braces or square brackets in undelimited data values
# This token for triple-quote delimited strings must come before
# single-quote delimited strings to avoid the opening quotes being
# interpreted as a single-quote delimited string
token triple_quote_data_value: "(?s)'''.*?'''|\"\"\".*?\"\"\""
token single_quote_data_value: "'([^\n\r\f'])*'+|\"([^\n\r\"])*\"+"

@ Currently just a single line but we allow a whole block just in case.

<>=
token END: '$'

@ CIF 2.0 uses spaces instead of commas to separate list values so
commas are allowed in data values

<>=
<>
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\{\}\[\]][^\s\{\}\[\]]*)"
<>

@ CIF 1.1 does not allow unquoted data values to begin with a bracket
character, but does not have bracket expressions as such.
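For example, a (hypothetical) value such as [100] must be given as the
quoted string '[100]' to be a legal CIF 1.1 data value, whereas the
original CIF 1.0 grammar further below accepts it unquoted.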
<>=
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK: "(L|l)(O|o)(O|o)(P|p)_"     # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: "(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: "_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"    #_followed by stuff
token data_heading: "(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\{\[\]][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
token END: '$'

@ The original CIF specification allowed brackets to begin data
values, even if not quoted. That is the only difference.

<>=
# first handle whitespace and comments, keeping whitespace
# before a semicolon
ignore: "([ \t\n\r](?!;))|[ \t]"
ignore: "(#.*[\n\r](?!;))|(#.*)"
# now the tokens
token LBLOCK: "(L|l)(O|o)(O|o)(P|p)_"     # loop_
token GLOBAL: "(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
token STOP: "(S|s)(T|t)(O|o)(P|p)_"
token save_heading: "(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token save_end: "(S|s)(A|a)(V|v)(E|e)_"
token data_name: "_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"    #_followed by stuff
token data_heading: "(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
token start_sc_line: "(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
token sc_line_of_text: "[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
token end_sc_line: ";"
token data_value_1: "((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
token END: '$'

@ The final returned value is a StarFile, with each key a datablock
name. The value attached to each key is an entire dictionary for that
block. We bypass the standard __setitem__ methods to gain precision
in checking for duplicate blocknames and skipping name checks.

Note in the following grammar that we have adjusted for some yapps
idiosyncrasies: in particular, a nested bracket expression needs to be
distinguished from the top-level nested bracket expression, otherwise
the context-sensitive parser will search for all those items which
could follow the top-level bracket expression in nested expressions.
The current version produces slightly incorrect error messages in that
any type of close bracket is supposedly OK, even though only a
particular type will be accepted.

We also have to deal with Dimension-type lists, where there may be
square brackets as part of the value (e.g. [4[5]]). This requires
catching internal square brackets as well. The current grammar
specification catches only this case, i.e. the first element of the
array can be of the form xxx[yyy]. No other elements can have this
form, and there can be no trailing characters. This form could be
allowed for other elements by trivial expansion of the current
description but, until further notice, I do not think it is useful to
allow square brackets in list values.
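By way of illustration, a minimal usage sketch of this blockname-to-block
mapping through the standard top-level entry point CifFile.ReadCif (the
file name here is hypothetical):

    from CifFile import ReadCif
    cf = ReadCif("example.cif")    # returns the StarFile-derived object
    for blockname in cf.keys():    # each key is a datablock name
        print(blockname, list(cf[blockname].keys()))   # datanames in that block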
<>=
<>
<>

<>=
<>
<>

@ CIF2 and STAR2 are almost identical in grammar

<>=
# now the rules

rule input@<>: ( ((  dblock@<>       {{allblocks = prepared; allblocks.merge_fast(dblock)}}
                  ( dblock@<>        {{allblocks.merge_fast(dblock)}} #
                  )* END )
                 |
                 ( END               {{allblocks = prepared}} )))
                                     {{allblocks.unlock(); return allblocks}}

rule dblock@<>: ( data_heading       {{heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading]}}   # a data heading
                  ( dataseq@<>       #because merging may have changed the heading
                  | save_frame@<>    {{thisbc.merge_fast(save_frame,parent=stored_block)}}
                  )*
                )                    {{stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc))}}   # but may be empty

rule dataseq@<>: data@<>
                 ( data@<> )*

rule data@<>: top_loop               {{makeloop(currentblock,top_loop)}}
            | datakvpair             {{currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False)}}   #kv pair

rule datakvpair: data_name data_value {{return [data_name,data_value]}}   # name-value

rule data_value: (data_value_1       {{thisval = data_value_1}}
                 | delimited_data_value {{thisval = delimited_data_value}}
                 | sc_lines_of_text  {{thisval = stripextras(sc_lines_of_text)}}
                 | bracket_expression {{thisval = bracket_expression}}
                 )                   {{return monitor('data_value',thisval)}}

rule delimited_data_value: (triple_quote_data_value {{thisval = striptriple(triple_quote_data_value)}}
                           | single_quote_data_value {{thisval = stripstring(single_quote_data_value)}}
                           )         {{return thisval}}

rule sc_lines_of_text: start_sc_line {{lines = StringIO();lines.write(start_sc_line)}}
                       ( sc_line_of_text {{lines.write(sc_line_of_text)}} )*
                       end_sc_line   {{lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())}}

rule bracket_expression: square_bracket_expr {{return square_bracket_expr}}
                       | curly_bracket_expr  {{return curly_bracket_expr}}

# due to the inability of the parser to backtrack, we construct our loops in helper functions,
# and simply collect data during parsing proper.
rule top_loop: LBLOCK loopfield loopvalues {{return loopfield,loopvalues}}

# OK: a loopfield is either a sequence of datanames
rule loopfield: ( {{loop_seq=[] }}
                  ( ( data_name ) {{loop_seq.append(data_name)}} )*
                ) {{return loop_seq}}   # sequence of data names

rule loopvalues: ( (data_value ) {{dataloop=[data_value]}}
                   ( (data_value ) {{dataloop.append(monitor('loopval',data_value))}} )*
                 ) {{return dataloop}}

rule save_frame@<>: save_heading {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }}
                    ( dataseq@<>
                    | save_frame@<> {{savebc.merge_fast(save_frame,parent=stored_block)}}
                    )*
                    save_end {{return monitor('save_frame',savebc)}}

@ STAR2 specifies nested save frames and comma-separated list and
table elements, whereas CIF2 has space-separated elements.
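A short sketch (file names hypothetical) of selecting between these
two variants through the public interface, which accepts a grammar
argument naming the standard to apply:

    from CifFile import ReadCif
    star2 = ReadCif("star2_style.cif", grammar="STAR2")   # lists like [1, 2, 3]
    cif2 = ReadCif("cif2_style.cif", grammar="2.0")       # lists like [1 2 3]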
<>=
rule save_frame@<>: save_heading {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }}
                    ( dataseq@<>
                    | save_frame@<> {{savebc.merge_fast(save_frame,parent=stored_block)}}
                    )*
                    save_end {{return monitor('save_frame',savebc)}}

rule square_bracket_expr: o_s_b {{this_list = []}}
                          ( data_value {{this_list.append(data_value)}}
                            ( "," data_value {{this_list.append(data_value)}} ) *
                          ) *
                          c_s_b {{return StarList(this_list)}}

rule curly_bracket_expr: ( o_c_b {{table_as_list = []}}
                           ( delimited_data_value {{table_as_list = [delimited_data_value]}}
                             ":" data_value {{table_as_list.append(data_value)}}
                             ( "," delimited_data_value {{table_as_list.append(delimited_data_value)}}
                               ":" data_value {{table_as_list.append(data_value)}} ) *
                           ) *
                           c_c_b ) {{return StarDict(pairwise(table_as_list))}}

<>=
rule save_frame@<>: save_heading {{savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] }}
                    ( dataseq@<> )*
                    save_end {{return monitor('save_frame',savebc)}}

rule square_bracket_expr: o_s_b {{this_list = []}}
                          ( data_value {{this_list.append(data_value)}}
                            ( data_value {{this_list.append(data_value)}} ) *
                          ) *
                          c_s_b {{return StarList(this_list)}}

rule curly_bracket_expr: ( o_c_b {{table_as_list = []}}
                           ( delimited_data_value {{table_as_list = [delimited_data_value]}}
                             ":" data_value {{table_as_list.append(data_value)}}
                             ( delimited_data_value {{table_as_list.append(delimited_data_value)}}
                               ":" data_value {{table_as_list.append(data_value)}} ) *
                           ) *
                           c_c_b ) {{return StarDict(pairwise(table_as_list))}}

@ The CIF 1.1 grammar specification does not include bracket
expressions, but does exclude brackets from beginning unquoted data
values. We pass through the argument [[prepared]] so we can deal with
non-standard dictionary files that contain duplicate datablocks.
<>=
# now the rules

rule input@<>: ( ((  dblock@<>       {{allblocks = prepared;allblocks.merge_fast(dblock)}}
                  ( dblock@<>        {{allblocks.merge_fast(dblock)}} #
                  )* END )
                 |
                 ( END               {{allblocks = prepared}} )))
                                     {{allblocks.unlock();return allblocks}}

rule dblock@<>: ( data_heading       {{heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname]}}   # a data heading
                  ( dataseq@<>
                  | save_frame@<>    {{thisbc.merge_fast(save_frame,parent=act_block)}}
                  )*
                  # A trick to force rechecking of all datanames, which was skipped by the precheck = True option below
                )                    {{thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc))}}   # but may be empty

rule dataseq@<>: data@<>
                 ( data@<> )*

rule data@<>: top_loop               {{makeloop(currentblock,top_loop)}}
            | datakvpair             {{currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True)}}   #kv pair

rule datakvpair: data_name data_value {{return [data_name,data_value]}}   # name-value

rule data_value: (data_value_1       {{thisval = stripstring(data_value_1)}}
                 | sc_lines_of_text  {{thisval = stripextras(sc_lines_of_text)}}
                 )                   {{return monitor('data_value',thisval)}}

rule sc_lines_of_text: start_sc_line {{lines = StringIO();lines.write(start_sc_line)}}
                       ( sc_line_of_text {{lines.write(sc_line_of_text)}} )*
                       end_sc_line   {{lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue())}}

# due to the inability of the parser to backtrack, we construct our loops in helper functions,
# and simply collect data during parsing proper.
rule top_loop: LBLOCK loopfield loopvalues {{return loopfield,loopvalues}}

# OK: a loopfield is either a sequence of dataname*,loopfield with stop
# or else dataname,loopfield without stop
rule loopfield: ( {{toploop=[]}}
                  ( ( data_name ) {{toploop.append(data_name)}} )*
                ) {{return toploop}}   # sequence of data names

rule loopvalues: ( (data_value ) {{dataloop=[data_value]}}
                   ( (data_value ) {{dataloop.append(monitor('loopval',data_value))}} )*
                 ) {{return dataloop}}

rule save_frame@<>: save_heading {{savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname] }}
                    ( dataseq@<>
                    | save_frame@<> {{savebc.merge_fast(save_frame,parent=act_block)}}
                    )*
                    save_end {{return monitor('save_frame',savebc)}}

@ Python 2/3 compatibility. We try to keep the code as portable across
the 2-3 divide as we can.

<>=
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO
pycifrw-4.4/src/YappsStarParser_1_0.py000066400000000000000000000335411345362224200177600ustar00rootroot00000000000000
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from .StarFile import StarBlock,StarFile,StarList,StarDict
from io import StringIO

# An alternative specification for the Cif Parser, based on Yapps2
# by Amit Patel (http://theory.stanford.edu/~amitp/Yapps)
#
# helper code: we define our match tokens
lastval = ''
def monitor(location,value):
    global lastval
    #print 'At %s: %s' % (location,repr(value))
    lastval = repr(value)
    return value

# Strip extras gets rid of leading and trailing whitespace, and
# semicolons.
def stripextras(value): from .StarFile import remove_line_folding, remove_line_prefix # we get rid of semicolons and leading/trailing terminators etc. import re jj = re.compile("[\n\r\f \t\v]*") semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;") cut = semis.match(value) if cut: #we have a semicolon-delimited string nv = value[cut.end():len(value)-2] try: if nv[-1]=='\r': nv = nv[:-1] except IndexError: #empty data value pass # apply protocols nv = remove_line_prefix(nv) nv = remove_line_folding(nv) return nv else: cut = jj.match(value) if cut: return stripstring(value[cut.end():]) return value # helper function to get rid of inverted commas etc. def stripstring(value): if value: if value[0]== '\'' and value[-1]=='\'': return value[1:-1] if value[0]=='"' and value[-1]=='"': return value[1:-1] return value # helper function to get rid of triple quotes def striptriple(value): if value: if value[:3] == '"""' and value[-3:] == '"""': return value[3:-3] if value[:3] == "'''" and value[-3:] == "'''": return value[3:-3] return value # helper function to populate a StarBlock given a list of names # and values . # # Note that there may be an empty list at the very end of our itemlists, # so we remove that if necessary. # def makeloop(target_block,loopdata): loop_seq,itemlists = loopdata if itemlists[-1] == []: itemlists.pop(-1) # print('Making loop with %s' % repr(itemlists)) step_size = len(loop_seq) for col_no in range(step_size): target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True) # now construct the loop try: target_block.CreateLoop(loop_seq) #will raise ValueError on problem except ValueError: error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq) print(error_string, file=sys.stderr) raise ValueError(error_string) # return an object with the appropriate amount of nesting def make_empty(nestlevel): gd = [] for i in range(1,nestlevel): gd = [gd] return gd # this function updates a dictionary first checking for name collisions, # which imply that the CIF is invalid. We need case insensitivity for # names. # Unfortunately we cannot check loop item contents against non-loop contents # in a non-messy way during parsing, as we may not have easy access to previous # key value pairs in the context of our call (unlike our built-in access to all # previous loops). # For this reason, we don't waste time checking looped items against non-looped # names during parsing of a data block. This would only match a subset of the # final items. We do check against ordinary items, however. # # Note the following situations: # (1) new_dict is empty -> we have just added a loop; do no checking # (2) new_dict is not empty -> we have some new key-value pairs # def cif_update(old_dict,new_dict,loops): old_keys = map(lambda a:a.lower(),old_dict.keys()) if new_dict != {}: # otherwise we have a new loop #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys())) for new_key in new_dict.keys(): if new_key.lower() in old_keys: raise CifError("Duplicate dataname or blockname %s in input file" % new_key) old_dict[new_key] = new_dict[new_key] # # this takes two lines, so we couldn't fit it into a one line execution statement... def order_update(order_array,new_name): order_array.append(new_name) return new_name # and finally...turn a sequence into a python dict (thanks to Stackoverflow) def pairwise(iterable): it = iter(iterable) while 1: yield next(it), next(it) # Begin -- grammar generated by Yapps import sys, re from . 
import yapps3_compiled_rt as yappsrt class StarParserScanner(yappsrt.Scanner): def __init__(self, *args,**kwargs): patterns = [ ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'), ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'), ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'), ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'), ('STOP', '(S|s)(T|t)(O|o)(P|p)_'), ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('save_end', '(S|s)(A|a)(V|v)(E|e)_'), ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'), ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'), ('end_sc_line', ';'), ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'), ('END', '$'), ] yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs) class StarParser(yappsrt.Parser): Context = yappsrt.Context def input(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared]) _token = self._peek('END', 'data_heading') if _token == 'data_heading': dblock = self.dblock(prepared, _context) allblocks = prepared;allblocks.merge_fast(dblock) while self._peek('END', 'data_heading') == 'data_heading': dblock = self.dblock(prepared, _context) allblocks.merge_fast(dblock) if self._peek() not in ['END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading'])) END = self._scan('END') else: # == 'END' END = self._scan('END') allblocks = prepared allblocks.unlock();return allblocks def dblock(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared]) data_heading = self._scan('data_heading') heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname] while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(thisbc[heading], _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) thisbc.merge_fast(save_frame,parent=act_block) if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading'])) thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc)) def dataseq(self, starblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock]) data = self.data(starblock, _context) while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']: data = self.data(starblock, _context) if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']: raise 
yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading'])) def data(self, currentblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock]) _token = self._peek('LBLOCK', 'data_name') if _token == 'LBLOCK': top_loop = self.top_loop(_context) makeloop(currentblock,top_loop) else: # == 'data_name' datakvpair = self.datakvpair(_context) currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True) def datakvpair(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', []) data_name = self._scan('data_name') data_value = self.data_value(_context) return [data_name,data_value] def data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data_value', []) _token = self._peek('data_value_1', 'start_sc_line') if _token == 'data_value_1': data_value_1 = self._scan('data_value_1') thisval = stripstring(data_value_1) else: # == 'start_sc_line' sc_lines_of_text = self.sc_lines_of_text(_context) thisval = stripextras(sc_lines_of_text) return monitor('data_value',thisval) def sc_lines_of_text(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', []) start_sc_line = self._scan('start_sc_line') lines = StringIO();lines.write(start_sc_line) while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text': sc_line_of_text = self._scan('sc_line_of_text') lines.write(sc_line_of_text) if self._peek() not in ['end_sc_line', 'sc_line_of_text']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line'])) end_sc_line = self._scan('end_sc_line') lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue()) def top_loop(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', []) LBLOCK = self._scan('LBLOCK') loopfield = self.loopfield(_context) loopvalues = self.loopvalues(_context) return loopfield,loopvalues def loopfield(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', []) toploop=[] while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name': data_name = self._scan('data_name') toploop.append(data_name) if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line'])) return toploop def loopvalues(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', []) data_value = self.data_value(_context) dataloop=[data_value] while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']: data_value = self.data_value(_context) dataloop.append(monitor('loopval',data_value)) if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading'])) return dataloop def save_frame(self, prepared, _parent=None): _context = 
self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared]) save_heading = self._scan('save_heading') savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname] while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(savebc[savehead], _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) savebc.merge_fast(save_frame,parent=act_block) if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading'])) save_end = self._scan('save_end') return monitor('save_frame',savebc) def parse(rule, text): P = StarParser(StarParserScanner(text)) return yappsrt.wrap_error_reporter(P, rule) # End -- grammar generated by Yapps pycifrw-4.4/src/YappsStarParser_1_1.py000066400000000000000000000335511345362224200177620ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import from .StarFile import StarBlock,StarFile,StarList,StarDict from io import StringIO # An alternative specification for the Cif Parser, based on Yapps2 # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value # Strip extras gets rid of leading and trailing whitespace, and # semicolons. def stripextras(value): from .StarFile import remove_line_folding, remove_line_prefix # we get rid of semicolons and leading/trailing terminators etc. import re jj = re.compile("[\n\r\f \t\v]*") semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;") cut = semis.match(value) if cut: #we have a semicolon-delimited string nv = value[cut.end():len(value)-2] try: if nv[-1]=='\r': nv = nv[:-1] except IndexError: #empty data value pass # apply protocols nv = remove_line_prefix(nv) nv = remove_line_folding(nv) return nv else: cut = jj.match(value) if cut: return stripstring(value[cut.end():]) return value # helper function to get rid of inverted commas etc. def stripstring(value): if value: if value[0]== '\'' and value[-1]=='\'': return value[1:-1] if value[0]=='"' and value[-1]=='"': return value[1:-1] return value # helper function to get rid of triple quotes def striptriple(value): if value: if value[:3] == '"""' and value[-3:] == '"""': return value[3:-3] if value[:3] == "'''" and value[-3:] == "'''": return value[3:-3] return value # helper function to populate a StarBlock given a list of names # and values . # # Note that there may be an empty list at the very end of our itemlists, # so we remove that if necessary. 
# def makeloop(target_block,loopdata): loop_seq,itemlists = loopdata if itemlists[-1] == []: itemlists.pop(-1) # print('Making loop with %s' % repr(itemlists)) step_size = len(loop_seq) for col_no in range(step_size): target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True) # now construct the loop try: target_block.CreateLoop(loop_seq) #will raise ValueError on problem except ValueError: error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq) print(error_string, file=sys.stderr) raise ValueError(error_string) # return an object with the appropriate amount of nesting def make_empty(nestlevel): gd = [] for i in range(1,nestlevel): gd = [gd] return gd # this function updates a dictionary first checking for name collisions, # which imply that the CIF is invalid. We need case insensitivity for # names. # Unfortunately we cannot check loop item contents against non-loop contents # in a non-messy way during parsing, as we may not have easy access to previous # key value pairs in the context of our call (unlike our built-in access to all # previous loops). # For this reason, we don't waste time checking looped items against non-looped # names during parsing of a data block. This would only match a subset of the # final items. We do check against ordinary items, however. # # Note the following situations: # (1) new_dict is empty -> we have just added a loop; do no checking # (2) new_dict is not empty -> we have some new key-value pairs # def cif_update(old_dict,new_dict,loops): old_keys = map(lambda a:a.lower(),old_dict.keys()) if new_dict != {}: # otherwise we have a new loop #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys())) for new_key in new_dict.keys(): if new_key.lower() in old_keys: raise CifError("Duplicate dataname or blockname %s in input file" % new_key) old_dict[new_key] = new_dict[new_key] # # this takes two lines, so we couldn't fit it into a one line execution statement... def order_update(order_array,new_name): order_array.append(new_name) return new_name # and finally...turn a sequence into a python dict (thanks to Stackoverflow) def pairwise(iterable): it = iter(iterable) while 1: yield next(it), next(it) # Begin -- grammar generated by Yapps import sys, re from . 
import yapps3_compiled_rt as yappsrt class StarParserScanner(yappsrt.Scanner): def __init__(self, *args,**kwargs): patterns = [ ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'), ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'), ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'), ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'), ('STOP', '(S|s)(T|t)(O|o)(P|p)_'), ('save_heading', '(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('save_end', '(S|s)(A|a)(V|v)(E|e)_'), ('data_name', '_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('data_heading', '(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_-]+'), ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'), ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'), ('end_sc_line', ';'), ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\{\\[\\]][^\\s]*)|\'((\'(?=\\S))|([^\n\r\x0c\']))*\'+|"(("(?=\\S))|([^\n\r"]))*"+'), ('END', '$'), ] yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs) class StarParser(yappsrt.Parser): Context = yappsrt.Context def input(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared]) _token = self._peek('END', 'data_heading') if _token == 'data_heading': dblock = self.dblock(prepared, _context) allblocks = prepared;allblocks.merge_fast(dblock) while self._peek('END', 'data_heading') == 'data_heading': dblock = self.dblock(prepared, _context) allblocks.merge_fast(dblock) if self._peek() not in ['END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading'])) END = self._scan('END') else: # == 'END' END = self._scan('END') allblocks = prepared allblocks.unlock();return allblocks def dblock(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared]) data_heading = self._scan('data_heading') heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);newname = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));act_block=thisbc[newname] while self._peek('save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(thisbc[heading], _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) thisbc.merge_fast(save_frame,parent=act_block) if self._peek() not in ['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'LBLOCK', 'data_name', 'save_end', 'END', 'data_heading'])) thisbc[heading].setmaxnamelength(thisbc[heading].maxnamelength);return (monitor('dblock',thisbc)) def dataseq(self, starblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock]) data = self.data(starblock, _context) while self._peek('LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['LBLOCK', 'data_name']: data = self.data(starblock, _context) if self._peek() not in ['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']: raise 
yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading'])) def data(self, currentblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock]) _token = self._peek('LBLOCK', 'data_name') if _token == 'LBLOCK': top_loop = self.top_loop(_context) makeloop(currentblock,top_loop) else: # == 'data_name' datakvpair = self.datakvpair(_context) currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=True) def datakvpair(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', []) data_name = self._scan('data_name') data_value = self.data_value(_context) return [data_name,data_value] def data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data_value', []) _token = self._peek('data_value_1', 'start_sc_line') if _token == 'data_value_1': data_value_1 = self._scan('data_value_1') thisval = stripstring(data_value_1) else: # == 'start_sc_line' sc_lines_of_text = self.sc_lines_of_text(_context) thisval = stripextras(sc_lines_of_text) return monitor('data_value',thisval) def sc_lines_of_text(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', []) start_sc_line = self._scan('start_sc_line') lines = StringIO();lines.write(start_sc_line) while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text': sc_line_of_text = self._scan('sc_line_of_text') lines.write(sc_line_of_text) if self._peek() not in ['end_sc_line', 'sc_line_of_text']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line'])) end_sc_line = self._scan('end_sc_line') lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue()) def top_loop(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', []) LBLOCK = self._scan('LBLOCK') loopfield = self.loopfield(_context) loopvalues = self.loopvalues(_context) return loopfield,loopvalues def loopfield(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', []) toploop=[] while self._peek('data_name', 'data_value_1', 'start_sc_line') == 'data_name': data_name = self._scan('data_name') toploop.append(data_name) if self._peek() not in ['data_name', 'data_value_1', 'start_sc_line']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'start_sc_line'])) return toploop def loopvalues(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', []) data_value = self.data_value(_context) dataloop=[data_value] while self._peek('data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading') in ['data_value_1', 'start_sc_line']: data_value = self.data_value(_context) dataloop.append(monitor('loopval',data_value)) if self._peek() not in ['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'start_sc_line', 'LBLOCK', 'data_name', 'save_heading', 'save_end', 'END', 'data_heading'])) return dataloop def save_frame(self, prepared, _parent=None): _context = 
self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared]) save_heading = self._scan('save_heading') savehead = save_heading[5:];savebc = StarFile();newname=savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));act_block=savebc[newname] while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(savebc[savehead], _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) savebc.merge_fast(save_frame,parent=act_block) if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading'])) save_end = self._scan('save_end') return monitor('save_frame',savebc) def parse(rule, text): P = StarParser(StarParserScanner(text)) return yappsrt.wrap_error_reporter(P, rule) # End -- grammar generated by Yapps pycifrw-4.4/src/YappsStarParser_2_0.py000066400000000000000000000516611345362224200177640ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import from .StarFile import StarBlock,StarFile,StarList,StarDict from io import StringIO # An alternative specification for the Cif Parser, based on Yapps2 # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value # Strip extras gets rid of leading and trailing whitespace, and # semicolons. def stripextras(value): from .StarFile import remove_line_folding, remove_line_prefix # we get rid of semicolons and leading/trailing terminators etc. import re jj = re.compile("[\n\r\f \t\v]*") semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;") cut = semis.match(value) if cut: #we have a semicolon-delimited string nv = value[cut.end():len(value)-2] try: if nv[-1]=='\r': nv = nv[:-1] except IndexError: #empty data value pass # apply protocols nv = remove_line_prefix(nv) nv = remove_line_folding(nv) return nv else: cut = jj.match(value) if cut: return stripstring(value[cut.end():]) return value # helper function to get rid of inverted commas etc. def stripstring(value): if value: if value[0]== '\'' and value[-1]=='\'': return value[1:-1] if value[0]=='"' and value[-1]=='"': return value[1:-1] return value # helper function to get rid of triple quotes def striptriple(value): if value: if value[:3] == '"""' and value[-3:] == '"""': return value[3:-3] if value[:3] == "'''" and value[-3:] == "'''": return value[3:-3] return value # helper function to populate a StarBlock given a list of names # and values . # # Note that there may be an empty list at the very end of our itemlists, # so we remove that if necessary. 
# def makeloop(target_block,loopdata): loop_seq,itemlists = loopdata if itemlists[-1] == []: itemlists.pop(-1) # print('Making loop with %s' % repr(itemlists)) step_size = len(loop_seq) for col_no in range(step_size): target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True) # now construct the loop try: target_block.CreateLoop(loop_seq) #will raise ValueError on problem except ValueError: error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq) print(error_string, file=sys.stderr) raise ValueError(error_string) # return an object with the appropriate amount of nesting def make_empty(nestlevel): gd = [] for i in range(1,nestlevel): gd = [gd] return gd # this function updates a dictionary first checking for name collisions, # which imply that the CIF is invalid. We need case insensitivity for # names. # Unfortunately we cannot check loop item contents against non-loop contents # in a non-messy way during parsing, as we may not have easy access to previous # key value pairs in the context of our call (unlike our built-in access to all # previous loops). # For this reason, we don't waste time checking looped items against non-looped # names during parsing of a data block. This would only match a subset of the # final items. We do check against ordinary items, however. # # Note the following situations: # (1) new_dict is empty -> we have just added a loop; do no checking # (2) new_dict is not empty -> we have some new key-value pairs # def cif_update(old_dict,new_dict,loops): old_keys = map(lambda a:a.lower(),old_dict.keys()) if new_dict != {}: # otherwise we have a new loop #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys())) for new_key in new_dict.keys(): if new_key.lower() in old_keys: raise CifError("Duplicate dataname or blockname %s in input file" % new_key) old_dict[new_key] = new_dict[new_key] # # this takes two lines, so we couldn't fit it into a one line execution statement... def order_update(order_array,new_name): order_array.append(new_name) return new_name # and finally...turn a sequence into a python dict (thanks to Stackoverflow) def pairwise(iterable): it = iter(iterable) while 1: yield next(it), next(it) # Begin -- grammar generated by Yapps import sys, re from . 
import yapps3_compiled_rt as yappsrt class StarParserScanner(yappsrt.Scanner): def __init__(self, *args,**kwargs): patterns = [ ('":"', ':'), ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'), ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'), ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'), ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'), ('STOP', '(S|s)(T|t)(O|o)(P|p)_'), ('save_heading', u'(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('save_end', '(S|s)(A|a)(V|v)(E|e)_'), ('data_name', u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('data_heading', u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'), ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'), ('end_sc_line', ';'), ('c_c_b', '\\}'), ('o_c_b', '\\{'), ('c_s_b', '\\]'), ('o_s_b', '\\['), ('dat_val_internal_sq', '\\[([^\\s\\[\\]]*)\\]'), ('triple_quote_data_value', '(?s)\'\'\'.*?\'\'\'|""".*?"""'), ('single_quote_data_value', '\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+'), ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\'_\\{\\}\\[\\]][^\\s\\{\\}\\[\\]]*)'), ('END', '$'), ] yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs) class StarParser(yappsrt.Parser): Context = yappsrt.Context def input(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared]) _token = self._peek('END', 'data_heading') if _token == 'data_heading': dblock = self.dblock(prepared, _context) allblocks = prepared; allblocks.merge_fast(dblock) while self._peek('END', 'data_heading') == 'data_heading': dblock = self.dblock(prepared, _context) allblocks.merge_fast(dblock) if self._peek() not in ['END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading'])) END = self._scan('END') else: # == 'END' END = self._scan('END') allblocks = prepared allblocks.unlock(); return allblocks def dblock(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared]) data_heading = self._scan('data_heading') heading = 
data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading] while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(stored_block, _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) thisbc.merge_fast(save_frame,parent=stored_block) if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading'])) stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc)) def dataseq(self, starblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock]) data = self.data(starblock, _context) while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']: data = self.data(starblock, _context) if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading'])) def data(self, currentblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock]) _token = self._peek('LBLOCK', 'data_name') if _token == 'LBLOCK': top_loop = self.top_loop(_context) makeloop(currentblock,top_loop) else: # == 'data_name' datakvpair = self.datakvpair(_context) currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False) def datakvpair(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', []) data_name = self._scan('data_name') data_value = self.data_value(_context) return [data_name,data_value] def data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data_value', []) _token = self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') if _token == 'data_value_1': data_value_1 = self._scan('data_value_1') thisval = data_value_1 elif _token not in ['start_sc_line', 'o_s_b', 'o_c_b']: delimited_data_value = self.delimited_data_value(_context) thisval = delimited_data_value elif _token == 'start_sc_line': sc_lines_of_text = self.sc_lines_of_text(_context) thisval = stripextras(sc_lines_of_text) else: # in ['o_s_b', 'o_c_b'] bracket_expression = self.bracket_expression(_context) thisval = bracket_expression return monitor('data_value',thisval) def delimited_data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'delimited_data_value', []) _token = self._peek('triple_quote_data_value', 'single_quote_data_value') if _token == 'triple_quote_data_value': triple_quote_data_value = self._scan('triple_quote_data_value') thisval = striptriple(triple_quote_data_value) else: # == 'single_quote_data_value' single_quote_data_value = self._scan('single_quote_data_value') thisval = stripstring(single_quote_data_value) return thisval def sc_lines_of_text(self, _parent=None): _context = self.Context(_parent, 
self._scanner, self._pos, 'sc_lines_of_text', []) start_sc_line = self._scan('start_sc_line') lines = StringIO();lines.write(start_sc_line) while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text': sc_line_of_text = self._scan('sc_line_of_text') lines.write(sc_line_of_text) if self._peek() not in ['end_sc_line', 'sc_line_of_text']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line'])) end_sc_line = self._scan('end_sc_line') lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue()) def bracket_expression(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', []) _token = self._peek('o_s_b', 'o_c_b') if _token == 'o_s_b': square_bracket_expr = self.square_bracket_expr(_context) return square_bracket_expr else: # == 'o_c_b' curly_bracket_expr = self.curly_bracket_expr(_context) return curly_bracket_expr def top_loop(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', []) LBLOCK = self._scan('LBLOCK') loopfield = self.loopfield(_context) loopvalues = self.loopvalues(_context) return loopfield,loopvalues def loopfield(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', []) loop_seq=[] while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name': data_name = self._scan('data_name') loop_seq.append(data_name) if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b'])) return loop_seq def loopvalues(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', []) data_value = self.data_value(_context) dataloop=[data_value] while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: data_value = self.data_value(_context) dataloop.append(monitor('loopval',data_value)) if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading'])) return dataloop def save_frame(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared]) save_heading = self._scan('save_heading') savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] while self._peek('save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading') in ['LBLOCK', 'data_name']: dataseq = 
self.dataseq(savebc[savehead], _context) if self._peek() not in ['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'LBLOCK', 'data_name', 'save_heading', 'END', 'data_heading'])) save_end = self._scan('save_end') return monitor('save_frame',savebc) def square_bracket_expr(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', []) o_s_b = self._scan('o_s_b') this_list = [] while self._peek('c_s_b', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') != 'c_s_b': data_value = self.data_value(_context) this_list.append(data_value) while self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'c_s_b', 'o_s_b', 'o_c_b') != 'c_s_b': data_value = self.data_value(_context) this_list.append(data_value) if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'c_s_b', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'c_s_b'])) if self._peek() not in ['c_s_b', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b'])) c_s_b = self._scan('c_s_b') return StarList(this_list) def curly_bracket_expr(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', []) o_c_b = self._scan('o_c_b') table_as_list = [] while self._peek('c_c_b', 'triple_quote_data_value', 'single_quote_data_value') != 'c_c_b': delimited_data_value = self.delimited_data_value(_context) table_as_list = [delimited_data_value] self._scan('":"') data_value = self.data_value(_context) table_as_list.append(data_value) while self._peek('triple_quote_data_value', 'single_quote_data_value', 'c_c_b') != 'c_c_b': delimited_data_value = self.delimited_data_value(_context) table_as_list.append(delimited_data_value) self._scan('":"') data_value = self.data_value(_context) table_as_list.append(data_value) if self._peek() not in ['triple_quote_data_value', 'single_quote_data_value', 'c_c_b']: raise
yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b'])) if self._peek() not in ['c_c_b', 'triple_quote_data_value', 'single_quote_data_value']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b'])) c_c_b = self._scan('c_c_b') return StarDict(pairwise(table_as_list)) def parse(rule, text): P = StarParser(StarParserScanner(text)) return yappsrt.wrap_error_reporter(P, rule) # End -- grammar generated by Yapps pycifrw-4.4/src/YappsStarParser_STAR2.py000066400000000000000000000533641345362224200202410ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import from .StarFile import StarBlock,StarFile,StarList,StarDict from io import StringIO # An alternative specification for the Cif Parser, based on Yapps2 # by Amit Patel (http://theory.stanford.edu/~amitp/Yapps) # # helper code: we define our match tokens lastval = '' def monitor(location,value): global lastval #print 'At %s: %s' % (location,repr(value)) lastval = repr(value) return value # Strip extras gets rid of leading and trailing whitespace, and # semicolons. def stripextras(value): from .StarFile import remove_line_folding, remove_line_prefix # we get rid of semicolons and leading/trailing terminators etc. import re jj = re.compile("[\n\r\f \t\v]*") semis = re.compile("[\n\r\f \t\v]*[\n\r\f]\n*;") cut = semis.match(value) if cut: #we have a semicolon-delimited string nv = value[cut.end():len(value)-2] try: if nv[-1]=='\r': nv = nv[:-1] except IndexError: #empty data value pass # apply protocols nv = remove_line_prefix(nv) nv = remove_line_folding(nv) return nv else: cut = jj.match(value) if cut: return stripstring(value[cut.end():]) return value # helper function to get rid of inverted commas etc. def stripstring(value): if value: if value[0]== '\'' and value[-1]=='\'': return value[1:-1] if value[0]=='"' and value[-1]=='"': return value[1:-1] return value # helper function to get rid of triple quotes def striptriple(value): if value: if value[:3] == '"""' and value[-3:] == '"""': return value[3:-3] if value[:3] == "'''" and value[-3:] == "'''": return value[3:-3] return value # helper function to populate a StarBlock given a list of names # and values . # # Note that there may be an empty list at the very end of our itemlists, # so we remove that if necessary. # def makeloop(target_block,loopdata): loop_seq,itemlists = loopdata if itemlists[-1] == []: itemlists.pop(-1) # print('Making loop with %s' % repr(itemlists)) step_size = len(loop_seq) for col_no in range(step_size): target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True) # now construct the loop try: target_block.CreateLoop(loop_seq) #will raise ValueError on problem except ValueError: error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq) print(error_string, file=sys.stderr) raise ValueError(error_string) # return an object with the appropriate amount of nesting def make_empty(nestlevel): gd = [] for i in range(1,nestlevel): gd = [gd] return gd # this function updates a dictionary first checking for name collisions, # which imply that the CIF is invalid. We need case insensitivity for # names. 
# Unfortunately we cannot check loop item contents against non-loop contents # in a non-messy way during parsing, as we may not have easy access to previous # key value pairs in the context of our call (unlike our built-in access to all # previous loops). # For this reason, we don't waste time checking looped items against non-looped # names during parsing of a data block. This would only match a subset of the # final items. We do check against ordinary items, however. # # Note the following situations: # (1) new_dict is empty -> we have just added a loop; do no checking # (2) new_dict is not empty -> we have some new key-value pairs # def cif_update(old_dict,new_dict,loops): old_keys = map(lambda a:a.lower(),old_dict.keys()) if new_dict != {}: # otherwise we have a new loop #print 'Comparing %s to %s' % (repr(old_keys),repr(new_dict.keys())) for new_key in new_dict.keys(): if new_key.lower() in old_keys: raise CifError("Duplicate dataname or blockname %s in input file" % new_key) old_dict[new_key] = new_dict[new_key] # # this takes two lines, so we couldn't fit it into a one line execution statement... def order_update(order_array,new_name): order_array.append(new_name) return new_name # and finally...turn a sequence into a python dict (thanks to Stackoverflow) def pairwise(iterable): it = iter(iterable) while 1: yield next(it), next(it) # Begin -- grammar generated by Yapps import sys, re from . import yapps3_compiled_rt as yappsrt class StarParserScanner(yappsrt.Scanner): def __init__(self, *args,**kwargs): patterns = [ ('":"', ':'), ('","', ','), ('([ \t\n\r](?!;))|[ \t]', '([ \t\n\r](?!;))|[ \t]'), ('(#.*[\n\r](?!;))|(#.*)', '(#.*[\n\r](?!;))|(#.*)'), ('LBLOCK', '(L|l)(O|o)(O|o)(P|p)_'), ('GLOBAL', '(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_'), ('STOP', '(S|s)(T|t)(O|o)(P|p)_'), ('save_heading', u'(S|s)(A|a)(V|v)(E|e)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('save_end', '(S|s)(A|a)(V|v)(E|e)_'), ('data_name', u'_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('data_heading', u'(D|d)(A|a)(T|t)(A|a)_[][!%&\\(\\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\\|~"#$\';_\xa0-\ud7ff\ue000-\ufdcf\ufdf0-\ufffd\U00010000-\U0001fffd\U00020000-\U0002fffd\U00030000-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e0000-\U000efffd\U000f0000-\U000ffffd\U00100000-\U0010fffd-]+'), ('start_sc_line', '(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+'), ('sc_line_of_text', '[^;\r\n]([^\r\n])*(\r\n|\r|\n)+'), ('end_sc_line', ';'), ('c_c_b', '\\}'), ('o_c_b', '\\{'), ('c_s_b', '\\]'), ('o_s_b', '\\['), ('dat_val_internal_sq', '\\[([^\\s\\[\\]]*)\\]'), ('triple_quote_data_value', '(?s)\'\'\'.*?\'\'\'|""".*?"""'), 
('single_quote_data_value', '\'([^\n\r\x0c\'])*\'+|"([^\n\r"])*"+'), ('END', '$'), ('data_value_1', '((?!(((S|s)(A|a)(V|v)(E|e)_[^\\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\\s]*)))[^\\s"#$\',_\\{\\}\\[\\]][^\\s,\\{\\}\\[\\]]*)'), ] yappsrt.Scanner.__init__(self,patterns,['([ \t\n\r](?!;))|[ \t]', '(#.*[\n\r](?!;))|(#.*)'],*args,**kwargs) class StarParser(yappsrt.Parser): Context = yappsrt.Context def input(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'input', [prepared]) _token = self._peek('END', 'data_heading') if _token == 'data_heading': dblock = self.dblock(prepared, _context) allblocks = prepared; allblocks.merge_fast(dblock) while self._peek('END', 'data_heading') == 'data_heading': dblock = self.dblock(prepared, _context) allblocks.merge_fast(dblock) if self._peek() not in ['END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['END', 'data_heading'])) END = self._scan('END') else: # == 'END' END = self._scan('END') allblocks = prepared allblocks.unlock(); return allblocks def dblock(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dblock', [prepared]) data_heading = self._scan('data_heading') heading = data_heading[5:];thisbc=StarFile(characterset='unicode',standard=prepared.standard);act_heading = thisbc.NewBlock(heading,prepared.blocktype(overwrite=False));stored_block = thisbc[act_heading] while self._peek('save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(stored_block, _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) thisbc.merge_fast(save_frame,parent=stored_block) if self._peek() not in ['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_heading', 'save_end', 'LBLOCK', 'data_name', 'END', 'data_heading'])) stored_block.setmaxnamelength(stored_block.maxnamelength);return (monitor('dblock',thisbc)) def dataseq(self, starblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'dataseq', [starblock]) data = self.data(starblock, _context) while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['LBLOCK', 'data_name']: data = self.data(starblock, _context) if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading'])) def data(self, currentblock, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data', [currentblock]) _token = self._peek('LBLOCK', 'data_name') if _token == 'LBLOCK': top_loop = self.top_loop(_context) makeloop(currentblock,top_loop) else: # == 'data_name' datakvpair = self.datakvpair(_context) currentblock.AddItem(datakvpair[0],datakvpair[1],precheck=False) def datakvpair(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'datakvpair', []) data_name = self._scan('data_name') data_value = self.data_value(_context) return 
[data_name,data_value] def data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'data_value', []) _token = self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') if _token == 'data_value_1': data_value_1 = self._scan('data_value_1') thisval = data_value_1 elif _token not in ['start_sc_line', 'o_s_b', 'o_c_b']: delimited_data_value = self.delimited_data_value(_context) thisval = delimited_data_value elif _token == 'start_sc_line': sc_lines_of_text = self.sc_lines_of_text(_context) thisval = stripextras(sc_lines_of_text) else: # in ['o_s_b', 'o_c_b'] bracket_expression = self.bracket_expression(_context) thisval = bracket_expression return monitor('data_value',thisval) def delimited_data_value(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'delimited_data_value', []) _token = self._peek('triple_quote_data_value', 'single_quote_data_value') if _token == 'triple_quote_data_value': triple_quote_data_value = self._scan('triple_quote_data_value') thisval = striptriple(triple_quote_data_value) else: # == 'single_quote_data_value' single_quote_data_value = self._scan('single_quote_data_value') thisval = stripstring(single_quote_data_value) return thisval def sc_lines_of_text(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'sc_lines_of_text', []) start_sc_line = self._scan('start_sc_line') lines = StringIO();lines.write(start_sc_line) while self._peek('end_sc_line', 'sc_line_of_text') == 'sc_line_of_text': sc_line_of_text = self._scan('sc_line_of_text') lines.write(sc_line_of_text) if self._peek() not in ['end_sc_line', 'sc_line_of_text']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['sc_line_of_text', 'end_sc_line'])) end_sc_line = self._scan('end_sc_line') lines.write(end_sc_line);return monitor('sc_line_of_text',lines.getvalue()) def bracket_expression(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'bracket_expression', []) _token = self._peek('o_s_b', 'o_c_b') if _token == 'o_s_b': square_bracket_expr = self.square_bracket_expr(_context) return square_bracket_expr else: # == 'o_c_b' curly_bracket_expr = self.curly_bracket_expr(_context) return curly_bracket_expr def top_loop(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'top_loop', []) LBLOCK = self._scan('LBLOCK') loopfield = self.loopfield(_context) loopvalues = self.loopvalues(_context) return loopfield,loopvalues def loopfield(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopfield', []) loop_seq=[] while self._peek('data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == 'data_name': data_name = self._scan('data_name') loop_seq.append(data_name) if self._peek() not in ['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_name', 'data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b'])) return loop_seq def loopvalues(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'loopvalues', []) data_value = self.data_value(_context) dataloop=[data_value] while 
self._peek('data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading') in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: data_value = self.data_value(_context) dataloop.append(monitor('loopval',data_value)) if self._peek() not in ['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b', 'LBLOCK', 'data_name', 'save_end', 'save_heading', 'END', 'data_heading'])) return dataloop def save_frame(self, prepared, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'save_frame', [prepared]) save_heading = self._scan('save_heading') savehead = save_heading[5:];savebc = StarFile();newname = savebc.NewBlock(savehead,prepared.blocktype(overwrite=False));stored_block = savebc[newname] while self._peek('save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading') in ['save_heading', 'LBLOCK', 'data_name']: _token = self._peek('save_heading', 'LBLOCK', 'data_name') if _token != 'save_heading': dataseq = self.dataseq(savebc[savehead], _context) else: # == 'save_heading' save_frame = self.save_frame(prepared, _context) savebc.merge_fast(save_frame,parent=stored_block) if self._peek() not in ['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['save_end', 'save_heading', 'LBLOCK', 'data_name', 'END', 'data_heading'])) save_end = self._scan('save_end') return monitor('save_frame',savebc) def square_bracket_expr(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'square_bracket_expr', []) o_s_b = self._scan('o_s_b') this_list = [] while self._peek('c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') not in ['c_s_b', '","']: data_value = self.data_value(_context) this_list.append(data_value) while self._peek('","', 'data_value_1', 'c_s_b',
'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b') == '","': self._scan('","') data_value = self.data_value(_context) this_list.append(data_value) if self._peek() not in ['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b'])) if self._peek() not in ['c_s_b', 'data_value_1', '","', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', 'o_s_b', 'o_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['data_value_1', 'c_s_b', 'triple_quote_data_value', 'single_quote_data_value', 'start_sc_line', '","', 'o_s_b', 'o_c_b'])) c_s_b = self._scan('c_s_b') return StarList(this_list) def curly_bracket_expr(self, _parent=None): _context = self.Context(_parent, self._scanner, self._pos, 'curly_bracket_expr', []) o_c_b = self._scan('o_c_b') table_as_list = [] while self._peek('c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","') in ['triple_quote_data_value', 'single_quote_data_value']: delimited_data_value = self.delimited_data_value(_context) table_as_list = [delimited_data_value] self._scan('":"') data_value = self.data_value(_context) table_as_list.append(data_value) while self._peek('","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b') == '","': self._scan('","') delimited_data_value = self.delimited_data_value(_context) table_as_list.append(delimited_data_value) self._scan('":"') data_value = self.data_value(_context) table_as_list.append(data_value) if self._peek() not in ['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['","', 'triple_quote_data_value', 'single_quote_data_value', 'c_c_b'])) if self._peek() not in ['c_c_b', 'triple_quote_data_value', 'single_quote_data_value', '","']: raise yappsrt.YappsSyntaxError(charpos=self._scanner.get_prev_char_pos(), context=_context, msg='Need one of ' + ', '.join(['triple_quote_data_value', 'single_quote_data_value', 'c_c_b', '","'])) c_c_b = self._scan('c_c_b') return StarDict(pairwise(table_as_list)) def parse(rule, text): P = StarParser(StarParserScanner(text)) return yappsrt.wrap_error_reporter(P, rule) # End -- grammar generated by Yapps pycifrw-4.4/src/__init__.py000066400000000000000000000006771345362224200157610ustar00rootroot00000000000000from __future__ import absolute_import # print("Name is " + repr(__name__)) from .StarFile import StarError,ReadStar,StarList,apply_line_folding,apply_line_prefix from .CifFile_module import CifDic,CifError, CifBlock,ReadCif,ValidCifFile,ValidCifError,Validate,CifFile from .CifFile_module import get_number_with_esd,convert_type,validate_report from .StarFile import remove_line_prefix,remove_line_folding from .StarFile import check_stringiness pycifrw-4.4/src/cif-lex.html000066400000000000000000000314331345362224200160570ustar00rootroot00000000000000 cif-lex.nw A Noweb literate programming file for Star grammar and parser specification, replacing the long-serving Yapps2 parser.

Several standards are available, of which four are implemented: 1.0, 1.1, CIF2 and STAR2. STAR2 differs from CIF2 in that list and table entries have comma separators, and nested save frames are allowed. Note that 1.0, 1.1 and CIF2/STAR2 also differ in their treatment of unquoted data values beginning with brackets. Because of the large commonality, we express each of the standards as slight deviations from a general standard using Noweb chunks.
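
For example, the same unquoted value is read quite differently depending on the grammar selected (an illustrative sketch using PyCIFRW's ReadCif entry point on a hypothetical file containing "_demo.val [3,4]"):

    from CifFile import ReadCif
    cf = ReadCif('demo.cif', grammar='1.0')   # hypothetical file name
    # grammar='1.0'  : '[3,4]' is an ordinary string value
    # grammar='1.1'  : the leading '[' is reserved, so the value is rejected
    # grammar='2.0'  : a one-element list ['3,4'] (commas allowed in bare values)
    # grammar='STAR2': the two-element list ['3', '4']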

Old CIF 1.0 standard. This differs from 1.1 in allowing square brackets to begin an undelimited text string.

<Lexer 1.0>=
<Common v1 lexer code>
<CIF1.0 data value>
<Common postamble>

<Common postamble>= (<-U U-> U-> U->)
lexer = lex.lex(debug=1)
if __name__ == "__main__":
    lex.runmain(lexer)

A CIF1.0 data value allows naked square brackets at the front of undelimited data values.

<CIF1.0 data value>= (<-U)
def t_DATA_VALUE_1(t):
     r"((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
     if len(t.value)>1:
        if t.value[0]== '\'' and t.value[-1]=='\'':
           t.value = t.value[1:-1]
        elif t.value[0]=='"' and t.value[-1]=='"':
           t.value = t.value[1:-1]
     return t
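
As a quick check of the pattern above (a sketch using only the unquoted branch of the regular expression; the keyword look-ahead guard is dropped since it cannot fire for this input):

    import re
    v10 = re.compile(r"[^\s\"#$'_][^\s]*")
    print(v10.match("[4,5]").group())   # -> [4,5]  (leading bracket accepted)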
     
<Lexer 1.1>=
<Common v1 lexer code>
<CIF1.1 data value>
<Common postamble>

A CIF1.1 data value reserves an initial square bracket, so (unlike 1.0 above) an undelimited value may not begin with '[' or ']'.

<CIF1.1 data value>= (<-U)
def t_DATA_VALUE_1(t):
     r"((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\[\]][^\s]*)|'(('(?=\S))|([^\n\r\f']))*'+|\"((\"(?=\S))|([^\n\r\"]))*\"+"
     if len(t.value)>1:
        if t.value[0]== '\'' and t.value[-1]=='\'':
           t.value = t.value[1:-1]
        elif t.value[0]=='"' and t.value[-1]=='"':
           t.value = t.value[1:-1]
     return t

<Common v1 lexer code>= (<-U <-U)
# A new lexer for CIF using PLY
#
import ply.lex as lex
import re
from StarFile import remove_line_folding,remove_line_prefix


states = (
    ('semicolon','exclusive'),
)

tokens  = (
    'COMMENT',
    'WHITESPACE',
    'LBLOCK',
    'GLOBAL',
    'STOP',
    'SAVE_HEADING',
    'SAVE_END',
    'DATA_NAME',
    'DATA_HEADING',
    'START_SC_LINE',
    'SC_LINE_OF_TEXT',
    'END_SC_LINE',
    'DATA_VALUE_1'
    )

t_ignore_WHITESPACE = r"([ \t\n\r](?!;))|[ \t]"

t_ignore_COMMENT = r"(\#.*[\n\r](?!;))|(\#.*)"

def t_error(t):
    print('Illegal character %s' % repr(t.value[0]))

def t_LBLOCK(t):
     r"(L|l)(O|o)(O|o)(P|p)_"
     return t

def t_GLOBAL(t):
     r"(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
     return t

def t_STOP(t):
     r"(S|s)(T|t)(O|o)(P|p)_"
     return t

def t_SAVE_HEADING(t):
    r"(S|s)(A|a)(V|v)(E|e)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
    return t

def t_SAVE_END(t):
    r"(S|s)(A|a)(V|v)(E|e)_"
    return t

def t_DATA_NAME(t):
    r"_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+" #_followed by stuff
    return t

def t_DATA_HEADING(t):
    r"(D|d)(A|a)(T|t)(A|a)_[][!%&\(\)*+,./:<=>?@0-9A-Za-z\\\\^`{}\|~\"#$';_-]+"
    return t

def t_START_SC_LINE(t):
    r"(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
    t.lexer.begin('semicolon')
    t.lexer.sctext = t.value[t.value.find(';')+1:]

def t_semicolon_SC_LINE_OF_TEXT(t):
    r"[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
    t.lexer.sctext += t.value

def t_semicolon_END_SC_LINE(t):
    r';'
    t.lexer.begin('INITIAL')
    t.value = t.lexer.sctext[:-1]  #drop eol
    if len(t.value)>0 and t.value[-1] == '\r': t.value = t.value[:-1]
    t.value = remove_line_folding(t.value)
    return t
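
The semicolon-delimited text field machinery above is easiest to see in action. A sketch, assuming the whole module has been tangled and the postamble has built the lexer:

    lexer.input("data_t _t.value\n;\nline one\nline two\n;")
    for tok in lexer:
        print(tok.type, repr(tok.value))
    # DATA_HEADING and DATA_NAME tokens, then a single END_SC_LINE token
    # whose value is the accumulated text between the ';' delimiters

Note that START_SC_LINE and SC_LINE_OF_TEXT return nothing: they only switch state and accumulate text, so the whole field surfaces as one END_SC_LINE token.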

<Lexer 2.0>=
<Common v2 lexer code>
<CIF2.0 data value>
<Common postamble>

Commas are allowed in non-delimited data values in CIF2.0 but not STAR2.0. Semicolons are allowed in CIF2.0 non-delimited values as long as they do not begin a line - such a case is picked up by the start_sc_line check *before* the data value check.

<CIF2.0 data value>= (<-U)
def t_DATA_VALUE_1(t):
     r"((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$'_\{\}\[\]][^\s\{\}\[\]]*)"
     return t

<Lexer STAR2>=
<Common v2 lexer code>
<STAR2.0 data value>
<Common postamble>

STAR2.0 uses commas to separate list and table items, so commas are not allowed in non-delimited values.

<STAR2.0 data value>= (<-U)
def t_DATA_VALUE_1(t):
     r"((?!(((S|s)(A|a)(V|v)(E|e)_[^\s]*)|((G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_[^\s]*)|((S|s)(T|t)(O|o)(P|p)_[^\s]*)|((D|d)(A|a)(T|t)(A|a)_[^\s]*)))[^\s\"#$',_\{\}\[\]][^\s,\{\}\[\]]*)"
     return t
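
The practical difference between the CIF2.0 and STAR2.0 patterns is easiest to see side by side (a sketch using only the final character classes of the two chunks; the keyword look-ahead guard cannot match these inputs and is dropped):

    import re
    cif2_value  = re.compile(r"[^\s\"#$'_\{\}\[\]][^\s\{\}\[\]]*")
    star2_value = re.compile(r"[^\s\"#$',_\{\}\[\]][^\s,\{\}\[\]]*")
    print(cif2_value.match("1,2,3").group())    # -> 1,2,3 (comma is part of the value)
    print(star2_value.match("1,2,3").group())   # -> 1     (comma ends the value)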

The reason for switching to PLY from Yapps is that some Python builds cannot handle the wide characters allowed by our Unicode standard, and Yapps does not have any simple way to construct regular expressions conditionally.

<Common v2 lexer code>= (<-U <-U)
# A new lexer for CIF using PLY
#
import ply.lex as lex
from ply.lex import TOKEN
import re,sys
from StarFile import remove_line_folding,remove_line_prefix

# Following unicode fix based on suggestion of Pavol Juhas
# Check our Unicode status: narrow Python builds report
# sys.maxunicode == 0xFFFF, wide builds 0x10FFFF (1114111)
if sys.maxunicode < 1114111:
     print('Warning: Narrow Python build detected. Unicode characters outside the Basic Multilingual Plane are not supported')
     rewidechars = ""
else:
     rewidechars = u"\U00010000-\U0010FFFD"

# Define some unicode ranges to save space (all_chars is not currently used)
non_blank_chars = u"[\u0021-\u007E\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD" + rewidechars + "]" 
# everything that is allowed
all_chars = u"[\u0009\u000A\u000D\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD" + rewidechars + "]" 
# Construct the regular expressions accordingly
dname_regexp = "_" + non_blank_chars + "+"
save_regexp = u"(S|s)(A|a)(V|v)(E|e)_" + non_blank_chars + "+"
dheading_regexp = u"(D|d)(A|a)(T|t)(A|a)_" + non_blank_chars + "+"

states = (
    ('semicolon','exclusive'),
    ('tripleq','exclusive'),
    ('triplea','exclusive')
)

tokens  = (
    'COMMENT',
    'WHITESPACE',
    'LBLOCK',
    'GLOBAL',
    'STOP',
    'SAVE_HEADING',
    'SAVE_END',
    'DATA_NAME',
    'DATA_HEADING',
    'START_SC_LINE',
    'SC_LINE_OF_TEXT',
    'END_SC_LINE',
    'DAT_VAL_NOCOMMA_NOSQ',
    'DAT_VAL_INTERNAL_SQ',
    'TRIPLE_QUOTE_START',
    'TRIPLE_QUOTE_DATA_VALUE',
    'TRIPLE_APOST_START',
    'TRIPLE_APOST_DATA_VALUE',
    'LINE_OF_TEXT',
    'SINGLE_QUOTE_DATA_VALUE',
    'DATA_VALUE_1'
    )

t_ignore_WHITESPACE = r"([ \t\n\r](?!;))|[ \t]"

t_ignore_COMMENT = r"(\#.*[\n\r](?!;))|(\#.*)"

literals = ['{','}','[',']',':']

def t_error(t):
    print('Illegal character %s' % repr(t.value[0]))

def t_LBLOCK(t):
     r"(L|l)(O|o)(O|o)(P|p)_"
     return t

def t_GLOBAL(t):
     r"(G|g)(L|l)(O|o)(B|b)(A|a)(L|l)_"
     return t

def t_STOP(t):
     r"(S|s)(T|t)(O|o)(P|p)_"
     return t

@TOKEN(save_regexp)
def t_SAVE_HEADING(t):
    return t

def t_SAVE_END(t):
    r"(S|s)(A|a)(V|v)(E|e)_"
    return t

@TOKEN(dname_regexp)
def t_DATA_NAME(t):    
    return t

@TOKEN(dheading_regexp)
def t_DATA_HEADING(t):
    return t

def t_START_SC_LINE(t):
    r"(\n|\r\n);([^\n\r])*(\r\n|\r|\n)+"
    t.lexer.begin('semicolon')
    t.lexer.sctext = t.value[t.value.find(';')+1:]

def t_semicolon_SC_LINE_OF_TEXT(t):
    r"[^;\r\n]([^\r\n])*(\r\n|\r|\n)+"
    t.lexer.sctext += t.value

def t_semicolon_END_SC_LINE(t):
    r';'
    t.lexer.begin('INITIAL')
    t.value = t.lexer.sctext[:-1]  #drop eol
    if t.value[-1] == '\r': t.value = t.value[:-1]
    t.value = remove_line_prefix(t.value)
    t.value = remove_line_folding(t.value)
    return t

def t_DAT_VAL_INTERNAL_SQ(t):
    r"\[([^\s\[\]]*)\]"
    return t

def t_TRIPLE_QUOTE_START(t):
    r"\"\"\""
    t.lexer.begin('tripleq')
    t.lexer.tqval = ""

def t_tripleq_TRIPLE_QUOTE_DATA_VALUE(t):
    r"([^\r\n]*)\"\"\""
    t.lexer.begin('INITIAL')
    t.value = t.lexer.tqval + t.value[:-3]
    return t

def t_tripleq_triplea_LINE_OF_TEXT(t):
    r"([^\r\n])*(\r\n|\r|\n)+"
    t.lexer.tqval += t.value

def t_TRIPLE_APOST_START(t):
    r"'''"
    t.lexer.begin('triplea')
    t.lexer.tqval = ""

def t_triplea_TRIPLE_APOST_DATA_VALUE(t):
    r"([^\r\n]*)'''"
    t.lexer.begin('INITIAL')
    t.value = t.lexer.tqval + t.value[:-3]
    return t

def t_SINGLE_QUOTE_DATA_VALUE(t):
    r"'([^\n\r\f'])*'+|\"([^\n\r\"])*\"+"
    t.value = t.value[1:-1]
    return t
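
The triple-quote handling follows the same pattern as the semicolon fields: the *_START rules silently enter an exclusive state, LINE_OF_TEXT accumulates complete lines, and the rule that sees the closing delimiter emits one token. A sketch, again assuming the assembled module:

    lexer.input("_t.value '''first line\nsecond line'''")
    for tok in lexer:
        print(tok.type, repr(tok.value))
    # DATA_NAME, then TRIPLE_APOST_DATA_VALUE == 'first line\nsecond line'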
pycifrw-4.4/src/cif-yacc.html000066400000000000000000000200441345362224200162020ustar00rootroot00000000000000 cif-yacc.nw Common parser for CIF/STAR grammar

<CIF2 grammar>=
import ply.yacc as yacc
from cif_lex_2_0 import tokens
from StarFile import StarFile,StarBlock,StarList,StarDict
<Helper functions>
<CIF grammar common features>
<CIF2 additions>
<Postamble>

<CIF1.1 grammar>=
import ply.yacc as yacc
from cif_lex_1_1 import tokens
from StarFile import StarFile,StarBlock
<Helper functions>
<CIF grammar common features>
<CIF1 specific productions>
<Postamble>


<Postamble>= (<-U <-U)
parser = yacc.yacc()
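
Once tangled, driving the generated parser takes two lines (a sketch; the module name cif_lex_2_0 is the one assumed by the CIF2 grammar chunk's import, and lexer is the object built by the lexer postamble):

    from cif_lex_2_0 import lexer
    result = parser.parse("data_demo _cell.length_a 5.4\n", lexer=lexer)
    print(list(result.keys()))   # expect a StarFile holding the single block 'demo'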

Copied from our Yapps parser

<CIF grammar common features>= (<-U <-U)

def p_input(p):
    ''' input : dblock
              | input dblock
              | '''
    if len(p) == 1:
        p[0] = StarFile()
    elif len(p) == 2:
        p[0] = p[1]
    else:
        p[1].merge_fast(p[2])
        p[0] = p[1]

def p_dblock(p):
    ''' dblock : DATA_HEADING data_contents
               | DATA_HEADING '''
    heading = p[1][5:]
    p[0] = StarFile(characterset='unicode')
    p[0].NewBlock(heading,StarBlock(overwrite=False))
    if len(p) == 3:
        # Traverse our mini AST
        for dc in p[2]:
            if dc[0] == 'SAVE':
                p[0].merge_fast(dc[1],parent=heading)
            elif dc[0] == 'KVPAIR':
                p[0][heading].AddItem(dc[1],dc[2],precheck=False)
            elif dc[0] == 'LOOP':
                makeloop(p[0][heading],dc[1:])
            else:
                raise SyntaxError('Programming error, what is ' + repr(dc[0]))
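
For orientation, the mini AST walked above is just a list of tagged lists. A block such as

    data_demo
    _cell.length_a 5.4
    loop_
    _atom_site.label
    C1

arrives in p[2] as (an illustrative sketch of the intermediate form; values are still strings):

    [['KVPAIR', '_cell.length_a', '5.4'],
     ['LOOP', ['_atom_site.label'], ['C1']]]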

def p_data_contents(p):
    ''' data_contents : dataseq 
                     | save_frame
                     | data_contents dataseq 
                     | data_contents save_frame '''
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]

def p_dataseq(p):
    ''' dataseq : data
                | dataseq data '''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]

def p_data(p):
    ''' data : top_loop
             | datakvpair '''
    p[0] = p[1]

def p_datakvpair(p):
    ''' datakvpair : DATA_NAME data_value '''
    p[0] = ['KVPAIR',p[1],p[2]]

def p_top_loop(p):
    ''' top_loop : LBLOCK loopfield loopvalues '''
    p[0] = ['LOOP',p[2],p[3]]

def p_loopfield(p):
    ''' loopfield : DATA_NAME
                  | loopfield DATA_NAME '''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]

def p_loopvalues(p):
    ''' loopvalues : data_value
                   | loopvalues data_value '''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]

def p_save_frame(p):
    ''' save_frame : SAVE_HEADING data_contents SAVE_END
                   | SAVE_HEADING SAVE_END '''
    heading = p[1][5:]
    myframe = StarFile(characterset='unicode')
    myframe.NewBlock(heading,StarBlock(overwrite=False))
    if len(p) == 4:
        # Traverse our mini AST
        for dc in p[2]:
            if dc[0] == 'SAVE':
                myframe.merge_fast(dc[1],parent=heading)
            elif dc[0] == 'KVPAIR':
                myframe[heading].AddItem(dc[1],dc[2],precheck=False)
            elif dc[0] == 'LOOP':
                makeloop(myframe[heading],dc[1:])
    p[0] = [['SAVE',myframe]]

These productions for CIF1 do not include the bracket expressions or separate delimited expressions.

<CIF1 specific productions>= (<-U)
def p_data_value(p):
    ''' data_value : DATA_VALUE_1
                   | END_SC_LINE '''
    p[0] = p[1]

CIF2 most notably adds lists and tables.

<CIF2 additions>= (<-U)
def p_data_value(p):
    ''' data_value : DATA_VALUE_1
                   | delimited_data_value
                   | END_SC_LINE
                   | square_bracket_expr
                   | curly_bracket_expr '''
    p[0] = p[1]

def p_delimited_data_value(p):
    ''' delimited_data_value : TRIPLE_QUOTE_DATA_VALUE
                             | TRIPLE_APOST_DATA_VALUE
                             | SINGLE_QUOTE_DATA_VALUE '''
    p[0] = p[1]

def p_square_bracket_expr(p):
    ''' square_bracket_expr : '[' list_builder ']' '''
    p[0] = StarList(p[2])

def p_list_builder(p):
    ''' list_builder : data_value
                     | list_builder data_value
                     | '''
    if len(p) == 2:
        p[0] = [p[1]]
    elif len(p) == 3:
        p[0] = p[1] + [p[2]]
    else:
        p[0] = []

def p_curly_bracket_expr(p):
    ''' curly_bracket_expr : '{' table_builder '}' '''
    p[0] = StarDict(pairwise(p[2]))

def p_table_builder(p):
    ''' table_builder : delimited_data_value ':' data_value
                      | table_builder delimited_data_value ':' data_value '''
    if len(p) == 4:
        p[0] = [p[1],p[3]]
    elif len(p) == 5:
        p[0] = p[1] + [p[2],p[4]] 

Some convenience functions

<Helper functions>= (<-U <-U)
import sys

def pairwise(iterable):
    # pair up a flat [k1,v1,k2,v2,...] sequence as (k1,v1),(k2,v2),...
    it = iter(iterable)
    return zip(it, it)

def makeloop(target_block,loopdata):
    loop_seq,itemlists = loopdata
    if itemlists[-1] == []: itemlists.pop(-1)
    # print('Making loop with %s' % repr(itemlists))
    step_size = len(loop_seq)
    for col_no in range(step_size):
       target_block.AddItem(loop_seq[col_no], itemlists[col_no::step_size],precheck=True)
    # now construct the loop
    try:
        target_block.CreateLoop(loop_seq)  #will raise ValueError on problem
    except ValueError:
        error_string = 'Incorrect number of loop values for loop containing %s' % repr(loop_seq)
        sys.stderr.write(error_string + '\n')
        raise ValueError(error_string)
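
For example, pairwise turns the flat key/value sequence assembled by the table productions into the pairs that StarDict expects:

    print(dict(pairwise(['a', 1, 'b', 2])))   # -> {'a': 1, 'b': 2}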
pycifrw-4.4/src/drel/000077500000000000000000000000001345362224200145645ustar00rootroot00000000000000pycifrw-4.4/src/drel/__init__.py000066400000000000000000000000151345362224200166710ustar00rootroot00000000000000# Dummy file pycifrw-4.4/src/drel/drel_ast_yacc.py000066400000000000000000000407571345362224200177470ustar00rootroot00000000000000# A dREL grammar written for python-ply # # The output is an Abstract Syntax Tree that represents a # function fragment that needs to be wrapped with information # appropriate to the target language. # The grammar is based on the Python 2.7 grammar, in # consultation with Doug du Boulay's JsCifBrowser # grammar (also derived from a Python grammar). # To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import from .drel_lex import lexer,tokens import ply.yacc as yacc # Overall translation unit # Our input is a sequence of statements def p_input(p): '''input : maybe_nline statement | input statement ''' if p[1] is None: p[0] = p[2] else: so_far = p[1][1] new_statements = p[2][1] p[0] = ["STATEMENTS",p[1][1] + p[2][1]] #print('input now {!r}'.format(p[0])) # We distinguish between compound statements and # small statements. Small statements may be # chained together on a single line with semicolon # separators. Compound statements are not separated # in this way, and will always be terminated by # a newline. def p_statement(p): '''statement : simple_stmt newlines | simple_stmt ";" newlines | compound_stmt ''' p[0] = p[1] # A simple statement is a sequence of small statements terminated by # a NEWLINE or EOF def p_simple_stmt(p): ''' simple_stmt : small_stmt | simple_stmt ";" small_stmt ''' if len(p) == 2: p[0] = ["STATEMENTS",[p[1]]] else: p[0] = ["STATEMENTS",p[1][1] + [p[3]]] # This production appears inside a set of braces. Any statement # will be automatically terminated by a newline so we do not # need to include that here def p_statements(p): '''statements : statement | statements statement ''' if len(p) == 2: p[0] = p[1] else: p[0] = ["STATEMENTS", p[1][1] + [p[2]]] def p_small_stmt(p): '''small_stmt : expr_stmt | print_stmt | break_stmt | next_stmt''' p[0] = p[1] def p_break_stmt(p): '''break_stmt : BREAK''' p[0] = ["BREAK"] def p_next_stmt(p): '''next_stmt : NEXT''' p[0] = ["NEXT"] def p_print_stmt(p): '''print_stmt : PRINT expression ''' p[0] = ['PRINT', p[2]] # Note here that a simple testlist_star_expr is useless as in our # side-effect-free world it will be evaluated and discarded. We # could just drop it right now but we let it go through to the # AST processor for language-dependent processing def p_expr_stmt(p): ''' expr_stmt : testlist_star_expr | testlist_star_expr AUGOP testlist_star_expr | testlist_star_expr "=" testlist_star_expr | fancy_drel_assignment_stmt ''' if len(p) == 2 and p[1][0] != 'FANCY_ASSIGN': # we have a list of expressions which we p[0] = ["EXPRLIST",p[1]] elif len(p) == 2 and p[1][0] == 'FANCY_ASSIGN': p[0] = p[1] else: p[0] = ["ASSIGN",p[1],p[2],p[3]] def p_testlist_star_expr(p): # list of expressions in fact ''' testlist_star_expr : expression | testlist_star_expr "," maybe_nline expression ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1] + [p[4]] # Simplified from the python 2.5 version due to apparent conflict with # the other type of IF expression... # def p_expression(p): '''expression : or_test ''' p[0] = ["EXPR",p[1]] # This is too generous, as it allows a function call on the # LHS to be assigned to. 
This will cause a syntax error on # execution. def p_or_test(p): ''' or_test : and_test | or_test OR and_test | or_test BADOR and_test''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP","or",p[1],p[3]] def p_and_test(p): '''and_test : not_test | and_test AND not_test | and_test BADAND not_test''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP","and", p[1],p[3]] def p_not_test(p): '''not_test : comparison | NOT not_test''' if len(p) == 2: p[0] = p[1] else: p[0] = ["UNARY","not",p[2]] def p_comparison(p): '''comparison : a_expr | a_expr comp_operator a_expr''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP",p[2],p[1],p[3]] def p_comp_operator(p): '''comp_operator : restricted_comp_operator | IN | NOT IN ''' if len(p)==3: p[0] = "not in" else: p[0] = p[1] def p_restricted_comp_operator(p): #for loop tests '''restricted_comp_operator : "<" | ">" | GTE | LTE | NEQ | ISEQUAL ''' p[0] = p[1] def p_a_expr(p): '''a_expr : m_expr | a_expr "+" m_expr | a_expr "-" m_expr''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP",p[2], p[1], p[3]] def p_m_expr(p): #note bitwise and and or are pycifrw extensions '''m_expr : u_expr | m_expr "*" u_expr | m_expr "/" u_expr | m_expr "^" u_expr | m_expr "&" u_expr | m_expr "|" u_expr ''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP",p[2],p[1],p[3]] def p_u_expr(p): '''u_expr : power | "-" u_expr | "+" u_expr''' if len(p) == 2: p[0] = p[1] else: p[0] = ["SIGN",p[1],p[2]] def p_power(p): '''power : primary | primary POWER u_expr''' if len(p) == 2: p[0] = p[1] else: p[0] = ["MATHOP","**",p[1] , p[3]] # print('At power: p[0] is {!r}'.format(p[0])) def p_primary(p): '''primary : atom | attributeref | subscription | slicing | call''' # print 'Primary -> %s' % repr(p[1]) p[0] = p[1] def p_atom(p): '''atom : ID | item_tag | literal | enclosure''' # print 'Atom -> %s' % repr(p[1]) p[0] = ["ATOM",p[1]] def p_item_tag(p): '''item_tag : ITEM_TAG''' p[0] = ["ITEM_TAG",p[1]] def p_literal(p): '''literal : stringliteral | INTEGER | HEXINT | OCTINT | BININT | REAL | IMAGINARY''' # print 'literal-> %s' % repr(p[1]) p[0] = ["LITERAL",p[1]] def p_stringliteral(p): '''stringliteral : STRPREFIX SHORTSTRING | STRPREFIX LONGSTRING | SHORTSTRING | LONGSTRING''' if len(p)==3: p[0] = p[1]+p[2] else: p[0] = p[1] def p_enclosure(p): '''enclosure : parenth_form | string_conversion | list_display | table_display ''' p[0]=p[1] def p_parenth_form(p): '''parenth_form : OPEN_PAREN testlist_star_expr CLOSE_PAREN | OPEN_PAREN CLOSE_PAREN ''' if len(p) == 3: p[0] = ["GROUP"] else: p[0] = ["GROUP",p[2]] # print('Parens: {!r}'.format(p[0])) def p_string_conversion(p): '''string_conversion : "`" testlist_star_expr "`" ''' # WARNING: NOT IN PUBLISHED dREL papaer p[0] = ["FUNC_CALL","str",p[2]] def p_list_display(p): ''' list_display : "[" maybe_nline listmaker maybe_nline "]" | "[" maybe_nline "]" ''' if len(p) == 4: p[0] = ["LIST"] else: p[0] = ["LIST"] + p[3] # scrap the trailing comma def p_listmaker(p): '''listmaker : expression listmaker2 ''' p[0] = [p[1]] + p[2] # print('listmaker: {!r}'.format(p[0])) def p_listmaker2(p): '''listmaker2 : "," maybe_nline expression | listmaker2 "," maybe_nline expression | ''' if len(p) == 4: p[0] = [p[3]] elif len(p) < 2: p[0] = [] else: p[0] = p[1] + [p[4]] # define tables def p_table_display(p): ''' table_display : "{" maybe_nline tablemaker maybe_nline "}" | "{" maybe_nline "}" ''' if len(p) == 4: p[0] = ["TABLE"] else: p[0] = ["TABLE"] + p[3] def p_tablemaker(p): '''tablemaker : stringliteral ":" expression tablemaker2 ''' p[0] = [(p[1],p[3])] + 
p[4] def p_tablemaker2(p): '''tablemaker2 : "," maybe_nline stringliteral ":" expression | tablemaker2 "," maybe_nline stringliteral ":" expression | ''' if len(p) == 6: p[0] = [(p[3],p[5])] elif len(p) < 2: p[0] = [] else: p[0] = p[1] + [(p[4],p[6])] # Note that we need to catch tags of the form 't.12', which # our lexer will interpret as ID REAL. We therefore also # accept t.12(3), which is not allowed, but we don't bother # trying to catch this error here. def p_attributeref(p): '''attributeref : primary attribute_tag ''' p[0] = ["ATTRIBUTE",p[1],p[2]] def p_attribute_tag(p): '''attribute_tag : "." ID | REAL ''' if len(p) == 3: p[0] = p[2] else: p[0] = p[1][1:] def p_subscription(p): '''subscription : primary "[" expression "]" ''' p[0] = ["SUBSCRIPTION",p[1],p[3]] def p_slicing(p): '''slicing : primary "[" proper_slice "]" | primary "[" slice_list "]" ''' p[0] = ["SLICE", p[1], p[3] ] def p_proper_slice(p): '''proper_slice : short_slice | long_slice ''' p[0] = [p[1]] # Our AST slice convention is that, if anything is mentioned, # the first element is always # explicitly mentioned. A single element will be a starting # element. Two elements are start and finish. An empty list # is all elements. Three elements are start, finish, step. # We combine these into a list of slices. def p_short_slice(p): '''short_slice : ":" | expression ":" expression | ":" expression | expression ":" ''' if len(p) == 2: p[0] = [] if len(p) == 4: p[0] = [p[1],p[3]] if len(p) == 3 and p[1] == ":": p[0] = [0,p[2]] if len(p) == 3 and p[2] == ":": p[0] = [p[1]] def p_long_slice(p): '''long_slice : short_slice ":" expression ''' if len(p) == 4: p[0] = p[1] + [p[3]] else: p[0] = p[1] def p_slice_list(p): ''' slice_list : slice_item | slice_list "," slice_item ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1] + [p[3]] def p_slice_item(p): ''' slice_item : expression | proper_slice ''' p[0] = p[1] def p_call(p): '''call : ID OPEN_PAREN CLOSE_PAREN | ID OPEN_PAREN argument_list CLOSE_PAREN ''' if len(p) == 4: p[0] = ["FUNC_CALL",p[1],[]] else: p[0] = ["FUNC_CALL",p[1],p[3]] #print("Function call: {!r}".format(p[0])) # These are the arguments to a call, not a definition def p_argument_list(p): '''argument_list : func_arg | argument_list "," func_arg ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1] + [p[3]] def p_func_arg(p): '''func_arg : expression ''' p[0] = p[1] def p_fancy_drel_assignment_stmt(p): '''fancy_drel_assignment_stmt : ID OPEN_PAREN dotlist CLOSE_PAREN ''' p[0] = ["FANCY_ASSIGN",p[1],p[3]] # print("Fancy assignment -> {!r}".format(p[0])) # Something made up specially for drel. A newline is OK between assignments def p_dotlist(p): '''dotlist : "." ID "=" expression | dotlist "," "." ID "=" expression ''' if len(p) <= 5: #first element of dotlist p[0] = [[p[2],p[4]]] else: #append to previous elements p[0] = p[1] + [[p[4],p[6]]] def p_exprlist(p): ''' exprlist : a_expr | exprlist "," a_expr ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1] + [p[3]] # a (potentially enclosed) list of ids def p_id_list(p): ''' id_list : ID | id_list "," ID ''' if len(p) == 2: p[0] = [p[1]] else: p[0] = p[1] + [p[3]] # now for the compound statements. 
We prepare them as a "STATEMENT" # list for smooth processing in the simple_stmt production def p_compound_stmt(p): '''compound_stmt : if_stmt | if_else_stmt | for_stmt | do_stmt | loop_stmt | with_stmt | repeat_stmt | funcdef ''' p[0] = ["STATEMENTS",[p[1]]] #print "Compound statement: \n" + p[0] # There must only be one else statement at the end of the else if statements, # so we show this by creating a separate production def p_if_else_stmt(p): '''if_else_stmt : if_stmt ELSE suite''' p[0] = p[1] p[0].append(p[3]) # The AST node is [IF_EXPR,cond, suite,[[elseif cond1,suite],[elseifcond2,suite]...]] def p_if_stmt(p): '''if_stmt : IF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite | if_stmt ELSEIF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite ''' if len(p) == 7: p[0] = ["IF_EXPR"] p[0].append(p[3]) p[0].append(p[6]) p[0].append([]) else: p[0] = p[1] p[0][3].append([p[4],p[7]]) # Note the dREL divergence from Python here: we allow compound # statements to follow without a separate block (like C etc.) Where # we have a single statement immediately following we have to make the # statement block. A small_stmt will be a single production, so must # be put into a list in order to match the 'statements' structure # (i.e. 2nd element is a list of statements). A compound_stmt is thus # forced to be also a non-listed object. def p_suite(p): '''suite : statement | "{" maybe_nline statements "}" maybe_nline ''' if len(p) == 2: p[0] = p[1] else: p[0] = p[3] #already have a statement block def p_for_stmt(p): '''for_stmt : FOR id_list IN testlist_star_expr suite | FOR "[" id_list "]" IN testlist_star_expr suite ''' if len(p)==6: p[0] = ["FOR", p[2], p[4], p[5]] else: p[0] = ["FOR", p[3], p[6], p[7]] def p_loop_stmt(p): '''loop_stmt : loop_head suite ''' p[0] = ["LOOP"] + p[1] + [p[2]] # We capture a list of all the actually present items in the current # datafile def p_loop_head(p): '''loop_head : LOOP ID AS ID | LOOP ID AS ID ":" ID | LOOP ID AS ID ":" ID restricted_comp_operator ID ''' p[0] = [p[2],p[4]] if len(p)>= 7: p[0] = p[0] + [p[6]] else: p[0] = p[0] + [""] if len(p) >= 9: p[0] = p[0] + [p[7],p[8]] else: p[0] = p[0] + ["",""] def p_do_stmt(p): '''do_stmt : do_stmt_head suite ''' p[0] = p[1] + [p[2]] # To translate the dREL do to a for statement, we need to make the # end of the range included in the range def p_do_stmt_head(p): '''do_stmt_head : DO ID "=" expression "," expression | DO ID "=" expression "," expression "," expression''' p[0] = ["DO",p[2],p[4],p[6]] if len(p)==9: p[0] = p[0] + [p[8]] else: p[0] = p[0] + [["EXPR",["LITERAL","1"]]] def p_repeat_stmt(p): '''repeat_stmt : REPEAT suite''' p[0] = ["REPEAT",p[2]] def p_with_stmt(p): '''with_stmt : with_head maybe_nline suite''' p[0] = p[1]+[p[3]] def p_with_head(p): '''with_head : WITH ID AS ID''' p[0] = ["WITH",p[2],p[4]] def p_funcdef(p): ''' funcdef : FUNCTION ID OPEN_PAREN arglist CLOSE_PAREN suite ''' p[0] = ["FUNCTION",p[2],p[4],p[6]] def p_arglist(p): ''' arglist : ID ":" list_display | arglist "," ID ":" list_display ''' if len(p) == 4: p[0] = [(p[1],p[2])] else: p[0] = p[1] + [(p[3],p[5])] # This production allows us to insert optional newlines def p_maybe_nline(p): ''' maybe_nline : newlines | empty ''' pass # We need to allow multiple newlines here and not just in the lexer as # an intervening comment can cause multiple newline tokens to appear def p_newlines(p): ''' newlines : NEWLINE | newlines NEWLINE ''' pass def p_empty(p): ''' empty : ''' pass def p_error(p): try: print('Syntax error at position %d, line %d 
token %s, value %s' % (p.lexpos,p.lineno,p.type,p.value)) print('Surrounding text: ' + p.lexer.lexdata[max(p.lexpos - 100,0): p.lexpos] + "*" + \ p.lexer.lexdata[p.lexpos:min(p.lexpos + 100,len(p.lexer.lexdata))]) except: pass parser.restart() raise SyntaxError #lexer = drel_lex.lexer parser = yacc.yacc() pycifrw-4.4/src/drel/drel_lex.py000066400000000000000000000110451345362224200167350ustar00rootroot00000000000000#Attempt to implement dREL using PLY (Python Lex Yacc) from __future__ import print_function import ply.lex as lex import re #for multiline flag states = ( ('paren','inclusive'), ) tokens = ( 'SHORTSTRING', 'LONGSTRING', 'INTEGER', 'BININT', 'HEXINT', 'OCTINT', 'REAL', 'POWER', 'ISEQUAL', 'NEQ', 'GTE', 'LTE', 'IMAGINARY', 'ID', #variable name 'ITEM_TAG', #cif item as variable 'COMMENT', 'STRPREFIX', 'ELLIPSIS', 'AND', 'BADAND', 'OR', 'BADOR', 'IN', 'NOT', 'DO', 'FOR', 'LOOP', 'REPEAT', 'AS', 'WITH', 'WHERE', 'ELSEIF', 'ELSE', 'BREAK', 'NEXT', 'IF', 'SWITCH', 'CASE', 'DEFAULT', 'AUGOP', 'PRINT', 'FUNCTION', 'NEWLINE', 'ESCAPE_NEWLINE', 'OPEN_PAREN', 'CLOSE_PAREN' ) literals = '+*-/;[],:^<>{}=.`&|' #'&' and '|' are pycifrw extensions t_INITIAL_ignore = ' \t' t_paren_ignore = ' \t\n' def t_error(t): print('Illegal character %s' % repr(t.value[0])) t_POWER = r'\*\*' t_ISEQUAL = r'==' t_NEQ = r'!=' t_GTE = r'>=' t_LTE = r'<=' t_ELLIPSIS = r'\.\.\.' t_BADOR = r'\|\|' t_BADAND = r'&&' def t_AUGOP(t): r'(\+\+=)|(\+=)|(-=)|(--=)|(\*=)|(/=)' return t # We do not have this as a literal so that we can switch to ignoring newlines def t_INITIAL_OPEN_PAREN(t): r'\(' t.lexer.paren_level = 1 t.lexer.begin('paren') return t def t_paren_OPEN_PAREN(t): r'\(' t.lexer.paren_level +=1 return t def t_paren_CLOSE_PAREN(t): r'\)' t.lexer.paren_level -=1 if t.lexer.paren_level == 0: t.lexer.begin('INITIAL') return t # Do the reals before the integers, otherwise the integer will # match the first part of the real # def t_IMAGINARY(t): r'(((([0-9]+[.][0-9]*)|([.][0-9]+))([Ee][+-]?[0-9]+)?)|([0-9]+))[jJ]' return t def t_REAL(t): r'(([0-9]+[.][0-9]*)|([.][0-9]+))([Ee][+-]?[0-9]+)?' 
try: value = float(t.value) except ValueError: print('Error converting %s to real' % t.value) return t # Do the binary,octal etc before decimal integer otherwise the 0 at # the front will match the decimal integer 0 # def t_BININT(t): r'0[bB][0-1]+' try: t.value = repr(int(t.value[2:],base=2)) except ValueError: print('Unable to convert binary value %s' % t.value) return t def t_OCTINT(t): r'0[oO][0-7]+' try: t.value = repr(int(t.value[2:],base=8)) except ValueError: print('Unable to convert octal value %s' % t.value) return t def t_HEXINT(t): r'0[xX][0-9a-fA-F]+' try: t.value = repr(int(t.value,base=16)) except ValueError: print('Unable to convert hex value %s' % t.value) return t def t_INTEGER(t): r'[0-9]+' try: value = int(t.value) except ValueError: print('Incorrect integer value %s' % t.value) return t def t_STRPREFIX(t): r'r(?=["\'])|u(?=["\'])|R(?=["\'])|U(?=["\'])|ur(?=["\'])|UR(?=["\'])|Ur(?=["\'])|uR(?=["\'])' return t # try longstring first as otherwise the '' will match a shortstring def t_LONGSTRING(t): r"('''([^\\]|(\\.))*''')|(\"\"\"([^\\]|(\\.))*\"\"\")" return t def t_SHORTSTRING(t): r"('([^'\n]|(\\.))*')|(\"([^\"\n]|(\\.))*\")" return t # special to avoid any ambiguity def t_ELSEIF(t): r"(?i)ELSE\s+IF" return t reserved = { 'and': 'AND', 'or': 'OR', 'in': 'IN', 'not': 'NOT', 'do': 'DO', 'Do': 'DO', 'for': 'FOR', 'For': 'FOR', 'loop': 'LOOP', 'Loop': 'LOOP', 'as': 'AS', 'with': 'WITH', 'With': 'WITH', 'where': 'WHERE', 'Where': 'WHERE', 'else': 'ELSE', 'Else': 'ELSE', 'Next': 'NEXT', 'next' : 'NEXT', 'break': 'BREAK', 'Break': 'BREAK', 'if': 'IF', 'If': 'IF', 'switch': 'SWITCH', 'case' : 'CASE', 'Function' : 'FUNCTION', 'function' : 'FUNCTION', 'Print' : 'PRINT', 'print' : 'PRINT', 'Repeat': 'REPEAT', 'repeat': 'REPEAT', 'default' : 'DEFAULT' } def t_ID(t): r'[a-zA-Z][a-zA-Z0-9_$]*' t.type = reserved.get(t.value,'ID') return t # Item tags can have underscores and digits inside, and must have # at least one underscore at the front def t_ITEM_TAG(t): r'_[a-zA-Z_0-9]+' return t def t_ESCAPE_NEWLINE(t): r'\\\n' t.lexer.lineno += 1 def t_INITIAL_NEWLINE(t): r'\n[\n \t]*' t.lexer.lineno+=len(t.value) return t def t_COMMENT(t): r'\#.*' pass lexer = lex.lex(reflags=re.MULTILINE) if __name__ == "__main__": lex.runmain(lexer) pycifrw-4.4/src/drel/drel_runtime.nw000066400000000000000000000153371345362224200176340ustar00rootroot00000000000000@ Runtime functions for dREL. dREL defines some original semantics for vector and matrix appending and removing. We implement the necessary functions here. <<*>>= import numpy from numpy.linalg import eig # Python3 compatibility if isinstance(u"abc",str): #Python 3 unicode = str <> <> <> <> @ Augmented assignments. aug_append returns a new object. For numpy, if both arrays have the same dimensions, they are reshaped to a larger dimension and then concatenated along the new axis. If the first array has a larger dimension, the second array is reshaped to match. 
<>= def aug_append(current,extra): """Add the contents of extra to current""" have_list = isinstance(current,list) if have_list: if not isinstance(extra, list): #append a single element return current + [extra] else: newlist = current[:] newlist.append(extra) return newlist elif isinstance(current,numpy.ndarray): if current.ndim == extra.ndim + 1: extra = numpy.expand_dims(extra,axis=0) elif current.ndim == extra.ndim: extra = numpy.expand_dims(extra,axis=0) current = numpy.expand_dims(current,axis=0) else: raise ValueError('Arrays have mismatching sizes for concatenating: %d and %d' % (current.ndim,extra.ndim)) return numpy.concatenate((current,extra)) raise ValueError("Cannot append %s to %s" % (repr(extra),repr(current))) def aug_add(current,extra): """Sum the contents of extra to current""" have_list = isinstance(current,list) if have_list: if isinstance(extra, (float,int)): # requires numpy return numpy.array(current) + extra elif isinstance(extra, list): return numpy.array(current) + numpy.array(extra) else: return current + extra def aug_sub(current,extra): have_list = isinstance(current,(list,numpy.ndarray)) if have_list: if isinstance(extra, (float,int)): # requires numpy return numpy.array(current) - extra elif isinstance(extra, (list,numpy.ndarray)): return numpy.array(current) - numpy.array(extra) else: return current - extra def aug_remove(current,extra): """Remove extra from current. Not in formal specifications. Allowed to fail silently.""" have_list = isinstance(current,list) if have_list: if extra in current: # not efficient as we modify in place here current.remove(extra) return current else: print('Removal Warning: %s not in %s' % (repr(extra),repr(current))) return current else: raise ValueError("Cannot remove %s from %s" % (repr(extra),repr(current))) @ Multiplication requires some intelligence. The rules of dREL are as follows: (1) mat * mat, vec*mat is ordinary matrix multiplication (2) vec * vec is a dot product (i.e. second vector is column) (3) mat * vec is matrix multiplication with vec as a column vector dREL has 'Array' and 'Matrix' containers. An array container only allows element-wise operations, whereas matrix containers (implicitly 2-dimensional) have matrix multiplication defined for them. While the dREL specs are not explicit about this, matrix multiplication only makes sense for 2-dimensional objects, although the DDLm type known as 'matrix' describes arbitrary tensors (ddl.dic). We explicitly check that the objects that are passed to this routine satisfy the requirements, i.e. any dimension three or more object does not have multiplication defined except by a scalar. As numpy will always return a 2-dim matrix as a result, even if it is a scalar, we detect scalars and return them as such. We also must detect vectors and return them as a 1D array, so that vector-based operations can work generically. Otherwise we have the case where the second element is accessed (see function Symop for an example) and an error results, as for a vector in numpy 2D representation this element does not exist. We also attempt to maintain type when converting to/from numpy, as some routines (e.g. Symop) wish to create strings out of integers using the backtick operator. 
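The following untangled sketch illustrates the rules above (the import path is assumed; note also that [[drel_dot]] prints a diagnostic line for each call):

    from CifFile.drel.drel_runtime import drel_dot   # assumed location
    import numpy
    m = numpy.array([[1, 0], [0, 2]]); v = numpy.array([3, 4])
    drel_dot(m, m)   # matrix * matrix: ordinary 2x2 matrix product
    drel_dot(v, v)   # vector * vector: dot product, returned as the scalar 25
    drel_dot(m, v)   # matrix * vector: returned as the 1-D array [3, 8]
    drel_dot(2, v)   # a scalar operand falls through to plain numpy multiplication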
<>= def drel_dot(first_arg,second_arg): """Perform a multiplication on two unknown types""" print("Multiply %s and %s" % (repr(first_arg),repr(second_arg))) def make_numpy(input_arg): if hasattr(input_arg,'__iter__'): try: return numpy.matrix(input_arg),True except ValueError: raise ValueError('Attempt to multiply non-matrix object %s' % (repr(input_arg))) return input_arg,False fa,first_matrix = make_numpy(first_arg) sa,second_matrix = make_numpy(second_arg) if first_matrix and second_matrix: #mult of 2 non-scalars if sa.shape[0] == 1: #is a row vector as_column = sa.T result = (fa * as_column).T else: result = fa * sa # detect scalars if result.size == 1: return result.item(0) # remove extra dimension elif result.ndim == 2 and 1 in result.shape: #vector return numpy.array(result).squeeze() else: return result return fa * sa def drel_add(first_arg,second_arg): """Separate string addition from the rest""" if isinstance(first_arg,(unicode,str)) and isinstance(second_arg,(unicode,str)): return first_arg+second_arg else: result = numpy.add(first_arg,second_arg) return result @ Other mathematical functions. dREL defines some linear algebra type functions, which we have to wrap because e.g. the eigenvalues should be sorted. Note that we are assuming the right eigenvalues, not the left eigenvalues. <>= def drel_eigen(in_matrix): """Return 3 lists of form [a,v1,v2,v3], corresponding to the 3 eigenvalues and eigenvector components of a 3x3 matrix""" vals,vects = eig(in_matrix) move = list(numpy.argsort(vals)) move.reverse() vals = vals[move] vects = vects[move] vects = list([[a]+list(numpy.asarray(v).ravel()) for a,v in zip(vals,vects)]) #Eigen returns 4-list return vects def drel_int(in_val): """Return in_val as an integer""" try: return in_val.astype('int') except: return int(in_val) @ List operations. We deduce that Strip actually returns the nth element of each list element (??) based on its use in the model_site category functions. 
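For illustration (untangled), [[drel_strip]] therefore behaves as a column extraction across the outer list:

    drel_strip([[1, 2], [3, 4], [5, 6]], 0)   # -> [1, 3, 5]
    drel_strip([[1, 2], [3, 4], [5, 6]], 1)   # -> [2, 4, 6]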
<>= def drel_strip(in_list,element): """Return the nth element from the list""" return [a[element] for a in in_list] pycifrw-4.4/src/drel/drel_runtime.py000066400000000000000000000104371345362224200176340ustar00rootroot00000000000000import numpy from numpy.linalg import eig # Python3 compatibility if isinstance(u"abc",str): #Python 3 unicode = str def aug_append(current,extra): """Add the contents of extra to current""" have_list = isinstance(current,list) if have_list: if not isinstance(extra, list): #append a single element return current + [extra] else: newlist = current[:] newlist.append(extra) return newlist elif isinstance(current,numpy.ndarray): if current.ndim == extra.ndim + 1: extra = numpy.expand_dims(extra,axis=0) elif current.ndim == extra.ndim: extra = numpy.expand_dims(extra,axis=0) current = numpy.expand_dims(current,axis=0) else: raise ValueError('Arrays have mismatching sizes for concatenating: %d and %d' % (current.ndim,extra.ndim)) return numpy.concatenate((current,extra)) raise ValueError("Cannot append %s to %s" % (repr(extra),repr(current))) def aug_add(current,extra): """Sum the contents of extra to current""" have_list = isinstance(current,list) if have_list: if isinstance(extra, (float,int)): # requires numpy return numpy.array(current) + extra elif isinstance(extra, list): return numpy.array(current) + numpy.array(extra) else: return current + extra def aug_sub(current,extra): have_list = isinstance(current,(list,numpy.ndarray)) if have_list: if isinstance(extra, (float,int)): # requires numpy return numpy.array(current) - extra elif isinstance(extra, (list,numpy.ndarray)): return numpy.array(current) - numpy.array(extra) else: return current - extra def aug_remove(current,extra): """Remove extra from current. Not in formal specifications. 
Allowed to fail silently.""" have_list = isinstance(current,list) if have_list: if extra in current: # not efficient as we modify in place here current.remove(extra) return current else: print('Removal Warning: %s not in %s' % (repr(extra),repr(current))) return current else: raise ValueError("Cannot remove %s from %s" % (repr(extra),repr(current))) def drel_dot(first_arg,second_arg): """Perform a multiplication on two unknown types""" print("Multiply %s and %s" % (repr(first_arg),repr(second_arg))) def make_numpy(input_arg): if hasattr(input_arg,'__iter__'): try: return numpy.matrix(input_arg),True except ValueError: raise ValueError('Attempt to multiply non-matrix object %s' % (repr(input_arg))) return input_arg,False fa,first_matrix = make_numpy(first_arg) sa,second_matrix = make_numpy(second_arg) if first_matrix and second_matrix: #mult of 2 non-scalars if sa.shape[0] == 1: #is a row vector as_column = sa.T result = (fa * as_column).T else: result = fa * sa # detect scalars if result.size == 1: return result.item(0) # remove extra dimension elif result.ndim == 2 and 1 in result.shape: #vector return numpy.array(result).squeeze() else: return result return fa * sa def drel_add(first_arg,second_arg): """Separate string addition from the rest""" if isinstance(first_arg,(unicode,str)) and isinstance(second_arg,(unicode,str)): return first_arg+second_arg else: result = numpy.add(first_arg,second_arg) return result def drel_eigen(in_matrix): """Return 3 lists of form [a,v1,v2,v3], corresponding to the 3 eigenvalues and eigenvector components of a 3x3 matrix""" vals,vects = eig(in_matrix) move = list(numpy.argsort(vals)) move.reverse() vals = vals[move] vects = vects[move] vects = list([[a]+list(numpy.asarray(v).ravel()) for a,v in zip(vals,vects)]) #Eigen returns 4-list return vects def drel_int(in_val): """Return in_val as an integer""" try: return in_val.astype('int') except: return int(in_val) def drel_strip(in_list,element): """Return the nth element from the list""" return [a[element] for a in in_list] pycifrw-4.4/src/drel/parsetab.py000066400000000000000000002372771345362224200167610ustar00rootroot00000000000000 # parsetab.py # This file is automatically generated. Do not edit. 
_tabversion = '3.2' _lr_method = 'LALR' _lr_signature = '\xcc\x83\xf4':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,144,155,156,159,163,164,165,173,177,178,179,184,190,207,210,211,218,230,234,247,],[-61,-76,-56,-75,-58,-78,-65,-63,-74,-68,-66,-50,113,-79,-70,-46,-53,-69,-62,-59,-77,-55,-67,-71,-43,-60,-57,-64,-60,-52,-97,-95,-81,-72,-73,-51,-82,-44,-45,-84,-90,-54,-96,-80,-47,-48,-49,-112,-65,-100,-99,-98,-113,-83,-89,113,]),'}':([1,4,5,7,11,12,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,40,41,42,44,46,48,49,50,51,52,54,56,60,61,63,65,68,69,70,71,72,75,76,79,80,81,82,85,87,88,99,101,115,116,117,118,122,124,125,127,138,140,141,144,145,147,148,154,155,156,159,162,163,164,165,173,175,176,177,178,179,184,191,192,205,207,210,211,218,221,222,225,230,233,234,245,250,254,259,260,262,278,279,],[-154,-153,-152,-61,-76,-56,-127,-75,-58,-126,-124,-78,-23,-129,-65,-63,-125,-74,-68,-66,-5,-50,-32,-79,-30,-156,-70,-46,-53,-69,-62,-59,-77,-55,-131,-67,-27,-130,-71,-24,-43,-128,-57,-64,-155,-143,-156,-135,-139,-3,-60,-52,-146,-31,163,-97,-95,-81,-72,-73,-51,163,-147,-4,-82,-132,-26,-25,-33,-44,-45,-84,-156,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,222,-8,234,-100,-99,-98,-113,-9,-156,-137,-83,-94,-89,-136,-149,-91,-133,-134,-138,-92,-93,]),'OR':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,144,147,148,154,155,156,159,163,164,165,173,175,176,177,178,179,184,190,207,210,211,218,230,234,],[-61,-76,-56,-75,-58,-78,93,-65,-63,-74,-68,-66,-50,-32,-79,-30,-70,-46,-53,-69,-62,-59,-77,-55,-67,-27,-71,-24,-43,-60,-57,-64,-60,-52,-31,-97,-95,-81,-72,-73,-51,-82,-26,-25,-33,-44,-45,-84,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,-65,-100,-99,-98,-113,-83,-89,]),'LOOP':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,138,140,141,144,145,147,148,154,155,156,159,163,164,165,173,175,176,177,178,179,184,191,192,193,195,196,207,210,211,218,219,220,221,222,223,225,228,230,234,244,245,246,247,248,250,256,259,260,262,269,273,],[-156,-154,26,26,-153,-152,-61,26,-76,-56,-156,26,-127,-75,-58,-126,-124,-78,-23,-129,-65,-63,-125,-74,-68,-66,-5,26,-50,-32,-79,-30,-21,-70,-46,-53,-69,-62,-59,-1,-77,-55,-131,-67,-27,-130,-71,-24,-43,-128,-57,-64,-2,-155,-143,-156,-135,26,-139,-3,-60,26,-52,-146,-31,-97,-95,-81,-72,-73,-51,26,-147,-4,-82,-132,-26,-25,-33,-44,-45,-84,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,26,-8,-148,-140,26,-100,-99,-98,-113,-156,-22,-9,-156,-156,-137,26,-83,-89,26,-136,26,-141,26,-149,-144,-133,-134,-138,-142,-145,]),} _lr_action = { } for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_action: _lr_action[_x] = { } _lr_action[_x][_k] = _y del _lr_action_items _lr_goto_items = 
{'statements':([138,],[191,]),'comp_operator':([41,],[104,]),'small_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[6,6,6,6,6,6,142,6,6,6,6,6,6,6,6,]),'fancy_drel_assignment_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,]),'primary':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,]),'stringliteral':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,116,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,266,267,268,271,276,277,],[28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,161,28,28,28,28,28,28,28,28,28,28,28,190,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,272,28,28,275,28,28,]),'item_tag':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'not_test':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[65,65,65,65,65,65,65,115,65,65,65,65,65,65,65,65,175,176,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,]),'listmaker':([114,],[158,]),'do_stmt_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'func_arg':([133,143,217,],[180,180,243,]),'enclosure':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,]),'newlines':([0,13,16,43,46,81,86,136,158,162,203,219,222,223,231,255,265,],[5,5,87,5,5,5,141,5,5,5,5,5,5,5,5,5,5,]),'break_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,]),'dotlist':([133,],[181,]),'arglist':([153,],[199,]),'long_slice':([121,206,],[169,169,]),'repeat_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[68,68,68,68,68,68,68,68,68,68,68,68,68,68,]),'u_expr':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[49,49,49,49,49,49,99,49,49,49,127,49,49,49,49,49,49,49,49,49,164,49,49,49,177,178,179,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,]),'if_else_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[33,33,33,33,33,33,33,33,33,33,33,33,33,33,]),'parenth_form':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,2
44,246,248,252,267,268,276,277,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'literal':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,]),'attributeref':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'call':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,]),'argument_list':([133,143,],[183,183,]),'statement':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[55,78,82,82,82,82,82,192,221,82,82,82,82,82,]),'string_conversion':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,]),'with_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'input':([0,],[3,]),'loop_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'do_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'next_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[57,57,57,57,57,57,57,57,57,57,57,57,57,57,57,]),'empty':([0,13,43,46,81,136,158,162,203,219,222,223,231,255,265,],[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,]),'listmaker2':([160,],[202,]),'short_slice':([121,206,],[167,167,]),'power':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,]),'a_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[41,41,41,41,41,41,41,41,41,41,41,41,41,41,154,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,]),'print_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,]),'maybe_nline':([0,13,43,46,81,136,158,162,203,219,222,223,231,255,265,
],[2,84,114,116,138,188,201,205,232,244,245,246,252,266,271,]),'tablemaker2':([233,],[254,]),'slicing':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,]),'for_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[19,19,19,19,19,19,19,19,19,19,19,19,19,19,]),'m_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,105,107,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,155,156,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,]),'and_test':([2,3,8,14,21,29,39,53,84,86,90,92,93,114,121,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[70,70,70,70,70,70,70,70,70,70,70,147,148,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,]),'restricted_comp_operator':([41,247,],[108,261,]),'atom':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,]),'funcdef':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[61,61,61,61,61,61,61,61,61,61,61,61,61,61,]),'expr_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,]),'slice_list':([121,],[166,]),'subscription':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,]),'comparison':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,]),'attribute_tag':([50,],[118,]),'if_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[22,22,22,22,22,22,22,22,22,22,22,22,22,22,]),'id_list':([31,97,],[96,152,]),'proper_slice':([121,206,],[170,235,]),'list_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,229,232,240,242,244,246,248,252,263,267,268,276,277,],[23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,251,23,23,23,23,23,23,23,270,23,23,23,23,]),'loop_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[72,72,72,72,72,72,72,72,72,72,72,72,72,72,]),'or_test':([2,3,8,14,21,29,39,53,84,86,90,114,121,133,134,
135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,]),'compound_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[37,37,37,37,37,37,37,37,37,37,37,37,37,37,]),'with_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[25,25,25,25,25,25,25,25,25,25,25,25,25,25,]),'tablemaker':([116,138,],[162,162,]),'table_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,]),'suite':([8,14,39,84,90,196,228,244,246,248,],[80,85,101,140,145,225,250,259,260,262,]),'simple_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'testlist_star_expr':([2,3,8,14,21,39,53,84,86,90,135,137,138,150,191,196,226,228,244,246,248,],[77,77,77,77,89,77,123,77,77,77,187,189,77,196,77,77,248,77,77,77,77,]),'slice_item':([121,206,],[171,236,]),'expression':([2,3,8,14,21,29,39,53,84,86,90,114,121,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[47,47,47,47,47,95,47,47,47,47,47,160,172,185,186,47,47,47,185,194,47,209,213,220,47,47,233,237,238,239,185,47,47,253,256,258,47,47,47,264,273,274,278,279,]),} _lr_goto = { } for _k, _v in _lr_goto_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_goto: _lr_goto[_x] = { } _lr_goto[_x][_k] = _y del _lr_goto_items _lr_productions = [ ("S' -> input","S'",1,None,None,None), ('input -> maybe_nline statement','input',2,'p_input','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',19), ('input -> input statement','input',2,'p_input','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',20), ('statement -> simple_stmt newlines','statement',2,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',36), ('statement -> simple_stmt ; newlines','statement',3,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',37), ('statement -> compound_stmt','statement',1,'p_statement','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',38), ('simple_stmt -> small_stmt','simple_stmt',1,'p_simple_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',44), ('simple_stmt -> simple_stmt ; small_stmt','simple_stmt',3,'p_simple_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',45), ('statements -> statement','statements',1,'p_statements','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',55), ('statements -> statements statement','statements',2,'p_statements','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',56), ('small_stmt -> expr_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',61), ('small_stmt -> print_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',62), ('small_stmt -> break_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',63), ('small_stmt -> next_stmt','small_stmt',1,'p_small_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',64), ('break_stmt -> 
BREAK','break_stmt',1,'p_break_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',68), ('next_stmt -> NEXT','next_stmt',1,'p_next_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',72), ('print_stmt -> PRINT expression','print_stmt',2,'p_print_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',76), ('expr_stmt -> testlist_star_expr','expr_stmt',1,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',84), ('expr_stmt -> testlist_star_expr AUGOP testlist_star_expr','expr_stmt',3,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',85), ('expr_stmt -> testlist_star_expr = testlist_star_expr','expr_stmt',3,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',86), ('expr_stmt -> fancy_drel_assignment_stmt','expr_stmt',1,'p_expr_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',87), ('testlist_star_expr -> expression','testlist_star_expr',1,'p_testlist_star_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',96), ('testlist_star_expr -> testlist_star_expr , maybe_nline expression','testlist_star_expr',4,'p_testlist_star_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',97), ('expression -> or_test','expression',1,'p_expression','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',107), ('or_test -> and_test','or_test',1,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',115), ('or_test -> or_test OR and_test','or_test',3,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',116), ('or_test -> or_test BADOR and_test','or_test',3,'p_or_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',117), ('and_test -> not_test','and_test',1,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',122), ('and_test -> and_test AND not_test','and_test',3,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',123), ('and_test -> and_test BADAND not_test','and_test',3,'p_and_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',124), ('not_test -> comparison','not_test',1,'p_not_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',129), ('not_test -> NOT not_test','not_test',2,'p_not_test','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',130), ('comparison -> a_expr','comparison',1,'p_comparison','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',135), ('comparison -> a_expr comp_operator a_expr','comparison',3,'p_comparison','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',136), ('comp_operator -> restricted_comp_operator','comp_operator',1,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',142), ('comp_operator -> IN','comp_operator',1,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',143), ('comp_operator -> NOT IN','comp_operator',2,'p_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',144), ('restricted_comp_operator -> <','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',150), ('restricted_comp_operator -> >','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',151), ('restricted_comp_operator -> GTE','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',152), ('restricted_comp_operator -> 
LTE','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',153), ('restricted_comp_operator -> NEQ','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',154), ('restricted_comp_operator -> ISEQUAL','restricted_comp_operator',1,'p_restricted_comp_operator','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',155), ('a_expr -> m_expr','a_expr',1,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',159), ('a_expr -> a_expr + m_expr','a_expr',3,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',160), ('a_expr -> a_expr - m_expr','a_expr',3,'p_a_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',161), ('m_expr -> u_expr','m_expr',1,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',168), ('m_expr -> m_expr * u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',169), ('m_expr -> m_expr / u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',170), ('m_expr -> m_expr ^ u_expr','m_expr',3,'p_m_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',171), ('u_expr -> power','u_expr',1,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',178), ('u_expr -> - u_expr','u_expr',2,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',179), ('u_expr -> + u_expr','u_expr',2,'p_u_expr','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',180), ('power -> primary','power',1,'p_power','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',187), ('power -> primary POWER u_expr','power',3,'p_power','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',188), ('primary -> atom','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',196), ('primary -> attributeref','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',197), ('primary -> subscription','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',198), ('primary -> slicing','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',199), ('primary -> call','primary',1,'p_primary','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',200), ('atom -> ID','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',205), ('atom -> item_tag','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',206), ('atom -> literal','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',207), ('atom -> enclosure','atom',1,'p_atom','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',208), ('item_tag -> ITEM_TAG','item_tag',1,'p_item_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',213), ('literal -> stringliteral','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',217), ('literal -> INTEGER','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',218), ('literal -> HEXINT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',219), ('literal -> OCTINT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',220), ('literal -> BININT','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',221), ('literal -> 
REAL','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',222), ('literal -> IMAGINARY','literal',1,'p_literal','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',223), ('stringliteral -> STRPREFIX SHORTSTRING','stringliteral',2,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',228), ('stringliteral -> STRPREFIX LONGSTRING','stringliteral',2,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',229), ('stringliteral -> SHORTSTRING','stringliteral',1,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',230), ('stringliteral -> LONGSTRING','stringliteral',1,'p_stringliteral','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',231), ('enclosure -> parenth_form','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',236), ('enclosure -> string_conversion','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',237), ('enclosure -> list_display','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',238), ('enclosure -> table_display','enclosure',1,'p_enclosure','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',239), ('parenth_form -> OPEN_PAREN testlist_star_expr CLOSE_PAREN','parenth_form',3,'p_parenth_form','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',243), ('parenth_form -> OPEN_PAREN CLOSE_PAREN','parenth_form',2,'p_parenth_form','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',244), ('string_conversion -> ` testlist_star_expr `','string_conversion',3,'p_string_conversion','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',251), ('list_display -> [ maybe_nline listmaker maybe_nline ]','list_display',5,'p_list_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',256), ('list_display -> [ maybe_nline ]','list_display',3,'p_list_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',257), ('listmaker -> expression listmaker2','listmaker',2,'p_listmaker','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',265), ('listmaker2 -> , maybe_nline expression','listmaker2',3,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',270), ('listmaker2 -> listmaker2 , maybe_nline expression','listmaker2',4,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',271), ('listmaker2 -> ','listmaker2',0,'p_listmaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',272), ('table_display -> { maybe_nline tablemaker maybe_nline }','table_display',5,'p_table_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',282), ('table_display -> { maybe_nline }','table_display',3,'p_table_display','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',283), ('tablemaker -> stringliteral : expression tablemaker2','tablemaker',4,'p_tablemaker','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',290), ('tablemaker2 -> , maybe_nline stringliteral : expression','tablemaker2',5,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',294), ('tablemaker2 -> tablemaker2 , maybe_nline stringliteral : expression','tablemaker2',6,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',295), ('tablemaker2 -> ','tablemaker2',0,'p_tablemaker2','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',296), ('attributeref -> primary 
attribute_tag','attributeref',2,'p_attributeref','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',310), ('attribute_tag -> . ID','attribute_tag',2,'p_attribute_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',314), ('attribute_tag -> REAL','attribute_tag',1,'p_attribute_tag','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',315), ('subscription -> primary [ expression ]','subscription',4,'p_subscription','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',322), ('slicing -> primary [ proper_slice ]','slicing',4,'p_slicing','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',326), ('slicing -> primary [ slice_list ]','slicing',4,'p_slicing','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',327), ('proper_slice -> short_slice','proper_slice',1,'p_proper_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',331), ('proper_slice -> long_slice','proper_slice',1,'p_proper_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',332), ('short_slice -> :','short_slice',1,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',343), ('short_slice -> expression : expression','short_slice',3,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',344), ('short_slice -> : expression','short_slice',2,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',345), ('short_slice -> expression :','short_slice',2,'p_short_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',346), ('long_slice -> short_slice : expression','long_slice',3,'p_long_slice','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',355), ('slice_list -> slice_item','slice_list',1,'p_slice_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',362), ('slice_list -> slice_list , slice_item','slice_list',3,'p_slice_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',363), ('slice_item -> expression','slice_item',1,'p_slice_item','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',370), ('slice_item -> proper_slice','slice_item',1,'p_slice_item','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',371), ('call -> ID OPEN_PAREN CLOSE_PAREN','call',3,'p_call','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',375), ('call -> ID OPEN_PAREN argument_list CLOSE_PAREN','call',4,'p_call','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',376), ('argument_list -> func_arg','argument_list',1,'p_argument_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',386), ('argument_list -> argument_list , func_arg','argument_list',3,'p_argument_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',387), ('func_arg -> expression','func_arg',1,'p_func_arg','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',394), ('fancy_drel_assignment_stmt -> ID OPEN_PAREN dotlist CLOSE_PAREN','fancy_drel_assignment_stmt',4,'p_fancy_drel_assignment_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',398), ('dotlist -> . ID = expression','dotlist',4,'p_dotlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',405), ('dotlist -> dotlist , . 
ID = expression','dotlist',6,'p_dotlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',406), ('exprlist -> a_expr','exprlist',1,'p_exprlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',413), ('exprlist -> exprlist , a_expr','exprlist',3,'p_exprlist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',414), ('id_list -> ID','id_list',1,'p_id_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',421), ('id_list -> id_list , ID','id_list',3,'p_id_list','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',422), ('compound_stmt -> if_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',433), ('compound_stmt -> if_else_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',434), ('compound_stmt -> for_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',435), ('compound_stmt -> do_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',436), ('compound_stmt -> loop_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',437), ('compound_stmt -> with_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',438), ('compound_stmt -> repeat_stmt','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',439), ('compound_stmt -> funcdef','compound_stmt',1,'p_compound_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',440), ('if_else_stmt -> if_stmt ELSE suite','if_else_stmt',3,'p_if_else_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',447), ('if_stmt -> IF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',6,'p_if_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',453), ('if_stmt -> if_stmt ELSEIF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',7,'p_if_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',454), ('suite -> statement','suite',1,'p_suite','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',473), ('suite -> { maybe_nline statements } maybe_nline','suite',5,'p_suite','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',474), ('for_stmt -> FOR id_list IN testlist_star_expr suite','for_stmt',5,'p_for_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',481), ('for_stmt -> FOR [ id_list ] IN testlist_star_expr suite','for_stmt',7,'p_for_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',482), ('loop_stmt -> loop_head suite','loop_stmt',2,'p_loop_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',489), ('loop_head -> LOOP ID AS ID','loop_head',4,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',495), ('loop_head -> LOOP ID AS ID : ID','loop_head',6,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',496), ('loop_head -> LOOP ID AS ID : ID restricted_comp_operator ID','loop_head',8,'p_loop_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',497), ('do_stmt -> do_stmt_head suite','do_stmt',2,'p_do_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',508), ('do_stmt_head -> DO ID = expression , expression','do_stmt_head',6,'p_do_stmt_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',515), ('do_stmt_head -> DO ID = expression , expression , 
expression','do_stmt_head',8,'p_do_stmt_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',516), ('repeat_stmt -> REPEAT suite','repeat_stmt',2,'p_repeat_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',525), ('with_stmt -> with_head maybe_nline suite','with_stmt',3,'p_with_stmt','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',529), ('with_head -> WITH ID AS ID','with_head',4,'p_with_head','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',533), ('funcdef -> FUNCTION ID OPEN_PAREN arglist CLOSE_PAREN suite','funcdef',6,'p_funcdef','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',537), ('arglist -> ID : list_display','arglist',3,'p_arglist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',541), ('arglist -> arglist , ID : list_display','arglist',5,'p_arglist','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',542), ('maybe_nline -> newlines','maybe_nline',1,'p_maybe_nline','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',549), ('maybe_nline -> empty','maybe_nline',1,'p_maybe_nline','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',550), ('newlines -> NEWLINE','newlines',1,'p_newlines','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',557), ('newlines -> newlines NEWLINE','newlines',2,'p_newlines','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',558), ('empty -> ','empty',0,'p_empty','build/bdist.linux-x86_64/egg/CifFile/drel/drel_ast_yacc.py',562), ] pycifrw-4.4/src/drel/py_from_ast.nw000066400000000000000000001214771345362224200174700ustar00rootroot00000000000000@ These routines convert a dREL AST into Python text. The top-level AST builder encloses the python fragment produced by the AST traverser. The function transformer turns dREL function calls into the appropriate Python/User-defined/Numpy function calls. <<*>>= # To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import # The unicode type does not exist in Python3 as the str type # encompasses unicode. PyCIFRW tests for 'unicode' would fail # Suggestions for a better approach welcome. if isinstance(u"abc",str): #Python3 unicode = str import re from CifFile import CifError <> <> <> <> <> <> @ Configuration. The function tables below describe how generic dREL operations are transformed into API- specific function calls. This is mostly concerned with calls to the ciffile object passed to the builder. Note that changing this interface will only work if the objects returned by the various calls behave as follows: (i) packets returned by the ``semantic packet'' call should have dataname-indexed values that are accessible through the Python 'getattr' function (ii) a whole column of data is returned for values returned by data access for looped datanames (iii) StarList and StarTable are understood value types (iv) the results of all applications are values ready for calculation. 
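As a sketch of this contract (the real PyCIFRW table follows below), an alternative table for some other API could fill the same placeholders. Note that the generated method always binds its data argument to the name [[ciffile]], so replacement fragments must use that name; the helper functions here are invented for illustration:

    alt_func_table = {
        "data_access": "ciffile[%s]",
        "optional_data_access": "ciffile.get(%s,None)",
        "element_no": "%s[%s]",
        "count_data": "len(%s)",
        "cat_names": "names_in_category(ciffile,%s)",      # hypothetical helper
        "has_name": "(%s in ciffile)",
        "semantic_packet": "fetch_packet(ciffile,%s,%s)",  # hypothetical helper
        }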
<>= pycifrw_func_table = { #how to use PyCIFRW CifFile objects "data_access": "ciffile[%s]", # argument is dataname to be accessed "optional_data_access":"ciffile.get(%s,None)", "element_no": "%s[%s]", # accessing a particular element of the result of data_access "count_data": "len(%s)", # number of elements for result of data_access "cat_names":"ciffile.dictionary.names_in_cat(%s)", #names in category %s "has_name":"ciffile.has_key_or_alias(%s)", "semantic_packet":"ciffile.GetKeyedSemanticPacket(%s,%s)" #get a packet for key, value } @ The AST builder puts the infrastructure around the AST statement list to turn it into a function call with two arguments: a dictionary and a data file. The target id will usually be the dataname that is being defined. We always call this ``__dreltarget'' within the generated method which is why we need to know the target_id. Note that the ``_cat.name'' syntax is significant in the dREL context, and therefore in the context of the generator. Only names with this form are possible targets, and the category part of the name is used to automatically generate the appropriate loop index when that category is referred to within the code. The generated function can be called to generate all values in a loop column, or only a single value, by setting keyword argument ``packet_no'' to the required packet, or -1. If the 'depends' keyword is True, a list of datanames that this function depends upon is returned as a second item. The 'loopable' argument is a table of categories that are loopable, with the values being ([key_ids], datanames). <>= def make_python_function(in_ast,func_name,targetname, special_ids=[{}], func_table = pycifrw_func_table, cif_dic=None,cat_meth=False, func_def = False, have_sn=True,loopable={},debug=None,depends=False): """Convert in_ast to python code""" if debug is not None: print("incoming AST:") print(repr(in_ast)) func_text,withtable,dependencies,cur_row = start_traverse(in_ast,func_table,target_id=targetname,cat_meth=cat_meth,loopable=loopable,debug=debug,func=func_def,cif_dic=cif_dic) if debug is not None: print('Start========') print(func_text) print('End==========') if func_def and not depends: return func_text elif func_def: return func_text, None # now indent the string noindent = func_text.splitlines() # get the minimum indent and remove empty lines no_spaces = [re.match(r' *',a).end() for a in noindent if a] #drop empty lines min_spaces = min(no_spaces)+4 # because we add 4 ourselves to everything if len(withtable) > 0 or cur_row: # a loop method <> else: <> if cat_meth: preamble += " "*8 + "__dreltarget = {}\n" # initialise num_header = """ import math,cmath try: import numpy except: print("Can't import numerical python, this method may not work") """ preamble += num_header indented = map(lambda a:" "*8 + a +"\n",noindent) #indent dREL body postamble = "" postamble += " "*8 + "return __dreltarget" final = preamble + "".join(indented) + postamble + end_body if not depends: return final else: return final, dependencies @ Creating functions for looped categories. We provide a shell function which internally calls the transformed function with the option of performing the calculation on a single packet or on the whole category. The existence of the "current_row" functionality means that we have to cover the situation where no other items in a category are referred to and so we need to work out the total number of packets during execution of the method. 
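A minimal usage sketch, assuming an AST, a dictionary object [[cdic]] and a data file [[ciffile]] already exist (for a looped target the generated function instead has signature [[(ciffile,packet_no=-1)]]):

    func_text = make_python_function(ast, "calc_volume", "_cell.volume",
                                     cif_dic=cdic, loopable={})
    exec(func_text)                  # defines calc_volume(ciffile)
    result = calc_volume(ciffile)    # evaluate the dREL method against the file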
<>= d_vars = [a[1][0] for a in withtable.items()] if cur_row: d_vars = d_vars + ['__current_row'] dummy_vars = ",".join(d_vars) actual_names = [k for k in withtable.keys()] actual_names = [func_table["data_access"]% ("'"+a+"'") for a in actual_names] # intercept optional values and replace with [] if None optional_names = ["__option_w%d"%n for n,k in enumerate(withtable.keys()) if withtable[k][2]] if cur_row: actual_names+=['__row_id'] final_names = actual_names[:] one_pack_names = [func_table["element_no"] % (a,"packet_no") for a in actual_names] for n,k in enumerate(withtable.keys()): if withtable[k][2]: final_names[n]="__option_w%d"%n #pre-evaluated one_pack_names[n] = "__option_w%d"%n map_names = ",".join(final_names) one_packet_each = ",".join(one_pack_names) preamble = "def %s(ciffile,packet_no=-1):\n" % (func_name) preamble +=" try:\n" preamble +=" from itertools import repeat,imap\n" #note that imap might fail preamble +=" except ImportError: #python3\n" preamble +=" imap = map\n" preamble +=" def drel_func(%s):\n" % dummy_vars # for debugging print_instruction = "'Function passed variables "+("{!r} "*len(d_vars))+"'.format("+dummy_vars+",)" preamble +=" print(%s)\n" % print_instruction # preamble +=" print('Globals inside looped drel_func:' + repr(globals())\n") # the actual function gets inserted here # Handle the optional names end_body = "\n" for n,one_opt in enumerate(withtable.keys()): if withtable[one_opt][2]: end_body += " try:\n" end_body += " %s%d = %s\n" % ("__option_w",n,func_table["optional_data_access"]%("'"+one_opt+"'")) end_body += " except KeyError:\n" end_body += " %s%d = None\n" % ("__option_w",n) end_body+= " if packet_no < 0: #map\n" for one_opt in optional_names: end_body += " if %s is None: %s = repeat(None)\n" % (one_opt,one_opt) if cur_row and len(actual_names) > 1: #i.e. have real names from category end_body +=" __row_id = range(%s)\n" % (func_table['count_data'] % ("'"+actual_names[0]+"'")) elif cur_row and len(actual_names)==1: #so no actual names available end_body += " cat_names = %s\n" % (func_table["cat_names"] % ("'"+getcatname(targetname)[0]+"'")) end_body += " have_name = [a for a in cat_names if %s]\n" % (func_table["has_name"] % "a") end_body += " if len(have_name)>0:\n" end_body += " full_length = %s \n" % (func_table["count_data"] % (func_table["data_access"] % "have_name[0]")) end_body += " __row_id = range(full_length)\n" end_body += " else:\n" end_body += " return []\n" end_body+= " return list(imap(drel_func,%s))\n" % (map_names+",") end_body+= " else:\n" end_body+= " return drel_func(%s)\n" % one_packet_each @ Un'with'ed data can simply be called with the dictionary and ciffile, with no need for any extra variables <>= preamble = "def %s(ciffile):\n" % func_name #preamble +=" global StarList#from CifFile.drel import drel_runtime\n" end_body = "" @ The AST traverser is a recursive function which is passed a node. It returns a string. We define it inside an external function so that our global variables are actually just local variables of the enclosing function. Variable [[special_info.[special_id] ]] is a list of scopes, each containing a table of variables that need to be specially handled, either because they correspond to datanames (first entry in list) or to variables that have been aliased to categories via loop or with statements. The scoping is not used particularly often in the following code as the example methods do not really test out the scoping. 
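One scope in [[special_info["special_id"]]] might look like the following (all entries invented for illustration; the dummy index name is a placeholder):

    {"_atom_site": ["atom_site", "", False],   # virtual 'with' for the target category
     "a": ["atom_site", "__aix", True],        # loop alias: dummy index, looping flag set
     "c": ["cell", "", False]}                 # plain 'with' alias, no index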
A variable defined in a loop statement will also have a loop index that needs to be incremented and used to index a packet; this is the second entry in the list indexed by the variable alias. The second entry is also used to hold the dummy variable for the 'with' statement. The generated function can thus be mapped across the keys of the with table to calculate the whole column of a category. The final boolean entry determines whether the second entry is for looping (True) or not (False). At the beginning of the traverse we initialise special_id with the explicit category name if we are in a loopable category, so that any references to items in this category will be dealt with as for explicit with statements. Note that we append a single underscore to the alias to allow it to be picked up when the search for special ids is undertaken e.g. in the ATTRIBUTE case. The [[packet_vars]] entry is a table of packets that have been assigned to a variable name. We need to keep this around in order to properly manage them when their attributes are accessed, as this access will be by simple name, without the category ID. The [[need_current_row]] entry is fed back from the traverse and indicates that the current function will require access to the current packet. The [[rhs]] is an entry to indicate whether or not we are dealing with the rhs of an assignment. If False, then we are dealing with the lhs, and if None, then we are not in an assignment. The [[inif]] entry flags if we are inside an IF statement, in which case dataitems may not need to be provided in order to proceed with calculation. <>= def start_traverse(in_node,api_table,target_id=None,loopable={},cat_meth=False,debug=None, func=False, cif_dic=None): special_info = {"special_id":[{}],"target_id":target_id,"withtable":{},"sub_subject":"", "depends":set(),"loopable_cats":loopable,"packet_vars":{}, "need_current_row":False,"rhs":None,"inif":False} # create a virtual enclosing 'with' statement if target_id is not None and not cat_meth and not func: cat,name = getcatname(target_id) special_info["special_id"][-1].update({"_"+cat:[cat,"",False]}) if cat in special_info["loopable_cats"].keys(): # special_info["special_id"][-1]["_"+cat][1] = "looped_cat" <> def traverse_ast(in_node,debug=debug): if isinstance(in_node,(unicode,str)): return in_node if isinstance(in_node[0],list): raise SyntaxError('First element of AST Node must be string: ' + repr(in_node)) node_type = in_node[0] if debug == node_type: print(node_type + ": " + repr(in_node)) if node_type == "ARGLIST": pass <> <> <> <> <> else: return "Not found: %s" % repr(in_node) result = traverse_ast(in_node) # remove target id from dependencies if special_info["target_id"] is not None: special_info["depends"].discard(special_info["target_id"].lower()) if not special_info.get("have_drel_target",False): print('WARNING: no assignment to __dreltarget in %s (this is OK for category methods)' % repr(target_id)) print(result) return result,special_info["withtable"],special_info["depends"],special_info["need_current_row"] @ These are the simple constants that we have detected. 
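For instance, assuming the raw literal text reaches the node (the lexer shown earlier in fact already converts binary, octal and hex token values to decimal strings at tokenisation time), the translations are:

    ["REAL", "1.5"]     # emitted unchanged as "1.5"
    ["HEX", "0x1f"]     # emitted as "31" via int(...,base=16)
    ["FALSE"]           # emitted as "False"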
<>= elif node_type == "BINARY": return("%d" % int(in_node[1],base=2)) elif node_type == "FALSE": return("False") elif node_type == "REAL": return(in_node[1]) elif node_type == "HEX": return("%d" % int(in_node[1],base=16)) elif node_type == "INT": return(in_node[1]) elif node_type == "IMAGINARY": return(in_node[1]) elif node_type == "OCTAL": return("%d" % int(in_node[1],base=8)) @ We need to catch all variables and other primary items here. Note that for now literals are simply repeated, as the Python representation is the same as the dREL representation. We make a note of all tags mentioned so that we can create a dependency tree. <>= elif node_type == "ATOM": if isinstance(in_node[1],(unicode,str)): # pick up built-in literals if in_node[1].lower() == 'twopi': return "(2.0 * math.pi)" if in_node[1].lower() == 'pi': return "math.pi" else: return in_node[1] else: return traverse_ast(in_node[1]) elif node_type == "ITEM_TAG": return in_node[1] elif node_type == "LITERAL": return in_node[1] elif node_type == "LIST": if len(in_node)==1: #empty list return "StarList([])" if special_info["rhs"] == True: outstring = "StarList([" else: outstring = "" for list_elem in in_node[1:]: outstring = outstring + traverse_ast(list_elem) + "," if special_info["rhs"] == True: return outstring[:-1] + "])" else: return outstring[:-1] elif node_type == "TABLE": if len(in_node)==1: return "StarTable({})" else: outstring = "{" for table_elem in in_node[1:]: outstring = outstring + traverse_ast(table_elem[0])+":"+traverse_ast(table_elem[1]) +"," return outstring[:-1] + "}" <> <> <> elif node_type == "SLICE": # primary [[start,finish,step],[...] outstring = traverse_ast(in_node[1]) + "[" slice_list = in_node[2] for one_slice in slice_list: if one_slice[0] == "EXPR": #not a slice as such outstring += traverse_ast(one_slice) elif len(one_slice) == 0: outstring += ":" elif len(one_slice) >0: # at least start outstring += traverse_ast(one_slice[0]) + ":" if len(one_slice) >1: #start,finish only outstring += traverse_ast(one_slice[1]) if len(one_slice) == 3: #step as well outstring += ":" + traverse_ast(one_slice[2]) outstring += "," outstring = outstring[:-1] + "]" return outstring @ Subscriptions, of form primary [ subscript ]. A subscription may be a simple element lookup, as in a[1], or it may be a keyed lookup of a different category using the value within the square brackets e.g. p[m.symop]. If we find the primary is something that we are looping or withing, we immediately adopt the keyed lookup. If the primary is an item tag, then we can deduce that a keyed lookup follows as there is no other function for an item tag without accompanying attribute to make a full dataname. It is also possible for a delayed attribute lookup to occur, for example, a = atom_site[label] and later b = a.position, so we catch any subscriptions applied to loop category names. We convert list-valued subscriptions to a sequence of Python subscriptions in order to catch the valid numpy a[1,2,1...] style multi-dimensional array access. In all other cases we have a plain subscription as for normal Python. 
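As an illustration (datanames invented, and assuming [[atom_site]] is a loopable category), a keyed dREL lookup such as [[s = atom_site[label] ]] is emitted via the PyCIFRW table above as

    s = ciffile.GetKeyedSemanticPacket(label, 'atom_site')

while the key dataname(s) of [[atom_site]] are added to the dependency set and [[sub_subject]] is set so that a following attribute reference can be resolved to a full dataname.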
<>= elif node_type == "SUBSCRIPTION": # variable, single expression newid = 0 if in_node[1][0] == "ATOM" and in_node[1][1][0] == "ITEM_TAG": #keyed lookup print("Found category used as item tag: subscribing") newid = [in_node[1][1][1][1:],False,False] #drop underscore and remember else: primary = traverse_ast(in_node[1]) # check to see if this is a special variable for idtable in special_info["special_id"]: newid = idtable.get(primary,0) if newid: break if primary in special_info["loopable_cats"].keys(): #loop category used newid = [primary,False,False] break if newid: #FIXME: the dataname may not be the . construction (eg pdCIF) key_items = ["_"+newid[0]+"."+s for s in special_info["loopable_cats"][newid[0]][0]] #key name special_info["depends"].update([k.lower() for k in key_items]) get_loop = api_table["semantic_packet"] % (traverse_ast(in_node[2]),"'"+newid[0]+"'") special_info["sub_subject"] = newid[0] #in case of attribute reference following print("Set sub_subject to %s" % special_info["sub_subject"]) return get_loop else: outstring = primary + "[" outstring = outstring + traverse_ast(in_node[2]) + "]" return outstring @ Attribute references. We need to catch all attribute references to special variables (those defined by loop or with statements). For ``with'' variables, we create a dummy variable for every attribute reference, keyed by the actual dataname looked up via the (cat,obj) tuple. We store the order that it appears in the function call that is created, so that we can include the function within a 'map' statement and give the correct order of items. Additionally, if the variable appears inside an IF statement, evaluation may be optional, so a flag is set (True means optional). So the withtable has syntax {``dataname'':(dummyname,dummyorder,optional = True/False)}. Furthermore, the square bracket syntax for keyed access to data packets requires that the result of the keyed access is accessed by attribute. While dREL does not require the full dataname, PyCIFRW does, so we have to store the category that the keyed access comes from and add that back in - this is what 'sub_subject' contains. Note also that this access can be delayed by assigning to the packet (e.g. a = atom_site[label]; b = a.xyz), so we check the list of such assignments kept in special_info[``packet_vars'']. In a further twist, when keyed access followed directly by attribute lookup in a single primary is performed on an explicit category, that category should have an underscore prepended on the principle the final result will be a dataname. So we need to check for this construction before deciding how to translate the 'primary' part of the construction to Python. Note also that we should not assume that a dataname is constructed as ``_category.object''. Rather, we should look up the correct form of the dataname using the provided dictionary. <>= elif node_type == "ATTRIBUTE": # id/tag , att outstring = "" newid = 0 # check for special ids primary = traverse_ast(in_node[1]) # this will set sub_subject if necessary for idtable in special_info["special_id"]: newid = idtable.get(primary,0) if newid: break if newid: #catch our output name true_name = cif_dic.get_name_by_cat_obj(newid[0].lower(),in_node[2].lower()).lower() if true_name == special_info.get("target_id","").lower(): outstring = "__dreltarget" special_info["have_drel_target"] = True # if we are looping, we add a loop prefix. If we are withing an # unlooped category, we put the full name back. 
<>=
elif node_type == "ATTRIBUTE": # id/tag , att
    outstring = ""
    newid = 0
    # check for special ids
    primary = traverse_ast(in_node[1]) # this will set sub_subject if necessary
    for idtable in special_info["special_id"]:
        newid = idtable.get(primary,0)
        if newid: break
    if newid: #catch our output name
        true_name = cif_dic.get_name_by_cat_obj(newid[0].lower(),in_node[2].lower()).lower()
        if true_name == special_info.get("target_id","").lower():
            outstring = "__dreltarget"
            special_info["have_drel_target"] = True
        # if we are looping, we add a loop prefix. If we are withing an
        # unlooped category, we put the full name back.
        elif newid[2] or (not newid[2] and not newid[1]): # looping or simple with
            outstring = api_table["data_access"] % ('"' +true_name +'"')
            special_info["depends"].add(true_name)
            if newid[1]: # a loop statement requires an index
                outstring += "[" + newid[1]+ "]"
        else: # a with statement; capture the name and create a dummy variable
            if true_name not in special_info["withtable"]: #new
                position = len(special_info["withtable"])
                new_var = "__w%d" % position
                isoptional = special_info["inif"]
                special_info["withtable"][true_name] = (new_var,position,isoptional)
            outstring += special_info["withtable"][true_name][0]
            special_info["depends"].add(true_name)
    elif in_node[1][0] == "ATOM" and primary[0] == "_": # a cat/obj name
        fullname = cif_dic.get_name_by_cat_obj(primary,in_node[2]).lower()
        # a simple cat.obj dataname from the dictionary
        if special_info.get("target_id","").lower() == fullname:
            outstring = "__dreltarget"
            special_info["have_drel_target"] = True
        else:
            special_info["depends"].add(fullname)
            outstring = api_table["data_access"] % ("'" + fullname + "'")
    else: # default to Python attribute access
        # check for packet variables
        if primary in special_info["packet_vars"]:
            real_cat = special_info["packet_vars"][primary]
            fullname = cif_dic.get_name_by_cat_obj(real_cat,in_node[2])
            special_info['depends'].add(fullname)
        elif special_info["sub_subject"]:
            fullname = cif_dic.get_name_by_cat_obj(special_info["sub_subject"],in_node[2])
            special_info['depends'].add(fullname)
        else: # not anything special
            fullname = in_node[2]
        outstring = "getattr(" + primary + ",'" + fullname + "')"
    # sub_subject no longer relevant after attribute resolution
    special_info['sub_subject'] = ""
    return outstring

@ Function calls. We catch two special cases. If ``Current_Row'' is called, we
set a flag asking the calling environment to provide us with a row number, as
the generated function is otherwise row-independent. The dREL ``List''
constructor will convert a single argument into a list, or else bundle multiple
arguments into a single list.

<>=
elif node_type == "FUNC_CALL":
    if in_node[1] == "Current_Row": #not a function but a keyword really
        outstring = "__current_row"
        special_info["need_current_row"]=True
    else:
        func_name,every_arg_prefix,postfix = get_function_name(in_node[1])
        outstring = func_name + "( "
        if func_name == "list" and len(in_node[2])>1: #special case
            outstring = outstring + "["
        for argument in in_node[2]:
            outstring = outstring + every_arg_prefix + traverse_ast(argument) + ","
        if postfix == None: # signal for dictionary defined
            outstring = outstring + "ciffile)"
        else:
            outstring = outstring[:-1]
            if func_name == "list" and len(in_node[2])>1:
                outstring = outstring + "]"
            outstring = outstring + ")" + postfix
    return outstring
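@ For example, the degree-based trigonometric functions are handled via an
argument prefix and a postfix (see [[get_function_name]] further below), so
that dREL [[sind(30)]] is emitted as [[math.sin(math.radians(30))]]. A quick
check that the emitted form behaves as expected:

    import math
    # dREL sind(30) is emitted as math.sin(math.radians(30)):
    print(math.sin(math.radians(30)))   # 0.49999999999999994, i.e. 0.5 to rounding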
@ Compound statements. We make a copy of the target expressions in the FOR
statement to allow these objects to be modified inside the suite. The
expression inside an IF statement may never be executed, so we do not have to
provide a value for any dataitems referenced here. We flag this by setting the
``inif'' variable to True; any dataitems referenced while this flag is True are
marked as optional in the withtable, so that the calling machinery can supply
None for them when they are missing from the file.

<>=
elif node_type == "IF_EXPR":
    #IF_EXPR test true_suite [ELSE IF_EXPR] false_suite
    outstring = "if "
    outstring = outstring + traverse_ast(in_node[1])
    outstring = outstring + ":"
    old_inif = special_info["inif"]
    special_info["inif"] = True
    true_bit = traverse_ast(in_node[2])
    outstring = outstring + add_indent("\n"+true_bit) #indent
    elseif = in_node[3]
    if len(elseif)!=0:
        for one_cond in elseif: #each entry is condition, suite
            outstring += "\nelif " + traverse_ast(one_cond[0]) + ":"
            outstring += add_indent("\n" + traverse_ast(one_cond[1]))
    if len(in_node)>4:
        outstring = outstring + "\nelse:"
        false_bit = traverse_ast(in_node[4])
        outstring = outstring + add_indent("\n"+false_bit) #indent
    special_info["inif"] = old_inif
    return outstring
# dREL do statements include the final value, whereas a python range will include
# everything up to the final number
elif node_type == "DO":
    #DO ID = start, finish, incr, suite
    outstring = "for " + in_node[1] + " in range(" + traverse_ast(in_node[2]) + ","
    finish = traverse_ast(in_node[3])
    increment = traverse_ast(in_node[4])
    outstring = outstring + finish + "+1" + "," + increment
    outstring = outstring + "):"
    suite = add_indent("\n"+traverse_ast(in_node[5]))
    return outstring + suite
elif node_type == "FOR":
    # FOR target_list expression_list suite
    outstring = "for "
    for express in in_node[1]:
        outstring = outstring + traverse_ast(express) + ","
    outstring = outstring[:-1] + " in "
    special_info["rhs"] = True
    for target in in_node[2]:
        outstring += "copy("+traverse_ast(target) + "),"
    special_info["rhs"] = None
    outstring = outstring[:-1] + ":" + add_indent("\n" + traverse_ast(in_node[3]))
    return outstring
elif node_type == "REPEAT":
    #REPEAT suite
    outstring = "while True:" + add_indent("\n" + traverse_ast(in_node[1]))
    return outstring
<>
<>
<>

@ With statements. ``With'' in dREL is ultimately just a simple alias (see the
separate document discussing this) if we consider that a looped item method is
necessarily mapped across all packets of the category, or in other words that
it can *only* operate on the ``current packet''. So we make a memo of the
aliases that are currently in use and don't actually output anything. We do
need to catch the case where a looped category has an explicit ``with''
statement, as this should replace the implicit ``with''.

<>=
elif node_type == "WITH": #new_id old_id suite
    # each entry in special_id is [alias:[cat_name,loop variable, is_loop]]
    alias_id = in_node[1]
    cat_id = in_node[2]
    is_already_there = [a for a in special_info['special_id'][-1].keys() if \
                        special_info['special_id'][-1][a][0] == cat_id]
    if len(is_already_there)>0:
        del special_info['special_id'][-1][is_already_there[0]]
        print("Found explicit loop category alias: %s for %s" % (alias_id,cat_id) )
    special_info['special_id'][-1].update({alias_id:[cat_id,"",False]})
    if in_node[2] in special_info['loopable_cats'].keys(): #flag this
        special_info['special_id'][-1][alias_id][1] = "looped_cat"
    outstring = traverse_ast(in_node[3])
    return outstring
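@ The comment on the DO branch above is easily checked: because dREL's ``do''
includes the final value, the emitted range gains a ``+1''.

    # dREL "do i = 0,5,1" is emitted as: for i in range(0,5+1,1):
    print(list(range(0, 5 + 1, 1)))   # [0, 1, 2, 3, 4, 5] - final value included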
<>= elif node_type == "LOOP": #ALIAS CAT LOOPVAR COMP COMPVAR SUITE alias_id = in_node[1] cat_id = in_node[2] var_info = [cat_id,"",False] if cat_id not in special_info['loopable_cats'].keys(): message = "%s is not a loopable category (must be one of:\n%s)" % (cat_id,special_info['loopable_cats'].keys()) print(message) raise CifError(message) #loop over some index loop_num = len(special_info['special_id'][-1])+1 if in_node[3] == "": # provide our own loop_index = "__pi%d" % loop_num else: loop_index = in_node[3] var_info[1] = loop_index var_info[2] = True special_info['special_id'][-1].update({alias_id:var_info}) # now emit some text: first to find the length of the category # loopable cats contains a list of names defined for the category # this might not be robust as we ignore alternative resolutions of the (cat,name) pair catnames = set([a[1][0] for a in cif_dic.cat_obj_lookup_table.items() if a[0][0]==cat_id.lower()]) outstring = "__pyallitems = " + repr(catnames) outstring += "\nprint('names in cat = %s' % repr(__pyallitems))" outstring += "\n" + "__pycitems = [a for a in __pyallitems if %s]" % (api_table["has_name"] % "a") outstring += "\nprint('names in cat -> %s' % repr(__pycitems))\n" cat_key = cif_dic[cat_id]['_category_key.name'][0] #take official key # If there is nothing in the category, provoke category creation by evaluating the key outstring += "if len(__pycitems)==0:\n" outstring += " __pydummy = %s\n" % (api_table["data_access"] % repr(cat_key)) outstring += " __pycitems = [a for a in __pyallitems if %s]\n" % (api_table["has_name"] % "a") outstring += " print('After category creation, names in cat ->' + repr(__pycitems))\n" special_info["depends"].add(cat_key) #add key as a dependency if var_info[2] == True: access_string = api_table["count_data"] % (api_table["data_access"] % "__pycitems[0]") outstring += "\n" + "__loop_range%d = range(%s)" % (loop_num,access_string) else: outstring += "\n" + "__loop_range%d = [0]" % loop_num #outstring +="\n" + "for __noloop in [0]:" # deal with this comparison test if in_node[4] != "": outstring += "\n" + "__loop_range%d = [a for a in __loop_range%d if a %s %s]" % (loop_num,loop_num,in_node[4],in_node[5]) # now output the looping command outstring += "\n" + "for %s in __loop_range%d:" % (loop_index,loop_num) # now the actual body of the loop loop_body = traverse_ast(in_node[6]) outstring = outstring + add_indent("\n"+loop_body) return outstring @ Function arguments can have their types explicitly specified, but we don't make use of this. See comments elsewhere on how to deal with function binding, or why we don't need to do imports at this level. The draft cif_core dictionary assumes access to the data file (see function SymEquiv, which does a key-based lookup of symmetry_equiv), so we define a compulsory third argument, which must be 'ciffile' to match the naming that occurs automatically in key lookup code generation. <>= elif node_type == "FUNCTION": #FUNCTION ID ARGLIST SUITE func_name = in_node[1] outstring = "def %s (" % func_name for one_arg in in_node[2]: outstring += one_arg[0] + "," outstring = outstring + "ciffile):" # imports #import_lines = "import numpy\nfrom CifFile.drel import drel_runtime\n" import_lines = "" outstring = outstring + add_indent("\n" + import_lines + traverse_ast(in_node[3])+"\nreturn %s" % func_name) return outstring @ \section{Top level constructions}. Ultimately we end up with a list of statements. 
<>= elif node_type == "STATEMENTS": outstring = "" for one_statement in in_node[1]: # try: next_bit = traverse_ast(one_statement) if not isinstance(next_bit,(unicode,str)): print("Unable to traverse AST for %s" % one_statement[0]) else: outstring = outstring + next_bit + "\n" # except SyntaxError as message: # print("Failed, so far have \n " + outstring) # outstring += "raise SyntaxError, %s" % message # except: # print("Failed, so far have \n " + outstring) # outstring += "raise SyntaxError, %s" % `one_statement` return outstring <> elif node_type == "LIST": outstring = "[" for one_element in in_node[1]: outstring = outstring + traverse_ast(one_element) + "," return outstring + "]" elif node_type == "EXPR": return traverse_ast(in_node[1]) # Expr list occurs only when a non-assignment statement appears as expr_stmt elif node_type == "EXPRLIST": outstring = "" for one_expr in in_node[1]: outstring += traverse_ast(one_expr) + "\n" return outstring elif node_type == "GROUP": outstring = "(" for expression in in_node[1]: outstring = outstring + traverse_ast(expression) + "," return outstring[:-1] + ")" elif node_type == "PRINT": return 'print( ' + traverse_ast(in_node[1]) + ")" elif node_type == "BREAK": return 'break ' elif node_type == "NEXT": return 'continue ' @ Assignment. The category-level assignments write to a table indexed by object id, so the final result will be a set of list-valued datanames for insertion into the ciffile. These are dealt with by 'FANCY_ASSIGN'. The augmented assignments in dREL handle matrices as well, so we need to reimplement them, which we do by rewriting them in the form a = func(a,b) We also need to catch packet assignments (e.g. a = atom_site[label]) so that we can properly rewrite the attribute access later on, thus we keep a track of all values and the final value of sub_subject. Note that we are assuming that only one packet subscription takes place per expression, which is reasonable given that mathematical manipulations are not defined for packets. We also set the 'rhs' flag to allow us to construct lists as StarLists when on the lhs, but not the rhs. <>= elif node_type == "ASSIGN": #Target_list ,assigner, expression list outstring = "" lhs_values = [] special_info["rhs"] = False for target_value in in_node[1]: one_value = traverse_ast(target_value) outstring = outstring + one_value +"," lhs_values.append(one_value) lhs = outstring[:-1] rhs = "" special_info["rhs"] = True for order,expression in enumerate(in_node[3]): rhs += traverse_ast(expression)+"," if special_info["sub_subject"] != "": #a full packet special_info["packet_vars"].update({lhs_values[order]:special_info["sub_subject"]}) special_info["sub_subject"] = "" # we cannot expand a numpy array, hence the workaround here #if in_node[2] == "++=": # outstring = "_temp1 = %s;%s = %s(_temp1,%s)" % (lhs,lhs,aug_assign_table["++="],rhs[:-1]) if in_node[2] != "=": outstring = "%s = %s(%s,%s)" % (lhs, aug_assign_table[in_node[2]],lhs,rhs[:-1]) else: outstring = "%s = %s" % (lhs,rhs[:-1]) special_info["rhs"] = None return outstring elif node_type == "FANCY_ASSIGN": # [1] is cat name, [2] is list of objects catname = in_node[1] outstring = "" special_info["rhs"] = True for obj,value in in_node[2]: real_id = cif_dic.get_name_by_cat_obj(catname, obj) newvalue = traverse_ast(value) outstring = outstring + "__dreltarget.update({'%s':__dreltarget.get('%s',[])+[%s]})\n" % (real_id,real_id,newvalue) special_info["rhs"] = None return outstring @ Maths. 
@ Maths. We have to catch vector and matrix operations and farm them out to
numpy (this is not yet fully implemented). A binary maths operation is a
``mathop''. We take a shortcut and assume that the operators are written
identically in dREL and Python, so that the AST contains the textual form of
the operator.

<>=
elif node_type == "MATHOP":
    op = mathop_table[in_node[1]]
    first_arg = traverse_ast(in_node[2])
    second_arg = traverse_ast(in_node[3])
    if op is not None: #simple operation
        outstring = first_arg + op + second_arg
    else:
        outstring = fix_mathops(in_node[1],first_arg,second_arg)
    return outstring
elif node_type == "SIGN":
    outstring = "drel_runtime.drel_dot(" + in_node[1] + "1," + traverse_ast(in_node[2])+")"
    return outstring
elif node_type == "UNARY":
    outstring = in_node[1] + " " + traverse_ast(in_node[2])
    return outstring

@ Although in most cases we could simply write the binary maths operator
straight out, we do a table lookup to facilitate reuse of this code with other
language targets. The augmented assignment table gives the function name to
use, as we need to cover array structures as well.

<>=
mathop_table = {"+":None, "-":None, "<":"<", "*":None, "/":None, "&":"&",
                "|":"|", ">":">", "<=":"<=", ">=":">=", "!=":"!=", "or":" or ",
                "and":" and ", "==":"==", "in":" in ", "not in":" not in ",
                "^":None,"**":"**"}

aug_assign_table = {"++=":"drel_runtime.aug_append",
                    "+=":"drel_runtime.aug_add",
                    "-=":"drel_runtime.aug_sub",
                    "--=":"drel_runtime.aug_remove"}
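@ As a check of the ``^'' translation performed by [[fix_mathops]] (defined
below), the emitted [[numpy.cross]] call gives the expected vector result.

    import numpy
    # dREL "u ^ v" is emitted as numpy.cross(u,v):
    print(numpy.cross([1, 0, 0], [0, 1, 0]))   # [0 0 1]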
@ To avoid cluttering up the main switch statement, we call out to this
function in order to find out what the Python name of the given function is.
Where we have to provide our own function, that is also listed. Note that this
only works for unary functions. We force all matrices to be 64-bit floats: if
you check SeitzFromJones, you will find a matrix that defaults to integer type,
meaning that any assignments of float values are rounded, hence the need to
work explicitly with floats. User-defined functions (i.e. those defined in
dictionaries) have access to the data file (see the use of 'symmetry_equiv' key
access in SymEquiv), so this must be passed as a final argument. We assume
anything that is not found here is user-defined, and add the 'ciffile' argument
at the end.

<>=
def get_function_name(in_name):
    """Return the Python name of the dREL function, an argument prefix, and anything to be appended to the end"""
    builtins = {"table":"dict", "list":"list", "array":"numpy.array",
                "len":"len", "abs":"abs", "magn":"abs", "atoi":"int",
                "float":"float", "str":"str", "norm":"numpy.linalg.norm",
                "sqrt":"math.sqrt", "exp":"math.exp", "complex":"complex",
                "max":"max", "min":"min", "strip":"drel_runtime.drel_strip",
                "int":"drel_runtime.drel_int", "eigen":"drel_runtime.drel_eigen",
                "hash":"hash" #dREL extension
                }
    test_name = in_name.lower()
    target_name = builtins.get(test_name,None)
    if target_name is not None:
        return target_name,"",""
    if test_name in ['sind','cosd','tand']:
        return "math."+test_name[:-1],"math.radians(",")"
    if test_name in ['acosd','asind','atand','atan2d']:
        return "math.degrees(math."+test_name[:-1],"",")"
    if test_name == "mod":
        return "divmod","","[1]"
    if test_name == "upper":
        return "","",".upper()"
    if test_name == "transpose":
        return "","",".T"
    if test_name == 'expimag':
        return "cmath.exp","1j*(",")"
    if test_name in ['real','imag']:
        return "","","." + test_name
    if test_name == 'matrix':
        return "numpy.matrix","",".astype('float64')"
    if test_name == 'sort':
        return "","",".sort()"
    return in_name,"",None #dictionary defined

@ Mathematical operations in dREL are complicated by the need to handle matrix
operations transparently. The two operators that need special treatment are
"^" (cross product) and "*" (matrix multiplication); addition, subtraction and
division are also routed through helpers so that array operands behave
correctly.

<>=
def fix_mathops(op,first_arg,second_arg):
    """Return a string that will carry out the requested operation"""
    if op == "^":
        return "numpy.cross(%s,%s)" % (first_arg,second_arg)
    elif op == "*": #could be matrix multiplication
        return "drel_runtime.drel_dot(%s,%s)" % (first_arg,second_arg)
    elif op == "+":
        return "drel_runtime.drel_add(%s,%s)" % (first_arg, second_arg)
    elif op == "-":
        return "numpy.subtract(%s,%s)" % (first_arg, second_arg)
    # beware integer division on this one...
    elif op == "/":
        return "numpy.true_divide(%s,%s)" % (first_arg, second_arg)

@ We have a few simple utility functions for commonly-required operations.

<>=
def add_indent(text,n=4):
    """Indent text by n spaces"""
    return re.sub("\n","\n"+n*" ",text)  # use n rather than a hard-coded 4

def getcatname(dataname):
    """Return cat,name pair from dataname"""
    try:
        cat,name = dataname.split(".")
    except ValueError: #no period in name
        return dataname,None  # 'cat' is unbound when the split fails
    return cat[1:],name

pycifrw-4.4/src/drel/py_from_ast.py000066400000000000000000000720021345362224200174610ustar00rootroot00000000000000# To maximize python3/python2 compatibility from __future__ import print_function from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import # The unicode type does not exist in Python3 as the str type # encompasses unicode. PyCIFRW tests for 'unicode' would fail # Suggestions for a better approach welcome. if isinstance(u"abc",str): #Python3 unicode = str import re from CifFile import CifError pycifrw_func_table = { #how to use PyCIFRW CifFile objects "data_access": "ciffile[%s]", # argument is dataname to be accessed "optional_data_access":"ciffile.get(%s,None)", "element_no": "%s[%s]", # accessing a particular element of the result of data_access "count_data": "len(%s)", # number of elements for result of data_access "cat_names":"ciffile.dictionary.names_in_cat(%s)", #names in category %s "has_name":"ciffile.has_key_or_alias(%s)", "semantic_packet":"ciffile.GetKeyedSemanticPacket(%s,%s)" #get a packet for key, value } def make_python_function(in_ast,func_name,targetname, special_ids=[{}], func_table = pycifrw_func_table, cif_dic=None,cat_meth=False, func_def = False, have_sn=True,loopable={},debug=None,depends=False): """Convert in_ast to python code""" if debug is not None: print("incoming AST:") print(repr(in_ast)) func_text,withtable,dependencies,cur_row = start_traverse(in_ast,func_table,target_id=targetname,cat_meth=cat_meth,loopable=loopable,debug=debug,func=func_def,cif_dic=cif_dic) if debug is not None: print('Start========') print(func_text) print('End==========') if func_def and not depends: return func_text elif func_def: return func_text, None # now indent the string noindent = func_text.splitlines() # get the minimum indent and remove empty lines no_spaces = [re.match(r' *',a).end() for a in noindent if a] #drop empty lines min_spaces = min(no_spaces)+4 # because we add 4 ourselves to everything if len(withtable) > 0 or cur_row: # a loop method d_vars = [a[1][0] for a in withtable.items()] if cur_row: d_vars = d_vars + ['__current_row'] dummy_vars = ",".join(d_vars) actual_names = [k for k in withtable.keys()] actual_names = [func_table["data_access"]%
("'"+a+"'") for a in actual_names] # intercept optional values and replace with [] if None optional_names = ["__option_w%d"%n for n,k in enumerate(withtable.keys()) if withtable[k][2]] if cur_row: actual_names+=['__row_id'] final_names = actual_names[:] one_pack_names = [func_table["element_no"] % (a,"packet_no") for a in actual_names] for n,k in enumerate(withtable.keys()): if withtable[k][2]: final_names[n]="__option_w%d"%n #pre-evaluated one_pack_names[n] = "__option_w%d"%n map_names = ",".join(final_names) one_packet_each = ",".join(one_pack_names) preamble = "def %s(ciffile,packet_no=-1):\n" % (func_name) preamble +=" try:\n" preamble +=" from itertools import repeat,imap\n" #note that imap might fail preamble +=" except ImportError: #python3\n" preamble +=" imap = map\n" preamble +=" def drel_func(%s):\n" % dummy_vars # for debugging print_instruction = "'Function passed variables "+("{!r} "*len(d_vars))+"'.format("+dummy_vars+",)" preamble +=" print(%s)\n" % print_instruction # preamble +=" print('Globals inside looped drel_func:' + repr(globals())\n") # the actual function gets inserted here # Handle the optional names end_body = "\n" for n,one_opt in enumerate(withtable.keys()): if withtable[one_opt][2]: end_body += " try:\n" end_body += " %s%d = %s\n" % ("__option_w",n,func_table["optional_data_access"]%("'"+one_opt+"'")) end_body += " except KeyError:\n" end_body += " %s%d = None\n" % ("__option_w",n) end_body+= " if packet_no < 0: #map\n" for one_opt in optional_names: end_body += " if %s is None: %s = repeat(None)\n" % (one_opt,one_opt) if cur_row and len(actual_names) > 1: #i.e. have real names from category end_body +=" __row_id = range(%s)\n" % (func_table['count_data'] % ("'"+actual_names[0]+"'")) elif cur_row and len(actual_names)==1: #so no actual names available end_body += " cat_names = %s\n" % (func_table["cat_names"] % ("'"+getcatname(targetname)[0]+"'")) end_body += " have_name = [a for a in cat_names if %s]\n" % (func_table["has_name"] % "a") end_body += " if len(have_name)>0:\n" end_body += " full_length = %s \n" % (func_table["count_data"] % (func_table["data_access"] % "have_name[0]")) end_body += " __row_id = range(full_length)\n" end_body += " else:\n" end_body += " return []\n" end_body+= " return list(imap(drel_func,%s))\n" % (map_names+",") end_body+= " else:\n" end_body+= " return drel_func(%s)\n" % one_packet_each else: preamble = "def %s(ciffile):\n" % func_name #preamble +=" global StarList#from CifFile.drel import drel_runtime\n" end_body = "" if cat_meth: preamble += " "*8 + "__dreltarget = {}\n" # initialise num_header = """ import math,cmath try: import numpy except: print("Can't import numerical python, this method may not work") """ preamble += num_header indented = map(lambda a:" "*8 + a +"\n",noindent) #indent dREL body postamble = "" postamble += " "*8 + "return __dreltarget" final = preamble + "".join(indented) + postamble + end_body if not depends: return final else: return final, dependencies def start_traverse(in_node,api_table,target_id=None,loopable={},cat_meth=False,debug=None, func=False, cif_dic=None): special_info = {"special_id":[{}],"target_id":target_id,"withtable":{},"sub_subject":"", "depends":set(),"loopable_cats":loopable,"packet_vars":{}, "need_current_row":False,"rhs":None,"inif":False} # create a virtual enclosing 'with' statement if target_id is not None and not cat_meth and not func: cat,name = getcatname(target_id) special_info["special_id"][-1].update({"_"+cat:[cat,"",False]}) if cat in 
special_info["loopable_cats"].keys(): # special_info["special_id"][-1]["_"+cat][1] = "looped_cat" mathop_table = {"+":None, "-":None, "<":"<", "*":None, "/":None, "&":"&", "|":"|", ">":">", "<=":"<=", ">=":">=", "!=":"!=", "or":" or ", "and":" and ", "==":"==", "in":" in ", "not in":" not in ", "^":None,"**":"**"} aug_assign_table = {"++=":"drel_runtime.aug_append", "+=":"drel_runtime.aug_add", "-=":"drel_runtime.aug_sub", "--=":"drel_runtime.aug_remove"} def traverse_ast(in_node,debug=debug): if isinstance(in_node,(unicode,str)): return in_node if isinstance(in_node[0],list): raise SyntaxError('First element of AST Node must be string: ' + repr(in_node)) node_type = in_node[0] if debug == node_type: print(node_type + ": " + repr(in_node)) if node_type == "ARGLIST": pass elif node_type == "BINARY": return("%d" % int(in_node[1],base=2)) elif node_type == "FALSE": return("False") elif node_type == "REAL": return(in_node[1]) elif node_type == "HEX": return("%d" % int(in_node[1],base=16)) elif node_type == "INT": return(in_node[1]) elif node_type == "IMAGINARY": return(in_node[1]) elif node_type == "OCTAL": return("%d" % int(in_node[1],base=8)) elif node_type == "ATOM": if isinstance(in_node[1],(unicode,str)): # pick up built-in literals if in_node[1].lower() == 'twopi': return "(2.0 * math.pi)" if in_node[1].lower() == 'pi': return "math.pi" else: return in_node[1] else: return traverse_ast(in_node[1]) elif node_type == "ITEM_TAG": return in_node[1] elif node_type == "LITERAL": return in_node[1] elif node_type == "LIST": if len(in_node)==1: #empty list return "StarList([])" if special_info["rhs"] == True: outstring = "StarList([" else: outstring = "" for list_elem in in_node[1:]: outstring = outstring + traverse_ast(list_elem) + "," if special_info["rhs"] == True: return outstring[:-1] + "])" else: return outstring[:-1] elif node_type == "TABLE": if len(in_node)==1: return "StarTable({})" else: outstring = "{" for table_elem in in_node[1:]: outstring = outstring + traverse_ast(table_elem[0])+":"+traverse_ast(table_elem[1]) +"," return outstring[:-1] + "}" elif node_type == "SUBSCRIPTION": # variable, single expression newid = 0 if in_node[1][0] == "ATOM" and in_node[1][1][0] == "ITEM_TAG": #keyed lookup print("Found category used as item tag: subscribing") newid = [in_node[1][1][1][1:],False,False] #drop underscore and remember else: primary = traverse_ast(in_node[1]) # check to see if this is a special variable for idtable in special_info["special_id"]: newid = idtable.get(primary,0) if newid: break if primary in special_info["loopable_cats"].keys(): #loop category used newid = [primary,False,False] break if newid: #FIXME: the dataname may not be the . 
construction (eg pdCIF) key_items = ["_"+newid[0]+"."+s for s in special_info["loopable_cats"][newid[0]][0]] #key name special_info["depends"].update([k.lower() for k in key_items]) get_loop = api_table["semantic_packet"] % (traverse_ast(in_node[2]),"'"+newid[0]+"'") special_info["sub_subject"] = newid[0] #in case of attribute reference following print("Set sub_subject to %s" % special_info["sub_subject"]) return get_loop else: outstring = primary + "[" outstring = outstring + traverse_ast(in_node[2]) + "]" return outstring elif node_type == "ATTRIBUTE": # id/tag , att outstring = "" newid = 0 # check for special ids primary = traverse_ast(in_node[1]) # this will set sub_subject if necessary for idtable in special_info["special_id"]: newid = idtable.get(primary,0) if newid: break if newid: #catch our output name true_name = cif_dic.get_name_by_cat_obj(newid[0].lower(),in_node[2].lower()).lower() if true_name == special_info.get("target_id","").lower(): outstring = "__dreltarget" special_info["have_drel_target"] = True # if we are looping, we add a loop prefix. If we are withing an # unlooped category, we put the full name back. elif newid[2] or (not newid[2] and not newid[1]): # looping or simple with outstring = api_table["data_access"] % ('"' +true_name +'"') special_info["depends"].add(true_name) if newid[1]: # a loop statement requires an index outstring += "[" + newid[1]+ "]" else: # a with statement; capture the name and create a dummy variable if true_name not in special_info["withtable"]: #new position = len(special_info["withtable"]) new_var = "__w%d" % position isoptional = special_info["inif"] special_info["withtable"][true_name] = (new_var,position,isoptional) outstring += special_info["withtable"][true_name][0] special_info["depends"].add(true_name) elif in_node[1][0] == "ATOM" and primary[0] == "_": # a cat/obj name fullname = cif_dic.get_name_by_cat_obj(primary,in_node[2]).lower() # a simple cat.obj dataname from the dictionary if special_info.get("target_id","").lower() == fullname: outstring = "__dreltarget" special_info["have_drel_target"] = True else: special_info["depends"].add(fullname) outstring = api_table["data_access"] % ("'" + fullname + "'") else: # default to Python attribute access # check for packet variables if primary in special_info["packet_vars"]: real_cat = special_info["packet_vars"][primary] fullname = cif_dic.get_name_by_cat_obj(real_cat,in_node[2]) special_info['depends'].add(fullname) elif special_info["sub_subject"]: fullname = cif_dic.get_name_by_cat_obj(special_info["sub_subject"],in_node[2]) special_info['depends'].add(fullname) else: # not anything special fullname = in_node[2] outstring = "getattr(" + primary + ",'" + fullname + "')" # sub_subject no longer relevant after attribute resolution special_info['sub_subject'] = "" return outstring elif node_type == "FUNC_CALL": if in_node[1] == "Current_Row": #not a function but a keyword really outstring = "__current_row" special_info["need_current_row"]=True else: func_name,every_arg_prefix,postfix = get_function_name(in_node[1]) outstring = func_name + "( " if func_name == "list" and len(in_node[2])>1: #special case outstring = outstring + "[" for argument in in_node[2]: outstring = outstring + every_arg_prefix + traverse_ast(argument) + "," if postfix == None: # signal for dictionary defined outstring = outstring + "ciffile)" else: outstring = outstring[:-1] if func_name == "list" and len(in_node[2])>1: outstring = outstring + "]" outstring = outstring + ")" + postfix return outstring elif 
node_type == "SLICE": # primary [[start,finish,step],[...] outstring = traverse_ast(in_node[1]) + "[" slice_list = in_node[2] for one_slice in slice_list: if one_slice[0] == "EXPR": #not a slice as such outstring += traverse_ast(one_slice) elif len(one_slice) == 0: outstring += ":" elif len(one_slice) >0: # at least start outstring += traverse_ast(one_slice[0]) + ":" if len(one_slice) >1: #start,finish only outstring += traverse_ast(one_slice[1]) if len(one_slice) == 3: #step as well outstring += ":" + traverse_ast(one_slice[2]) outstring += "," outstring = outstring[:-1] + "]" return outstring elif node_type == "MATHOP": op = mathop_table[in_node[1]] first_arg = traverse_ast(in_node[2]) second_arg = traverse_ast(in_node[3]) if op is not None: #simple operation outstring = first_arg + op + second_arg else: outstring = fix_mathops(in_node[1],first_arg,second_arg) return outstring elif node_type == "SIGN": outstring = "drel_runtime.drel_dot(" + in_node[1] + "1," + traverse_ast(in_node[2])+")" return outstring elif node_type == "UNARY": outstring = in_node[1] + " " + traverse_ast(in_node[2]) return outstring elif node_type == "IF_EXPR": #IF_EXPR test true_suite [ELSE IF_EXPR] false_suite outstring = "if " outstring = outstring + traverse_ast(in_node[1]) outstring = outstring + ":" old_inif = special_info["inif"] special_info["inif"] = True true_bit = traverse_ast(in_node[2]) outstring = outstring + add_indent("\n"+true_bit) #indent elseif = in_node[3] if len(elseif)!=0: for one_cond in elseif: #each entry is condition, suite outstring += "\nelif " + traverse_ast(one_cond[0]) + ":" outstring += add_indent("\n" + traverse_ast(one_cond[1])) if len(in_node)>4: outstring = outstring + "\nelse:" false_bit = traverse_ast(in_node[4]) outstring = outstring + add_indent("\n"+false_bit) #indent special_info["inif"] = old_inif return outstring # dREL for statements include the final value, whereas a python range will include # everything up to the final number elif node_type == "DO": #DO ID = start, finish, incr, suite outstring = "for " + in_node[1] + " in range(" + traverse_ast(in_node[2]) + "," finish = traverse_ast(in_node[3]) increment = traverse_ast(in_node[4]) outstring = outstring + finish + "+1" + "," + increment outstring = outstring + "):" suite = add_indent("\n"+traverse_ast(in_node[5])) return outstring + suite elif node_type == "FOR": # FOR target_list expression_list suite outstring = "for " for express in in_node[1]: outstring = outstring + traverse_ast(express) + "," outstring = outstring[:-1] + " in " special_info["rhs"] = True for target in in_node[2]: outstring += "copy("+traverse_ast(target) + ")," special_info["rhs"] = None outstring = outstring[:-1] + ":" + add_indent("\n" + traverse_ast(in_node[3])) return outstring elif node_type == "REPEAT": #REPEAT suite outstring = "while True:" + add_indent("\n" + traverse_ast(in_node[1])) return outstring elif node_type == "WITH": #new_id old_id suite # each entry in special_id is [alias:[cat_name,loop variable, is_loop]] alias_id = in_node[1] cat_id = in_node[2] is_already_there = [a for a in special_info['special_id'][-1].keys() if \ special_info['special_id'][-1][a][0] == cat_id] if len(is_already_there)>0: del special_info['special_id'][-1][is_already_there[0]] print("Found explicit loop category alias: %s for %s" % (alias_id,cat_id) ) special_info['special_id'][-1].update({alias_id:[cat_id,"",False]}) if in_node[2] in special_info['loopable_cats'].keys(): #flag this special_info['special_id'][-1][alias_id][1] = "looped_cat" outstring = 
traverse_ast(in_node[3]) return outstring elif node_type == "LOOP": #ALIAS CAT LOOPVAR COMP COMPVAR SUITE alias_id = in_node[1] cat_id = in_node[2] var_info = [cat_id,"",False] if cat_id not in special_info['loopable_cats'].keys(): message = "%s is not a loopable category (must be one of:\n%s)" % (cat_id,special_info['loopable_cats'].keys()) print(message) raise CifError(message) #loop over some index loop_num = len(special_info['special_id'][-1])+1 if in_node[3] == "": # provide our own loop_index = "__pi%d" % loop_num else: loop_index = in_node[3] var_info[1] = loop_index var_info[2] = True special_info['special_id'][-1].update({alias_id:var_info}) # now emit some text: first to find the length of the category # loopable cats contains a list of names defined for the category # this might not be robust as we ignore alternative resolutions of the (cat,name) pair catnames = set([a[1][0] for a in cif_dic.cat_obj_lookup_table.items() if a[0][0]==cat_id.lower()]) outstring = "__pyallitems = " + repr(catnames) outstring += "\nprint('names in cat = %s' % repr(__pyallitems))" outstring += "\n" + "__pycitems = [a for a in __pyallitems if %s]" % (api_table["has_name"] % "a") outstring += "\nprint('names in cat -> %s' % repr(__pycitems))\n" cat_key = cif_dic[cat_id]['_category_key.name'][0] #take official key # If there is nothing in the category, provoke category creation by evaluating the key outstring += "if len(__pycitems)==0:\n" outstring += " __pydummy = %s\n" % (api_table["data_access"] % repr(cat_key)) outstring += " __pycitems = [a for a in __pyallitems if %s]\n" % (api_table["has_name"] % "a") outstring += " print('After category creation, names in cat ->' + repr(__pycitems))\n" special_info["depends"].add(cat_key) #add key as a dependency if var_info[2] == True: access_string = api_table["count_data"] % (api_table["data_access"] % "__pycitems[0]") outstring += "\n" + "__loop_range%d = range(%s)" % (loop_num,access_string) else: outstring += "\n" + "__loop_range%d = [0]" % loop_num #outstring +="\n" + "for __noloop in [0]:" # deal with this comparison test if in_node[4] != "": outstring += "\n" + "__loop_range%d = [a for a in __loop_range%d if a %s %s]" % (loop_num,loop_num,in_node[4],in_node[5]) # now output the looping command outstring += "\n" + "for %s in __loop_range%d:" % (loop_index,loop_num) # now the actual body of the loop loop_body = traverse_ast(in_node[6]) outstring = outstring + add_indent("\n"+loop_body) return outstring elif node_type == "FUNCTION": #FUNCTION ID ARGLIST SUITE func_name = in_node[1] outstring = "def %s (" % func_name for one_arg in in_node[2]: outstring += one_arg[0] + "," outstring = outstring + "ciffile):" # imports #import_lines = "import numpy\nfrom CifFile.drel import drel_runtime\n" import_lines = "" outstring = outstring + add_indent("\n" + import_lines + traverse_ast(in_node[3])+"\nreturn %s" % func_name) return outstring elif node_type == "STATEMENTS": outstring = "" for one_statement in in_node[1]: # try: next_bit = traverse_ast(one_statement) if not isinstance(next_bit,(unicode,str)): print("Unable to traverse AST for %s" % one_statement[0]) else: outstring = outstring + next_bit + "\n" # except SyntaxError as message: # print("Failed, so far have \n " + outstring) # outstring += "raise SyntaxError, %s" % message # except: # print("Failed, so far have \n " + outstring) # outstring += "raise SyntaxError, %s" % `one_statement` return outstring elif node_type == "ASSIGN": #Target_list ,assigner, expression list outstring = "" lhs_values = [] 
special_info["rhs"] = False for target_value in in_node[1]: one_value = traverse_ast(target_value) outstring = outstring + one_value +"," lhs_values.append(one_value) lhs = outstring[:-1] rhs = "" special_info["rhs"] = True for order,expression in enumerate(in_node[3]): rhs += traverse_ast(expression)+"," if special_info["sub_subject"] != "": #a full packet special_info["packet_vars"].update({lhs_values[order]:special_info["sub_subject"]}) special_info["sub_subject"] = "" # we cannot expand a numpy array, hence the workaround here #if in_node[2] == "++=": # outstring = "_temp1 = %s;%s = %s(_temp1,%s)" % (lhs,lhs,aug_assign_table["++="],rhs[:-1]) if in_node[2] != "=": outstring = "%s = %s(%s,%s)" % (lhs, aug_assign_table[in_node[2]],lhs,rhs[:-1]) else: outstring = "%s = %s" % (lhs,rhs[:-1]) special_info["rhs"] = None return outstring elif node_type == "FANCY_ASSIGN": # [1] is cat name, [2] is list of objects catname = in_node[1] outstring = "" special_info["rhs"] = True for obj,value in in_node[2]: real_id = cif_dic.get_name_by_cat_obj(catname, obj) newvalue = traverse_ast(value) outstring = outstring + "__dreltarget.update({'%s':__dreltarget.get('%s',[])+[%s]})\n" % (real_id,real_id,newvalue) special_info["rhs"] = None return outstring elif node_type == "LIST": outstring = "[" for one_element in in_node[1]: outstring = outstring + traverse_ast(one_element) + "," return outstring + "]" elif node_type == "EXPR": return traverse_ast(in_node[1]) # Expr list occurs only when a non-assignment statement appears as expr_stmt elif node_type == "EXPRLIST": outstring = "" for one_expr in in_node[1]: outstring += traverse_ast(one_expr) + "\n" return outstring elif node_type == "GROUP": outstring = "(" for expression in in_node[1]: outstring = outstring + traverse_ast(expression) + "," return outstring[:-1] + ")" elif node_type == "PRINT": return 'print( ' + traverse_ast(in_node[1]) + ")" elif node_type == "BREAK": return 'break ' elif node_type == "NEXT": return 'continue ' else: return "Not found: %s" % repr(in_node) result = traverse_ast(in_node) # remove target id from dependencies if special_info["target_id"] is not None: special_info["depends"].discard(special_info["target_id"].lower()) if not special_info.get("have_drel_target",False): print('WARNING: no assignment to __dreltarget in %s (this is OK for category methods)' % repr(target_id)) print(result) return result,special_info["withtable"],special_info["depends"],special_info["need_current_row"] def get_function_name(in_name): """Return the Python name of the dREL function, an argument prefix, and anything to be appended to the end""" builtins = {"table":"dict", "list":"list", "array":"numpy.array", "len":"len", "abs":"abs", "magn":"abs", "atoi":"int", "float":"float", "str":"str", "array":"numpy.array", "norm":"numpy.linalg.norm", "sqrt":"math.sqrt", "exp":"math.exp", "complex":"complex", "max":"max", "min":"min", "strip":"drel_runtime.drel_strip", "int":"drel_runtime.drel_int", "eigen":"drel_runtime.drel_eigen", "hash":"hash" #dREL extension } test_name = in_name.lower() target_name = builtins.get(test_name,None) if target_name is not None: return target_name,"","" if test_name in ['sind','cosd','tand']: return "math."+test_name[:-1],"math.radians(",")" if test_name in ['acosd','asind','atand','atan2d']: return "math.degrees(math."+test_name[:-1],"",")" if test_name == "mod": return "divmod","","[1]" if test_name == "upper": return "","",".upper()" if test_name == "transpose": return "","",".T" if test_name == 'expimag': return 
"cmath.exp","1j*(",")" if test_name in ['real','imag']: return "","","." + test_name if test_name == 'matrix': return "numpy.matrix","",".astype('float64')" if test_name == 'sort': return "","",".sort()" return in_name,"",None #dictionary defined def fix_mathops(op,first_arg,second_arg): """Return a string that will carry out the requested operation""" if op == "^": return "numpy.cross(%s,%s)" % (first_arg,second_arg) elif op == "*": #could be matrix multiplication return "drel_runtime.drel_dot(%s,%s)" % (first_arg,second_arg) elif op == "+": return "drel_runtime.drel_add(%s,%s)" % (first_arg, second_arg) elif op == "-": return "numpy.subtract(%s,%s)" % (first_arg, second_arg) # beware integer division on this one... elif op == "/": return "numpy.true_divide(%s,%s)" % (first_arg, second_arg) def add_indent(text,n=4): """Indent text by n spaces""" return re.sub("\n","\n"+4*" ",text) def getcatname(dataname): """Return cat,name pair from dataname""" try: cat,name = dataname.split(".") except ValueError: #no period in name return cat,None return cat[1:],name pycifrw-4.4/src/lib/000077500000000000000000000000001345362224200144045ustar00rootroot00000000000000pycifrw-4.4/src/lib/Makefile000066400000000000000000000001301345362224200160360ustar00rootroot00000000000000lex.yy.c: star.l flex --nounistd star.l # clean: rm -f lex.yy.c rm -f py_star_scan.o pycifrw-4.4/src/lib/lex.yy.c000066400000000000000000001516631345362224200160140ustar00rootroot00000000000000 #line 3 "lex.yy.c" #define YY_INT_ALIGNED short int /* A lexical scanner generated by flex */ #define FLEX_SCANNER #define YY_FLEX_MAJOR_VERSION 2 #define YY_FLEX_MINOR_VERSION 5 #define YY_FLEX_SUBMINOR_VERSION 39 #if YY_FLEX_SUBMINOR_VERSION > 0 #define FLEX_BETA #endif /* First, we deal with platform-specific or compiler-specific issues. */ /* begin standard C headers. */ #include #include #include #include /* end standard C headers. */ /* flex integer type definitions */ #ifndef FLEXINT_H #define FLEXINT_H /* C99 systems have . Non-C99 systems may or may not. */ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, * if you want the limit (max/min) macros for int types. */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 #endif #include typedef int8_t flex_int8_t; typedef uint8_t flex_uint8_t; typedef int16_t flex_int16_t; typedef uint16_t flex_uint16_t; typedef int32_t flex_int32_t; typedef uint32_t flex_uint32_t; #else typedef signed char flex_int8_t; typedef short int flex_int16_t; typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; /* Limits of integral types. */ #ifndef INT8_MIN #define INT8_MIN (-128) #endif #ifndef INT16_MIN #define INT16_MIN (-32767-1) #endif #ifndef INT32_MIN #define INT32_MIN (-2147483647-1) #endif #ifndef INT8_MAX #define INT8_MAX (127) #endif #ifndef INT16_MAX #define INT16_MAX (32767) #endif #ifndef INT32_MAX #define INT32_MAX (2147483647) #endif #ifndef UINT8_MAX #define UINT8_MAX (255U) #endif #ifndef UINT16_MAX #define UINT16_MAX (65535U) #endif #ifndef UINT32_MAX #define UINT32_MAX (4294967295U) #endif #endif /* ! C99 */ #endif /* ! FLEXINT_H */ #ifdef __cplusplus /* The "const" storage-class-modifier is valid. */ #define YY_USE_CONST #else /* ! __cplusplus */ /* C99 requires __STDC__ to be defined as 1. */ #if defined (__STDC__) #define YY_USE_CONST #endif /* defined (__STDC__) */ #endif /* ! 
__cplusplus */ #ifdef YY_USE_CONST #define yyconst const #else #define yyconst #endif /* Returned upon end-of-file. */ #define YY_NULL 0 /* Promotes a possibly negative, possibly signed char to an unsigned * integer for use as an array index. If the signed char is negative, * we want to instead treat it as an 8-bit unsigned char, hence the * double cast. */ #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. */ #define BEGIN (yy_start) = 1 + 2 * /* Translate the current start state into a value that can be later handed * to BEGIN to return to the state. The YYSTATE alias is for lex * compatibility. */ #define YY_START (((yy_start) - 1) / 2) #define YYSTATE YY_START /* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) /* Special action meaning "start processing a new file". */ #define YY_NEW_FILE yyrestart(yyin ) #define YY_END_OF_BUFFER_CHAR 0 /* Size of default input buffer. */ #ifndef YY_BUF_SIZE #ifdef __ia64__ /* On IA-64, the buffer size is 16k, not 8k. * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. * Ditto for the __ia64__ case accordingly. */ #define YY_BUF_SIZE 32768 #else #define YY_BUF_SIZE 16384 #endif /* __ia64__ */ #endif /* The state buf must be large enough to hold one state per character in the main buffer. */ #define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) #ifndef YY_TYPEDEF_YY_BUFFER_STATE #define YY_TYPEDEF_YY_BUFFER_STATE typedef struct yy_buffer_state *YY_BUFFER_STATE; #endif #ifndef YY_TYPEDEF_YY_SIZE_T #define YY_TYPEDEF_YY_SIZE_T typedef size_t yy_size_t; #endif extern yy_size_t yyleng; extern FILE *yyin, *yyout; #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires * access to the local variable yy_act. Since yyless() is a macro, it would break * existing scanners that call yyless() from OUTSIDE yylex. * One obvious solution it to make yy_act a global. I tried that, and saw * a 5% performance hit in a non-yylineno scanner, because yy_act is * normally declared as a register variable-- so it is not worth it. */ #define YY_LESS_LINENO(n) \ do { \ int yyl;\ for ( yyl = n; yyl < yyleng; ++yyl )\ if ( yytext[yyl] == '\n' )\ --yylineno;\ }while(0) #define YY_LINENO_REWIND_TO(dst) \ do {\ const char *p;\ for ( p = yy_cp-1; p >= (dst); --p)\ if ( *p == '\n' )\ --yylineno;\ }while(0) /* Return all but the first "n" matched characters back to the input stream. */ #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. */ \ int yyless_macro_arg = (n); \ YY_LESS_LINENO(yyless_macro_arg);\ *yy_cp = (yy_hold_char); \ YY_RESTORE_YY_MORE_OFFSET \ (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } \ while ( 0 ) #define unput(c) yyunput( c, (yytext_ptr) ) #ifndef YY_STRUCT_YY_BUFFER_STATE #define YY_STRUCT_YY_BUFFER_STATE struct yy_buffer_state { FILE *yy_input_file; char *yy_ch_buf; /* input buffer */ char *yy_buf_pos; /* current position in input buffer */ /* Size of input buffer in bytes, not including room for EOB * characters. */ yy_size_t yy_buf_size; /* Number of characters read into yy_ch_buf, not including EOB * characters. 
*/ yy_size_t yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to * delete it. */ int yy_is_our_buffer; /* Whether this is an "interactive" input source; if so, and * if we're using stdio for input, then we want to use getc() * instead of fread(), to make sure we stop fetching input after * each newline. */ int yy_is_interactive; /* Whether we're considered to be at the beginning of a line. * If so, '^' rules will be active on the next match, otherwise * not. */ int yy_at_bol; int yy_bs_lineno; /**< The line count. */ int yy_bs_column; /**< The column count. */ /* Whether to try to fill the input buffer when we reach the * end of it. */ int yy_fill_buffer; int yy_buffer_status; #define YY_BUFFER_NEW 0 #define YY_BUFFER_NORMAL 1 /* When an EOF's been seen but there's still some text to process * then we mark the buffer as YY_EOF_PENDING, to indicate that we * shouldn't try reading from the input source any more. We might * still have a bunch of tokens to match, though, because of * possible backing-up. * * When we actually see the EOF, we change the status to "new" * (via yyrestart()), so that the user can continue scanning by * just pointing yyin at a new input file. */ #define YY_BUFFER_EOF_PENDING 2 }; #endif /* !YY_STRUCT_YY_BUFFER_STATE */ /* Stack of input buffers. */ static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ /* We provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general * "scanner state". * * Returns the top of the stack, or NULL. */ #define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ : NULL) /* Same as previous macro, but useful when we know that the buffer stack is not * NULL or when we need an lvalue. For internal use only. */ #define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] /* yy_hold_char holds the character lost when yytext is formed. */ static char yy_hold_char; static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */ yy_size_t yyleng; /* Points to current character in buffer. */ static char *yy_c_buf_p = (char *) 0; static int yy_init = 0; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ /* Flag which is used to allow yywrap()'s to do buffer switches * instead of setting up a fresh yyin. A bit of a hack ... */ static int yy_did_buffer_switch_on_eof; void yyrestart (FILE *input_file ); void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ); void yy_delete_buffer (YY_BUFFER_STATE b ); void yy_flush_buffer (YY_BUFFER_STATE b ); void yypush_buffer_state (YY_BUFFER_STATE new_buffer ); void yypop_buffer_state (void ); static void yyensure_buffer_stack (void ); static void yy_load_buffer_state (void ); static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); #define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ) YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ); YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ); YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ); void *yyalloc (yy_size_t ); void *yyrealloc (void *,yy_size_t ); void yyfree (void * ); #define yy_new_buffer yy_create_buffer #define yy_set_interactive(is_interactive) \ { \ if ( ! 
YY_CURRENT_BUFFER ){ \ yyensure_buffer_stack (); \ YY_CURRENT_BUFFER_LVALUE = \ yy_create_buffer(yyin,YY_BUF_SIZE ); \ } \ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ } #define yy_set_bol(at_bol) \ { \ if ( ! YY_CURRENT_BUFFER ){\ yyensure_buffer_stack (); \ YY_CURRENT_BUFFER_LVALUE = \ yy_create_buffer(yyin,YY_BUF_SIZE ); \ } \ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ } #define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) /* Begin user sect3 */ #define yywrap() 1 #define YY_SKIP_YYWRAP typedef unsigned char YY_CHAR; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; typedef int yy_state_type; extern int yylineno; int yylineno = 1; extern char *yytext; #define yytext_ptr yytext static yy_state_type yy_get_previous_state (void ); static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); static int yy_get_next_buffer (void ); static void yy_fatal_error (yyconst char msg[] ); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. */ #define YY_DO_BEFORE_ACTION \ (yytext_ptr) = yy_bp; \ yyleng = (size_t) (yy_cp - yy_bp); \ (yy_hold_char) = *yy_cp; \ *yy_cp = '\0'; \ (yy_c_buf_p) = yy_cp; #define YY_NUM_RULES 20 #define YY_END_OF_BUFFER 21 /* This struct is not used in this scanner, but its presence is necessary. */ struct yy_trans_info { flex_int32_t yy_verify; flex_int32_t yy_nxt; }; static yyconst flex_int16_t yy_accept[72] = { 0, 8, 8, 0, 0, 21, 14, 8, 18, 19, 19, 9, 19, 19, 14, 14, 14, 14, 19, 17, 20, 20, 13, 14, 8, 0, 0, 0, 10, 0, 10, 0, 16, 9, 0, 15, 14, 14, 14, 14, 14, 7, 0, 12, 12, 10, 0, 0, 11, 11, 14, 14, 14, 14, 14, 12, 11, 14, 14, 14, 14, 14, 14, 14, 1, 5, 3, 6, 14, 4, 2, 0 } ; static yyconst flex_int32_t yy_ec[256] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 5, 6, 7, 8, 5, 5, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 11, 12, 5, 13, 14, 5, 15, 5, 5, 5, 5, 16, 5, 5, 17, 18, 5, 5, 19, 20, 5, 21, 5, 5, 5, 5, 8, 5, 8, 5, 22, 5, 11, 12, 5, 13, 14, 5, 15, 5, 5, 5, 5, 16, 5, 5, 17, 18, 5, 5, 19, 20, 5, 21, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } ; static yyconst flex_int32_t yy_meta[24] = { 0, 1, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1 } ; static yyconst flex_int16_t yy_base[84] = { 0, 0, 0, 21, 23, 128, 0, 32, 117, 27, 120, 25, 129, 116, 113, 107, 105, 21, 0, 129, 39, 129, 129, 0, 0, 111, 35, 43, 129, 45, 110, 113, 112, 47, 108, 107, 95, 97, 96, 91, 94, 0, 49, 51, 53, 100, 55, 57, 59, 61, 98, 96, 89, 92, 87, 63, 65, 82, 92, 80, 79, 78, 0, 83, 0, 0, 0, 0, 76, 0, 0, 129, 69, 82, 73, 76, 80, 83, 75, 86, 90, 93, 40, 36 } ; static yyconst flex_int16_t yy_def[84] = { 0, 71, 1, 72, 72, 71, 73, 71, 74, 74, 75, 76, 71, 77, 73, 73, 73, 73, 78, 71, 79, 71, 71, 73, 7, 74, 74, 80, 71, 81, 74, 75, 75, 76, 77, 77, 73, 73, 73, 73, 73, 78, 79, 71, 71, 74, 80, 81, 71, 71, 73, 73, 73, 73, 73, 71, 71, 73, 73, 73, 73, 73, 82, 73, 73, 83, 73, 82, 73, 83, 73, 0, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71 } ; static yyconst flex_int16_t yy_nxt[153] = { 0, 6, 7, 8, 9, 6, 10, 11, 12, 13, 6, 6, 6, 14, 6, 15, 16, 6, 6, 17, 6, 6, 18, 19, 
21, 21, 21, 21, 25, 26, 30, 22, 39, 22, 24, 25, 26, 71, 45, 27, 69, 40, 43, 44, 67, 71, 25, 26, 48, 49, 25, 26, 43, 44, 43, 44, 55, 44, 25, 26, 48, 49, 48, 49, 56, 49, 43, 44, 48, 49, 20, 20, 20, 20, 28, 28, 28, 28, 31, 41, 31, 33, 33, 33, 33, 34, 23, 34, 42, 42, 42, 46, 46, 46, 46, 47, 47, 47, 70, 68, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 71, 54, 53, 52, 51, 50, 35, 35, 32, 32, 29, 71, 38, 37, 36, 35, 32, 29, 71, 5, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71 } ; static yyconst flex_int16_t yy_chk[153] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 11, 11, 9, 3, 17, 4, 7, 7, 7, 9, 26, 7, 83, 17, 20, 20, 82, 26, 27, 27, 29, 29, 33, 33, 42, 42, 43, 43, 44, 44, 46, 46, 47, 47, 48, 48, 49, 49, 55, 55, 56, 56, 72, 72, 72, 72, 74, 74, 74, 74, 75, 78, 75, 76, 76, 76, 76, 77, 73, 77, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 68, 63, 61, 60, 59, 58, 57, 54, 53, 52, 51, 50, 45, 40, 39, 38, 37, 36, 35, 34, 32, 31, 30, 25, 16, 15, 14, 13, 10, 8, 5, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71 } ; /* Table of booleans, true if rule could match eol. */ static yyconst flex_int32_t yy_rule_can_match_eol[21] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, }; static yy_state_type yy_last_accepting_state; static char *yy_last_accepting_cpos; extern int yy_flex_debug; int yy_flex_debug = 0; /* The intent behind this definition is that it'll catch * any uses of REJECT which flex missed. */ #define REJECT reject_used_but_not_detected #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; #line 1 "star.l" /******************************************************************************* This code borrows some patterns from star.l, which was written and designed by Andrew Gene HALL of I.N. Services Pty. Ltd., Scarborough WA, Australia, for the University of Western Australia, Crawley WA, Australia. Present code by JRH to work with PyCIFRW v 3.1 Match for EOL at EOF contributed by Boris Dusek *******************************************************************************/ /* Lex Definitions for a STAR File */ #line 13 "star.l" /* Global Definitions */ #include #include #include "star_scanner.h" #define YY_DECL int star_scanner() /* redefine YY_INPUT to come from our global string */ #define YY_INPUT(buf,result,max_size)\ {\ if(string_pos == in_string_len) result = YY_NULL;\ if(string_pos + max_size <= in_string_len) {\ strncpy(buf,input_string+string_pos,max_size);\ string_pos +=max_size;\ result =max_size;\ } else {\ strncpy(buf,input_string+string_pos,in_string_len - string_pos);\ result = in_string_len - string_pos;\ string_pos = in_string_len;\ }\ } /* These have been modified from starbase, as they allow all sorts of forbidden characters */ /* Lex Rules fo a STAR File */ #line 583 "lex.yy.c" #define INITIAL 0 #define Alltext 1 #ifndef YY_EXTRA_TYPE #define YY_EXTRA_TYPE void * #endif static int yy_init_globals (void ); /* Accessor methods to globals. These are made visible to non-reentrant scanners for convenience. 
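 *
 * A minimal usage sketch, not part of the generated scanner: after the
 * scanning function returns a token id, the accessors declared below
 * expose the matched text and line number without reaching into the
 * globals directly. The loop assumes the DEND and DERROR token ids from
 * star_scanner.h and that input has already been supplied.
 *
 *     int tok;
 *     while ((tok = star_scanner()) != DEND && tok != DERROR)
 *         printf("line %d: token %d <%s>\n",
 *                yyget_lineno(), tok, yyget_text());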
*/ int yylex_destroy (void ); int yyget_debug (void ); void yyset_debug (int debug_flag ); YY_EXTRA_TYPE yyget_extra (void ); void yyset_extra (YY_EXTRA_TYPE user_defined ); FILE *yyget_in (void ); void yyset_in (FILE * in_str ); FILE *yyget_out (void ); void yyset_out (FILE * out_str ); yy_size_t yyget_leng (void ); char *yyget_text (void ); int yyget_lineno (void ); void yyset_lineno (int line_number ); /* Macros after this point can all be overridden by user definitions in * section 1. */ #ifndef YY_SKIP_YYWRAP #ifdef __cplusplus extern "C" int yywrap (void ); #else extern int yywrap (void ); #endif #endif static void yyunput (int c,char *buf_ptr ); #ifndef yytext_ptr static void yy_flex_strncpy (char *,yyconst char *,int ); #endif #ifdef YY_NEED_STRLEN static int yy_flex_strlen (yyconst char * ); #endif #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput (void ); #else static int input (void ); #endif #endif /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #ifdef __ia64__ /* On IA-64, the buffer size is 16k, not 8k */ #define YY_READ_BUF_SIZE 16384 #else #define YY_READ_BUF_SIZE 8192 #endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. */ #ifndef ECHO /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ #define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". */ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ { \ int c = '*'; \ size_t n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ if ( c == '\n' ) \ buf[n++] = (char) c; \ if ( c == EOF && ferror( yyin ) ) \ YY_FATAL_ERROR( "input in flex scanner failed" ); \ result = n; \ } \ else \ { \ errno=0; \ while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ { \ if( errno != EINTR) \ { \ YY_FATAL_ERROR( "input in flex scanner failed" ); \ break; \ } \ errno=0; \ clearerr(yyin); \ } \ }\ \ #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ #ifndef yyterminate #define yyterminate() return YY_NULL #endif /* Number of entries by which start-condition stack grows. */ #ifndef YY_START_STACK_INCR #define YY_START_STACK_INCR 25 #endif /* Report a fatal error. */ #ifndef YY_FATAL_ERROR #define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) #endif /* end tables serialization structures and prototypes */ /* Default declaration of generated scanner - a define so the user can * easily add parameters. */ #ifndef YY_DECL #define YY_DECL_IS_OURS 1 extern int yylex (void); #define YY_DECL int yylex (void) #endif /* !YY_DECL */ /* Code executed at the beginning of each rule, after yytext and yyleng * have been set up. */ #ifndef YY_USER_ACTION #define YY_USER_ACTION #endif /* Code executed at the end of each rule. */ #ifndef YY_BREAK #define YY_BREAK break; #endif #define YY_RULE_SETUP \ YY_USER_ACTION /** The main scanner function which does all the work. */ YY_DECL { register yy_state_type yy_current_state; register char *yy_cp, *yy_bp; register int yy_act; if ( !(yy_init) ) { (yy_init) = 1; #ifdef YY_USER_INIT YY_USER_INIT; #endif if ( ! (yy_start) ) (yy_start) = 1; /* first start state */ if ( ! yyin ) yyin = stdin; if ( ! 
yyout ) yyout = stdout; if ( ! YY_CURRENT_BUFFER ) { yyensure_buffer_stack (); YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin,YY_BUF_SIZE ); } yy_load_buffer_state( ); } { #line 78 "star.l" #line 792 "lex.yy.c" while ( 1 ) /* loops until end-of-file is reached */ { yy_cp = (yy_c_buf_p); /* Support of yytext. */ *yy_cp = (yy_hold_char); /* yy_bp points to the position in yy_ch_buf of the start of * the current run. */ yy_bp = yy_cp; yy_current_state = (yy_start); yy_match: do { register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 72 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; ++yy_cp; } while ( yy_base[yy_current_state] != 129 ); yy_find_action: yy_act = yy_accept[yy_current_state]; if ( yy_act == 0 ) { /* have to back up */ yy_cp = (yy_last_accepting_cpos); yy_current_state = (yy_last_accepting_state); yy_act = yy_accept[yy_current_state]; } YY_DO_BEFORE_ACTION; if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) { yy_size_t yyl; for ( yyl = 0; yyl < yyleng; ++yyl ) if ( yytext[yyl] == '\n' ) yylineno++; ; } do_action: /* This label is used only to access EOF actions. */ switch ( yy_act ) { /* beginning of action switch */ case 0: /* must back up */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = (yy_hold_char); yy_cp = (yy_last_accepting_cpos); yy_current_state = (yy_last_accepting_state); goto yy_find_action; case 1: YY_RULE_SETUP #line 79 "star.l" {return(DLBLOCK);} YY_BREAK case 2: YY_RULE_SETUP #line 80 "star.l" {return(DGLOBAL);} YY_BREAK case 3: YY_RULE_SETUP #line 81 "star.l" {return(DSTOP);} YY_BREAK case 4: YY_RULE_SETUP #line 82 "star.l" {return(DSAVE_HEADING);} YY_BREAK case 5: YY_RULE_SETUP #line 83 "star.l" {return(DSAVE_END);} YY_BREAK case 6: YY_RULE_SETUP #line 84 "star.l" {return(DDATA_HEADING);} YY_BREAK case 7: YY_RULE_SETUP #line 85 "star.l" {return(DDATA_NAME);} YY_BREAK case 8: YY_RULE_SETUP #line 86 "star.l" {/* do nothing */} YY_BREAK case 9: YY_RULE_SETUP #line 87 "star.l" {/* do nothing */} YY_BREAK case 10: /* rule 10 can match eol */ *yy_cp = (yy_hold_char); /* undo effects of setting up yytext */ YY_LINENO_REWIND_TO(yy_cp - 1); (yy_c_buf_p) = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP #line 88 "star.l" {/* do nothing */} YY_BREAK case 11: /* rule 11 can match eol */ YY_RULE_SETUP #line 90 "star.l" {BEGIN(Alltext);return(DSTART_SC_LINE);} YY_BREAK case 12: /* rule 12 can match eol */ YY_RULE_SETUP #line 91 "star.l" {return(DSC_LINE_OF_TEXT);} YY_BREAK case 13: YY_RULE_SETUP #line 92 "star.l" {BEGIN(INITIAL); return(DEND_SC_LINE);} YY_BREAK case 14: YY_RULE_SETUP #line 95 "star.l" {return(DDATA_VALUE_1);} YY_BREAK case 15: YY_RULE_SETUP #line 97 "star.l" {/* this code borrowed from star.l in starbase. We can't write a rule in flex to accept a quote if the following character is not blank, and accepting the following non-blank may swallow the closing quote! */ char *p; p = yytext; /* step over first quote */ p++; /* scan to "? * (? == null) or (? == space) => end of string, stop */ while ( p = strchr( p, '\'')) if (isspace( *(++p))) { /* (? 
== space) => push back rest of the input, * S_Q_T_S */ /* this is to avoid side effects in the macro */ int i = yyleng - strlen(p); yyless( i ); } return(DDATA_VALUE_1); } YY_BREAK case 16: YY_RULE_SETUP #line 122 "star.l" {/* this code borrowed from star.l in starbase. We can't write a rule in flex to accept a quote only if the following character is not blank, and writing a quote-nonblank rule may swallow the closing quote. */ char *p; p = yytext; /* step over first quote */ p++; /* scan to '? * (? == null) or (? == space) => end of string, stop */ while ( p = strchr( p, '\"')) if (isspace( *(++p))) { /* (? == space) => push back rest of the input, * S_Q_T_S */ /* this is to avoid side effects in the macro */ int i = yyleng - strlen(p); yyless( i ); } return(DDATA_VALUE_1); } YY_BREAK case 17: YY_RULE_SETUP #line 147 "star.l" {YY_FLUSH_BUFFER; return(DEND);} YY_BREAK case 18: /* rule 18 can match eol */ YY_RULE_SETUP #line 148 "star.l" {/*printf("Stray carriage return...\n");*/} YY_BREAK case 19: YY_RULE_SETUP #line 149 "star.l" { /* printf("Unexpected Character\n");*/ /* printf("<%d>\n",yytext[0]);*/ YY_FLUSH_BUFFER; return(DERROR); } YY_BREAK case 20: YY_RULE_SETUP #line 155 "star.l" ECHO; YY_BREAK #line 1019 "lex.yy.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(Alltext): yyterminate(); case YY_END_OF_BUFFER: { /* Amount of text matched not including the EOB char. */ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = (yy_hold_char); YY_RESTORE_YY_MORE_OFFSET if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) { /* We're scanning a new file or input source. It's * possible that this happened because the user * just pointed yyin at a new source and called * yylex(). If so, then we have to assure * consistency between YY_CURRENT_BUFFER and our * globals. Here is the right place to do so, because * this is the first action (other than possibly a * back-up) that will match for the new input source. */ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; } /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test * in input(). */ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) { /* This was really a NUL. */ yy_state_type yy_next_state; (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state( ); /* Okay, we're now positioned to make the NUL * transition. We couldn't have * yy_get_previous_state() go ahead and do it * for us because it doesn't know how to deal * with the possibility of jamming (and we don't * want to build jamming into it because then it * will run more slowly). */ yy_next_state = yy_try_NUL_trans( yy_current_state ); yy_bp = (yytext_ptr) + YY_MORE_ADJ; if ( yy_next_state ) { /* Consume the NUL. 
*/ yy_cp = ++(yy_c_buf_p); yy_current_state = yy_next_state; goto yy_match; } else { yy_cp = (yy_c_buf_p); goto yy_find_action; } } else switch ( yy_get_next_buffer( ) ) { case EOB_ACT_END_OF_FILE: { (yy_did_buffer_switch_on_eof) = 0; if ( yywrap( ) ) { /* Note: because we've taken care in * yy_get_next_buffer() to have set up * yytext, we can now set up * yy_c_buf_p so that if some total * hoser (like flex itself) wants to * call the scanner after we return the * YY_NULL, it'll still work - another * YY_NULL will get returned. */ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; yy_act = YY_STATE_EOF(YY_START); goto do_action; } else { if ( ! (yy_did_buffer_switch_on_eof) ) YY_NEW_FILE; } break; } case EOB_ACT_CONTINUE_SCAN: (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state( ); yy_cp = (yy_c_buf_p); yy_bp = (yytext_ptr) + YY_MORE_ADJ; goto yy_match; case EOB_ACT_LAST_MATCH: (yy_c_buf_p) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; yy_current_state = yy_get_previous_state( ); yy_cp = (yy_c_buf_p); yy_bp = (yytext_ptr) + YY_MORE_ADJ; goto yy_find_action; } break; } default: YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); } /* end of action switch */ } /* end of scanning one token */ } /* end of user's declarations */ } /* end of yylex */ /* yy_get_next_buffer - try to read in a new buffer * * Returns a code representing an action: * EOB_ACT_LAST_MATCH - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ static int yy_get_next_buffer (void) { register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; register char *source = (yytext_ptr); register int number_to_move, i; int ret_val; if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) { /* Don't try to fill the buffer, so this is an EOF. */ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) { /* We matched a single character, the EOB, so * treat this as a final EOF. */ return EOB_ACT_END_OF_FILE; } else { /* We matched some text prior to the EOB, first * process it. */ return EOB_ACT_LAST_MATCH; } } /* Try to read more data. */ /* First move last chars to start of buffer. */ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) /* don't do the read, it's not guaranteed to return an EOF, * just force an EOF */ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; else { yy_size_t num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; int yy_c_buf_p_offset = (int) ((yy_c_buf_p) - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { yy_size_t new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; else b->yy_buf_size *= 2; b->yy_ch_buf = (char *) /* Include room in for 2 EOB chars. */ yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); } else /* Can't grow it, we don't own it. */ b->yy_ch_buf = 0; if ( ! 
b->yy_ch_buf ) YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; } if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; /* Read in more data. */ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), (yy_n_chars), num_to_read ); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } if ( (yy_n_chars) == 0 ) { if ( number_to_move == YY_MORE_ADJ ) { ret_val = EOB_ACT_END_OF_FILE; yyrestart(yyin ); } else { ret_val = EOB_ACT_LAST_MATCH; YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_EOF_PENDING; } } else ret_val = EOB_ACT_CONTINUE_SCAN; if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { /* Extend the array by 50%, plus the number we really need. */ yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ); if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); } (yy_n_chars) += number_to_move; YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; return ret_val; } /* yy_get_previous_state - get the state just before the EOB char was reached */ static yy_state_type yy_get_previous_state (void) { register yy_state_type yy_current_state; register char *yy_cp; yy_current_state = (yy_start); for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) { register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 23); if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 72 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; } return yy_current_state; } /* yy_try_NUL_trans - try to make a transition on the NUL character * * synopsis * next_state = yy_try_NUL_trans( current_state ); */ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) { register int yy_is_jam; register char *yy_cp = (yy_c_buf_p); register YY_CHAR yy_c = 23; if ( yy_accept[yy_current_state] ) { (yy_last_accepting_state) = yy_current_state; (yy_last_accepting_cpos) = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 72 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; yy_is_jam = (yy_current_state == 71); return yy_is_jam ? 0 : yy_current_state; } static void yyunput (int c, register char * yy_bp ) { register char *yy_cp; yy_cp = (yy_c_buf_p); /* undo effects of setting up yytext */ *yy_cp = (yy_hold_char); if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) { /* need to shift things up to make room */ /* +2 for EOB chars. 
*/ register yy_size_t number_to_move = (yy_n_chars) + 2; register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; register char *source = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) *--dest = *--source; yy_cp += (int) (dest - source); yy_bp += (int) (dest - source); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size; if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) YY_FATAL_ERROR( "flex scanner push-back overflow" ); } *--yy_cp = (char) c; if ( c == '\n' ){ --yylineno; } (yytext_ptr) = yy_bp; (yy_hold_char) = *yy_cp; (yy_c_buf_p) = yy_cp; } #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput (void) #else static int input (void) #endif { int c; *(yy_c_buf_p) = (yy_hold_char); if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) { /* yy_c_buf_p now points to the character we want to return. * If this occurs *before* the EOB characters, then it's a * valid NUL; if not, then we've hit the end of the buffer. */ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) /* This was really a NUL. */ *(yy_c_buf_p) = '\0'; else { /* need more input */ yy_size_t offset = (yy_c_buf_p) - (yytext_ptr); ++(yy_c_buf_p); switch ( yy_get_next_buffer( ) ) { case EOB_ACT_LAST_MATCH: /* This happens because yy_g_n_b() * sees that we've accumulated a * token and flags that we need to * try matching the token before * proceeding. But for input(), * there's no matching to consider. * So convert the EOB_ACT_LAST_MATCH * to EOB_ACT_END_OF_FILE. */ /* Reset buffer status. */ yyrestart(yyin ); /*FALLTHROUGH*/ case EOB_ACT_END_OF_FILE: { if ( yywrap( ) ) return EOF; if ( ! (yy_did_buffer_switch_on_eof) ) YY_NEW_FILE; #ifdef __cplusplus return yyinput(); #else return input(); #endif } case EOB_ACT_CONTINUE_SCAN: (yy_c_buf_p) = (yytext_ptr) + offset; break; } } } c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ *(yy_c_buf_p) = '\0'; /* preserve yytext */ (yy_hold_char) = *++(yy_c_buf_p); if ( c == '\n' ) yylineno++; ; return c; } #endif /* ifndef YY_NO_INPUT */ /** Immediately switch to a different input stream. * @param input_file A readable stream. * * @note This function does not reset the start condition to @c INITIAL . */ void yyrestart (FILE * input_file ) { if ( ! YY_CURRENT_BUFFER ){ yyensure_buffer_stack (); YY_CURRENT_BUFFER_LVALUE = yy_create_buffer(yyin,YY_BUF_SIZE ); } yy_init_buffer(YY_CURRENT_BUFFER,input_file ); yy_load_buffer_state( ); } /** Switch to a different input buffer. * @param new_buffer The new input buffer. * */ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) { /* TODO. We should be able to replace this entire function body * with * yypop_buffer_state(); * yypush_buffer_state(new_buffer); */ yyensure_buffer_stack (); if ( YY_CURRENT_BUFFER == new_buffer ) return; if ( YY_CURRENT_BUFFER ) { /* Flush out information for old buffer. */ *(yy_c_buf_p) = (yy_hold_char); YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } YY_CURRENT_BUFFER_LVALUE = new_buffer; yy_load_buffer_state( ); /* We don't actually know whether we did this switch during * EOF (yywrap()) processing, but the only time this flag * is looked at is after yywrap() is called, so it's safe * to go ahead and always set it. 
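 *
 * A hedged usage sketch, not part of the generated code: the classic
 * pattern saves the current buffer, scans a nested source, then
 * restores the outer buffer. The file name is purely illustrative.
 *
 *     YY_BUFFER_STATE outer = YY_CURRENT_BUFFER;
 *     FILE *nested = fopen("nested.cif", "r");
 *     if (nested) {
 *         yy_switch_to_buffer(yy_create_buffer(nested, YY_BUF_SIZE));
 *         while (star_scanner() != DEND)
 *             ;
 *         yy_delete_buffer(YY_CURRENT_BUFFER);
 *         yy_switch_to_buffer(outer);
 *         fclose(nested);
 *     }
 *
 * yypush_buffer_state() and yypop_buffer_state() further down wrap the
 * same idea with an explicit stack.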
*/ (yy_did_buffer_switch_on_eof) = 1; } static void yy_load_buffer_state (void) { (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; (yy_hold_char) = *(yy_c_buf_p); } /** Allocate and initialize an input buffer state. * @param file A readable stream. * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. * * @return the allocated buffer state. */ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) { YY_BUFFER_STATE b; b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_buf_size = size; /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_is_our_buffer = 1; yy_init_buffer(b,file ); return b; } /** Destroy the buffer. * @param b a buffer created with yy_create_buffer() * */ void yy_delete_buffer (YY_BUFFER_STATE b ) { if ( ! b ) return; if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; if ( b->yy_is_our_buffer ) yyfree((void *) b->yy_ch_buf ); yyfree((void *) b ); } /* Initializes or reinitializes a buffer. * This function is sometimes called more than once on the same buffer, * such as during a yyrestart() or at EOF. */ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) { int oerrno = errno; yy_flush_buffer(b ); b->yy_input_file = file; b->yy_fill_buffer = 1; /* If b is the current buffer, then yy_init_buffer was _probably_ * called from yyrestart() or through yy_get_next_buffer. * In that case, we don't want to reset the lineno or column. */ if (b != YY_CURRENT_BUFFER){ b->yy_bs_lineno = 1; b->yy_bs_column = 0; } b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; errno = oerrno; } /** Discard all buffered characters. On the next scan, YY_INPUT will be called. * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. * */ void yy_flush_buffer (YY_BUFFER_STATE b ) { if ( ! b ) return; b->yy_n_chars = 0; /* We always need two end-of-buffer characters. The first causes * a transition to the end-of-buffer state. The second causes * a jam in that state. */ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; b->yy_buf_pos = &b->yy_ch_buf[0]; b->yy_at_bol = 1; b->yy_buffer_status = YY_BUFFER_NEW; if ( b == YY_CURRENT_BUFFER ) yy_load_buffer_state( ); } /** Pushes the new state onto the stack. The new state becomes * the current state. This function will allocate the stack * if necessary. * @param new_buffer The new state. * */ void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) { if (new_buffer == NULL) return; yyensure_buffer_stack(); /* This block is copied from yy_switch_to_buffer. */ if ( YY_CURRENT_BUFFER ) { /* Flush out information for old buffer. */ *(yy_c_buf_p) = (yy_hold_char); YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); } /* Only push if top exists. Otherwise, replace top. */ if (YY_CURRENT_BUFFER) (yy_buffer_stack_top)++; YY_CURRENT_BUFFER_LVALUE = new_buffer; /* copied from yy_switch_to_buffer. */ yy_load_buffer_state( ); (yy_did_buffer_switch_on_eof) = 1; } /** Removes and deletes the top of the stack, if present. 
* The next element becomes the new top. * */ void yypop_buffer_state (void) { if (!YY_CURRENT_BUFFER) return; yy_delete_buffer(YY_CURRENT_BUFFER ); YY_CURRENT_BUFFER_LVALUE = NULL; if ((yy_buffer_stack_top) > 0) --(yy_buffer_stack_top); if (YY_CURRENT_BUFFER) { yy_load_buffer_state( ); (yy_did_buffer_switch_on_eof) = 1; } } /* Allocates the stack if it does not exist. * Guarantees space for at least one push. */ static void yyensure_buffer_stack (void) { yy_size_t num_to_alloc; if (!(yy_buffer_stack)) { /* First allocation is just for 2 elements, since we don't know if this * scanner will even need a stack. We use 2 instead of 1 to avoid an * immediate realloc on the next call. */ num_to_alloc = 1; (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc (num_to_alloc * sizeof(struct yy_buffer_state*) ); if ( ! (yy_buffer_stack) ) YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); (yy_buffer_stack_max) = num_to_alloc; (yy_buffer_stack_top) = 0; return; } if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ /* Increase the buffer to prepare for a possible push. */ int grow_size = 8 /* arbitrary grow size */; num_to_alloc = (yy_buffer_stack_max) + grow_size; (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc ((yy_buffer_stack), num_to_alloc * sizeof(struct yy_buffer_state*) ); if ( ! (yy_buffer_stack) ) YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); /* zero only the new slots.*/ memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); (yy_buffer_stack_max) = num_to_alloc; } } /** Setup the input buffer state to scan directly from a user-specified character buffer. * @param base the character buffer * @param size the size in bytes of the character buffer * * @return the newly allocated buffer state object. */ YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size ) { YY_BUFFER_STATE b; if ( size < 2 || base[size-2] != YY_END_OF_BUFFER_CHAR || base[size-1] != YY_END_OF_BUFFER_CHAR ) /* They forgot to leave room for the EOB's. */ return 0; b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ b->yy_buf_pos = b->yy_ch_buf = base; b->yy_is_our_buffer = 0; b->yy_input_file = 0; b->yy_n_chars = b->yy_buf_size; b->yy_is_interactive = 0; b->yy_at_bol = 1; b->yy_fill_buffer = 0; b->yy_buffer_status = YY_BUFFER_NEW; yy_switch_to_buffer(b ); return b; } /** Setup the input buffer state to scan a string. The next call to yylex() will * scan from a @e copy of @a str. * @param yystr a NUL-terminated string to scan * * @return the newly allocated buffer state object. * @note If you want to scan bytes that may contain NUL values, then use * yy_scan_bytes() instead. */ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr ) { return yy_scan_bytes(yystr,strlen(yystr) ); } /** Setup the input buffer state to scan the given bytes. The next call to yylex() will * scan from a @e copy of @a bytes. * @param yybytes the byte buffer to scan * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. * * @return the newly allocated buffer state object. */ YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len ) { YY_BUFFER_STATE b; char *buf; yy_size_t n; yy_size_t i; /* Get memory for full buffer, including space for trailing EOB's. 
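 *
 * A minimal sketch of how these entry points are typically driven,
 * illustrative rather than generated code: yy_scan_string() and
 * yy_scan_bytes() copy the caller's data into a fresh buffer, so the
 * caller may free its string immediately, and the returned buffer
 * state must be released with yy_delete_buffer() afterwards.
 *
 *     YY_BUFFER_STATE bs = yy_scan_string("data_demo _tag value\n");
 *     while (star_scanner() != DEND)
 *         ;
 *     yy_delete_buffer(bs);
 *
 * PyCIFRW itself bypasses these helpers and instead feeds the scanner
 * through the YY_INPUT redefinition in star.l.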
*/ n = _yybytes_len + 2; buf = (char *) yyalloc(n ); if ( ! buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); for ( i = 0; i < _yybytes_len; ++i ) buf[i] = yybytes[i]; buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; b = yy_scan_buffer(buf,n ); if ( ! b ) YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); /* It's okay to grow etc. this buffer, and we should throw it * away when we're done. */ b->yy_is_our_buffer = 1; return b; } #ifndef YY_EXIT_FAILURE #define YY_EXIT_FAILURE 2 #endif static void yy_fatal_error (yyconst char* msg ) { (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); } /* Redefine yyless() so it works in section 3 code. */ #undef yyless #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. */ \ int yyless_macro_arg = (n); \ YY_LESS_LINENO(yyless_macro_arg);\ yytext[yyleng] = (yy_hold_char); \ (yy_c_buf_p) = yytext + yyless_macro_arg; \ (yy_hold_char) = *(yy_c_buf_p); \ *(yy_c_buf_p) = '\0'; \ yyleng = yyless_macro_arg; \ } \ while ( 0 ) /* Accessor methods (get/set functions) to struct members. */ /** Get the current line number. * */ int yyget_lineno (void) { return yylineno; } /** Get the input stream. * */ FILE *yyget_in (void) { return yyin; } /** Get the output stream. * */ FILE *yyget_out (void) { return yyout; } /** Get the length of the current token. * */ yy_size_t yyget_leng (void) { return yyleng; } /** Get the current token. * */ char *yyget_text (void) { return yytext; } /** Set the current line number. * @param line_number * */ void yyset_lineno (int line_number ) { yylineno = line_number; } /** Set the input stream. This does not discard the current * input buffer. * @param in_str A readable stream. * * @see yy_switch_to_buffer */ void yyset_in (FILE * in_str ) { yyin = in_str ; } void yyset_out (FILE * out_str ) { yyout = out_str ; } int yyget_debug (void) { return yy_flex_debug; } void yyset_debug (int bdebug ) { yy_flex_debug = bdebug ; } static int yy_init_globals (void) { /* Initialization is the same as for the non-reentrant scanner. * This function is called from yylex_destroy(), so don't allocate here. */ /* We do not touch yylineno unless the option is enabled. */ yylineno = 1; (yy_buffer_stack) = 0; (yy_buffer_stack_top) = 0; (yy_buffer_stack_max) = 0; (yy_c_buf_p) = (char *) 0; (yy_init) = 0; (yy_start) = 0; /* Defined in main.c */ #ifdef YY_STDINIT yyin = stdin; yyout = stdout; #else yyin = (FILE *) 0; yyout = (FILE *) 0; #endif /* For future reference: Set errno on error, since we are called by * yylex_init() */ return 0; } /* yylex_destroy is for both reentrant and non-reentrant scanners. */ int yylex_destroy (void) { /* Pop the buffer stack, destroying each element. */ while(YY_CURRENT_BUFFER){ yy_delete_buffer(YY_CURRENT_BUFFER ); YY_CURRENT_BUFFER_LVALUE = NULL; yypop_buffer_state(); } /* Destroy the stack itself. */ yyfree((yy_buffer_stack) ); (yy_buffer_stack) = NULL; /* Reset the globals. This is important in a non-reentrant scanner so the next time * yylex() is called, initialization will occur. */ yy_init_globals( ); return 0; } /* * Internal utility routines. 
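 *
 * The allocation wrappers below (yyalloc, yyrealloc, yyfree) exist so
 * that an embedding application can substitute its own allocator. A
 * hedged sketch, assuming the scanner were rebuilt with
 * %option noyyalloc noyyrealloc noyyfree (PyCIFRW does not do this);
 * my_pool_alloc is a hypothetical application allocator:
 *
 *     void *yyalloc (yy_size_t size) { return my_pool_alloc(size); }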
*/ #ifndef yytext_ptr static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) { register int i; for ( i = 0; i < n; ++i ) s1[i] = s2[i]; } #endif #ifdef YY_NEED_STRLEN static int yy_flex_strlen (yyconst char * s ) { register int n; for ( n = 0; s[n]; ++n ) ; return n; } #endif void *yyalloc (yy_size_t size ) { return (void *) malloc( size ); } void *yyrealloc (void * ptr, yy_size_t size ) { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those * that use void* generic pointers. It works with the latter * because both ANSI C and C++ allow castless assignment from * any pointer type to void*, and deal with argument conversions * as though doing an assignment. */ return (void *) realloc( (char *) ptr, size ); } void yyfree (void * ptr ) { free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ } #define YYTABLES_NAME "yytables" #line 154 "star.l" /* This routine is called during initialisation to avoid any problems which might arise due to a syntax error not re-initialising the scanner. Each time the scanner is initialised, the buffer is flushed */ void star_clear(void) { yy_flush_buffer(YY_CURRENT_BUFFER); yylineno = 0; BEGIN(INITIAL); } pycifrw-4.4/src/lib/py_star_scan.c000066400000000000000000000135421345362224200172420ustar00rootroot00000000000000/* Call our flex scanner */ #include "Python.h" #include #define STAR_SCANNER #include "star_scanner.h" #if PY_MAJOR_VERSION >= 3 #define MOD_ERROR_VAL NULL #define MOD_SUCCESS_VAL(val) val #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) #define MOD_DEF(ob, name, doc, methods) \ static struct PyModuleDef moduledef = { \ PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \ ob = PyModule_Create(&moduledef); #else #define MOD_ERROR_VAL #define MOD_SUCCESS_VAL(val) #define MOD_INIT(name) void init##name(void) #define MOD_DEF(ob, name, doc, methods) \ ob = Py_InitModule3(name, methods, doc); #endif static PyObject * get_input(PyObject * self, PyObject * args); static PyObject * flex_scan(PyObject * self,PyObject * args); static PyObject * get_token(PyObject * self,PyObject * args); static PyObject * drop_mem(PyObject * self,PyObject * args); static PyObject * get_last_ten(PyObject * self,PyObject * args); static PyMethodDef StarScanMethods[] = { {"prepare",get_input, METH_VARARGS,"Prepare scanner input"}, {"scan", flex_scan, METH_VARARGS, "Get next token"}, {"token",get_token, METH_VARARGS, "Return i'th token"}, {"last_ten",get_last_ten, METH_VARARGS, "Return last 10 tokens"}, {"cleanup", drop_mem, METH_VARARGS, "Free used memory"}, {NULL,NULL,0,NULL} }; //Module initialisation for Python 2 and 3 MOD_INIT(StarScan) { PyObject *m; MOD_DEF(m,"StarScan","A tokeniser for Star files", StarScanMethods) if(m==NULL) return MOD_ERROR_VAL; token_list = NULL; value_list = NULL; line_no_list = NULL; current_len = 0; alloc_mem = 0; #if PY_MAJOR_VERSION >= 3 return m; #endif } /* We need to read from the text string that the Python scanner uses, so we get a handle on the string and use that to feed flex. We allow a Unicode string that is UTF8 encoded. 
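
   A hedged sketch of the equivalent logic -- the real implementation is
   the YY_INPUT macro redefined in star.l. Each request copies at most
   max_size bytes from the Python-owned UTF-8 buffer and advances
   string_pos; returning 0 signals YY_NULL, i.e. end of input.

       static size_t read_chunk(char *buf, size_t max_size)
       {
           size_t left = in_string_len - string_pos;
           size_t n = (left < max_size) ? left : max_size;
           memcpy(buf, input_string + string_pos, n);
           string_pos += n;
           return n;
       }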
*/ static PyObject * get_input(PyObject * self, PyObject * args) { PyObject * str_arg; /* A python string object in theory */ int i; if(!(PyArg_ParseTuple(args,"U",&str_arg))) return NULL; #if PY_MAJOR_VERSION >= 3 input_string = PyUnicode_AsUTF8(str_arg); #else input_string = PyString_AsString(str_arg); #endif string_pos = 0; in_string_len = strlen(input_string); star_clear(); for(i=0;ialloc_mem) { token_list = (int *) realloc(token_list,(alloc_mem+MEM_ALLOC_SIZE)*sizeof(int *)); line_no_list = (int *) realloc(line_no_list,(alloc_mem+MEM_ALLOC_SIZE)*sizeof(int *)); value_list = (PyObject **) realloc(value_list,(alloc_mem+MEM_ALLOC_SIZE)*sizeof(PyObject **)); alloc_mem += MEM_ALLOC_SIZE; /* printf("Expanded memory, val=%x,tok=%x\n",value_list,token_list);*/ } /* store latest values */ save_str = (char *) malloc((yyleng+1)*sizeof(char *)); /* printf("Got memory for string %s length %d at %x\n",yytext,yyleng+1,save_str);*/ strncpy(save_str,yytext,yyleng+1); value_list[current_len] = PyUnicode_FromString(save_str); /* new reference */ token_list[current_len] = tok_id; line_no_list[current_len] = yylineno; current_len++; /* return(Py_BuildValue("(iiss)",0,0,token_str,yytext));*/ return(Py_BuildValue("")); } static PyObject * get_token(PyObject * self, PyObject * args) { int list_pos; if(!(PyArg_ParseTuple(args,"i",&list_pos))) return NULL; /* printf("Getting token %d\n",list_pos);*/ if(list_pos==current_len) flex_scan(self,args); if(list_pos #include #include "star_scanner.h" #define YY_DECL int star_scanner() /* redefine YY_INPUT to come from our global string */ #define YY_INPUT(buf,result,max_size)\ {\ if(string_pos == in_string_len) result = YY_NULL;\ if(string_pos + max_size <= in_string_len) {\ strncpy(buf,input_string+string_pos,max_size);\ string_pos +=max_size;\ result =max_size;\ } else {\ strncpy(buf,input_string+string_pos,in_string_len - string_pos);\ result = in_string_len - string_pos;\ string_pos = in_string_len;\ }\ } %} /* These have been modified from starbase, as they allow all sorts of forbidden characters */ a [aA] b [bB] d [dD] e [eE] g [gG] l [lL] o [oO] p [pP] s [sS] t [tT] v [vV] Dollar \$ Data_ {d}{a}{t}{a}_ Loop_ {l}{o}{o}{p}_ Global_ {g}{l}{o}{b}{a}{l}_ Stop_ {s}{t}{o}{p}_ Save_ {s}{a}{v}{e}_ Char [^\n\r\0] TrueChar [][!%&\(\)*+,./:<=>?@0-9A-Za-z\\^`{}|~"#$';_ \t-] Blank [\n \t\r\0] NonBlank [^\n \t\r] TrueNonBlank [][!%&\(\)*+,./:<=>?@0-9A-Za-z\\^`{}|~"#$';_-] NonDQuote [^\n\r"\0] NonSQuote [^\n\r'\0] NotUnderscore [^\n \r\t_\0] Space [\t ] Spaces [\t ]* Comment #[^\n\r]* EndOfLine {Spaces}({Comment})?(\n|\r|\r\n) %a 4000 %o 6000 %x Alltext %option noyywrap %option yylineno /* Lex Rules fo a STAR File */ %% {Loop_} {return(DLBLOCK);} {Global_} {return(DGLOBAL);} {Stop_} {return(DSTOP);} {Save_}{TrueNonBlank}+ {return(DSAVE_HEADING);} {Save_} {return(DSAVE_END);} {Data_}{TrueNonBlank}+ {return(DDATA_HEADING);} _{TrueNonBlank}+ {return(DDATA_NAME);} {Spaces} {/* do nothing */} {Comment} {/* do nothing */} {EndOfLine}/[^;] {/* do nothing */} (\n|\r\n);({TrueChar})*(\r\n|\r|\n)+ {BEGIN(Alltext);return(DSTART_SC_LINE);} [^;\r\n]({TrueChar})*(\r\n|\r|\n)+ {return(DSC_LINE_OF_TEXT);} ; {BEGIN(INITIAL); return(DEND_SC_LINE);} ([^\n\r\t \"#$\'_\[\]\0]{TrueNonBlank}*) {return(DDATA_VALUE_1);} \'{TrueChar}*\' {/* this code borrowed from star.l in starbase. We can't write a rule in flex to accept a quote if the following character is not blank, and accepting the following non-blank may swallow the closing quote! 
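
   Worked example, illustrative only: given the input line

       'abc' 'def'

   the greedy pattern \'{TrueChar}*\' first matches the whole line,
   because TrueChar accepts embedded quotes and blanks. The loop below
   finds the first quote followed by whitespace (the one closing 'abc'),
   computes how many characters of the match to keep, and pushes the
   rest back with yyless(), so this action returns 'abc' as a single
   DDATA_VALUE_1 token and scanning resumes at 'def'. An embedded quote,
   as in 'a dog's life', is skipped because the character after it is
   not blank.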
*/ char *p; p = yytext; /* step over first quote */ p++; /* scan to "? * (? == null) or (? == space) => end of string, stop */ while ( p = strchr( p, '\'')) if (isspace( *(++p))) { /* (? == space) => push back rest of the input, * S_Q_T_S */ /* this is to avoid side effects in the macro */ int i = yyleng - strlen(p); yyless( i ); } return(DDATA_VALUE_1); } \"{TrueChar}*\" {/* this code borrowed from star.l in starbase. We can't write a rule in flex to accept a quote only if the following character is not blank, and writing a quote-nonblank rule may swallow the closing quote. */ char *p; p = yytext; /* step over first quote */ p++; /* scan to '? * (? == null) or (? == space) => end of string, stop */ while ( p = strchr( p, '\"')) if (isspace( *(++p))) { /* (? == space) => push back rest of the input, * S_Q_T_S */ /* this is to avoid side effects in the macro */ int i = yyleng - strlen(p); yyless( i ); } return(DDATA_VALUE_1); } \0 {YY_FLUSH_BUFFER; return(DEND);} [\n] {/*printf("Stray carriage return...\n");*/} . { /* printf("Unexpected Character\n");*/ /* printf("<%d>\n",yytext[0]);*/ YY_FLUSH_BUFFER; return(DERROR); } %% /* This routine is called during initialisation to avoid any problems which might arise due to a syntax error not re-initialising the scanner. Each time the scanner is initialised, the buffer is flushed */ void star_clear(void) { yy_flush_buffer(YY_CURRENT_BUFFER); yylineno = 0; BEGIN(INITIAL); } pycifrw-4.4/src/lib/star_scanner.h000066400000000000000000000024331345362224200172410ustar00rootroot00000000000000#ifdef STAR_SCANNER /* The defines give the position of the token in this array */ char * tokens[] = {"END","LBLOCK","GLOBAL","STOP", "save_heading","save_end", "data_heading","data_name", "start_sc_line","sc_line_of_text", "end_sc_line","data_value_1", "(error)"}; char * input_string; /* where flex gets input */ size_t string_pos; /* current position */ size_t in_string_len; /* total length */ int * token_list; /* list of tokens */ int * line_no_list; /* list of token positions */ PyObject ** value_list; /* list of values */ size_t alloc_mem; /* How much allocated */ size_t current_len; /* Length of list */ #define MEM_ALLOC_SIZE 4192 /* Allocation block size */ extern int star_scanner(void); extern void star_clear(void); extern char * yytext; extern size_t yyleng; extern size_t yylineno; #else extern char * input_string; extern size_t string_pos; extern size_t in_string_len; #endif /* defines to index into the token list */ #define DEND 0 #define DLBLOCK 1 #define DGLOBAL 2 #define DSTOP 3 #define DSAVE_HEADING 4 #define DSAVE_END 5 #define DDATA_HEADING 6 #define DDATA_NAME 7 #define DSTART_SC_LINE 8 #define DSC_LINE_OF_TEXT 9 #define DEND_SC_LINE 10 #define DDATA_VALUE_1 11 #define DERROR 12 pycifrw-4.4/src/parsetab.py000066400000000000000000002411471345362224200160220ustar00rootroot00000000000000 # parsetab.py # This file is automatically generated. Do not edit. 
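# The tables below were emitted by PLY from the dREL grammar in
# drel_ast_yacc.py; the paths stored in _lr_productions record where each
# grammar rule is defined. A hedged sketch of how such a table module is
# built and reused -- the names follow the public ply.yacc API, and
# PyCIFRW's own build call may pass different options:
#
#     from ply import yacc
#     import drel_ast_yacc            # module that defines the grammar
#     # yacc() reuses an existing parsetab.py when _tabversion and the
#     # grammar signature still match, and regenerates it otherwise.
#     parser = yacc.yacc(module=drel_ast_yacc, tabmodule='parsetab',
#                        debug=False)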
_tabversion = '3.2' _lr_method = 'LALR' _lr_signature = '\xcc\x83\xf4':([7,11,12,17,18,23,28,30,34,35,36,40,41,42,48,49,50,51,52,54,56,60,63,69,71,73,75,76,88,99,117,118,122,124,125,127,144,155,156,159,163,164,165,173,177,178,179,184,190,207,210,211,218,230,234,247,],[-61,-76,-56,-75,-58,-78,-65,-63,-74,-68,-66,-50,113,-79,-70,-46,-53,-69,-62,-59,-77,-55,-67,-71,-43,-60,-57,-64,-60,-52,-97,-95,-81,-72,-73,-51,-82,-44,-45,-84,-90,-54,-96,-80,-47,-48,-49,-112,-65,-100,-99,-98,-113,-83,-89,113,]),'}':([1,4,5,7,11,12,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,40,41,42,44,46,48,49,50,51,52,54,56,60,61,63,65,68,69,70,71,72,75,76,79,80,81,82,85,87,88,99,101,115,116,117,118,122,124,125,127,138,140,141,144,145,147,148,154,155,156,159,162,163,164,165,173,175,176,177,178,179,184,191,192,205,207,210,211,218,221,222,225,230,233,234,245,250,254,259,260,262,278,279,],[-154,-153,-152,-61,-76,-56,-127,-75,-58,-126,-124,-78,-23,-129,-65,-63,-125,-74,-68,-66,-5,-50,-32,-79,-30,-156,-70,-46,-53,-69,-62,-59,-77,-55,-131,-67,-27,-130,-71,-24,-43,-128,-57,-64,-155,-143,-156,-135,-139,-3,-60,-52,-146,-31,163,-97,-95,-81,-72,-73,-51,163,-147,-4,-82,-132,-26,-25,-33,-44,-45,-84,-156,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,222,-8,234,-100,-99,-98,-113,-9,-156,-137,-83,-94,-89,-136,-149,-91,-133,-134,-138,-92,-93,]),'OR':([7,11,12,17,18,23,24,28,30,34,35,36,40,41,42,44,48,49,50,51,52,54,56,60,63,65,69,70,71,73,75,76,88,99,115,117,118,122,124,125,127,144,147,148,154,155,156,159,163,164,165,173,175,176,177,178,179,184,190,207,210,211,218,230,234,],[-61,-76,-56,-75,-58,-78,93,-65,-63,-74,-68,-66,-50,-32,-79,-30,-70,-46,-53,-69,-62,-59,-77,-55,-67,-27,-71,-24,-43,-60,-57,-64,-60,-52,-31,-97,-95,-81,-72,-73,-51,-82,-26,-25,-33,-44,-45,-84,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,-65,-100,-99,-98,-113,-83,-89,]),'LOOP':([0,1,2,3,4,5,7,8,11,12,13,14,15,17,18,19,22,23,24,25,28,30,33,34,35,36,37,39,40,41,42,44,47,48,49,50,51,52,54,55,56,60,61,63,65,68,69,70,71,72,75,76,78,79,80,81,82,84,85,87,88,90,99,101,115,117,118,122,124,125,127,138,140,141,144,145,147,148,154,155,156,159,163,164,165,173,175,176,177,178,179,184,191,192,193,195,196,207,210,211,218,219,220,221,222,223,225,228,230,234,244,245,246,247,248,250,256,259,260,262,269,273,],[-156,-154,26,26,-153,-152,-61,26,-76,-56,-156,26,-127,-75,-58,-126,-124,-78,-23,-129,-65,-63,-125,-74,-68,-66,-5,26,-50,-32,-79,-30,-21,-70,-46,-53,-69,-62,-59,-1,-77,-55,-131,-67,-27,-130,-71,-24,-43,-128,-57,-64,-2,-155,-143,-156,-135,26,-139,-3,-60,26,-52,-146,-31,-97,-95,-81,-72,-73,-51,26,-147,-4,-82,-132,-26,-25,-33,-44,-45,-84,-90,-54,-96,-80,-28,-29,-47,-48,-49,-112,26,-8,-148,-140,26,-100,-99,-98,-113,-156,-22,-9,-156,-156,-137,26,-83,-89,26,-136,26,-141,26,-149,-144,-133,-134,-138,-142,-145,]),} _lr_action = { } for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_action: _lr_action[_x] = { } _lr_action[_x][_k] = _y del _lr_action_items _lr_goto_items = 
{'statements':([138,],[191,]),'comp_operator':([41,],[104,]),'small_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[6,6,6,6,6,6,142,6,6,6,6,6,6,6,6,]),'fancy_drel_assignment_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,]),'primary':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,]),'stringliteral':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,116,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,266,267,268,271,276,277,],[28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,161,28,28,28,28,28,28,28,28,28,28,28,190,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,272,28,28,275,28,28,]),'item_tag':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'not_test':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[65,65,65,65,65,65,65,115,65,65,65,65,65,65,65,65,175,176,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,65,]),'listmaker':([114,],[158,]),'do_stmt_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'func_arg':([133,143,217,],[180,180,243,]),'enclosure':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,]),'newlines':([0,13,16,43,46,81,86,136,158,162,203,219,222,223,231,255,265,],[5,5,87,5,5,5,141,5,5,5,5,5,5,5,5,5,5,]),'break_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,]),'dotlist':([133,],[181,]),'arglist':([153,],[199,]),'repeat_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[68,68,68,68,68,68,68,68,68,68,68,68,68,68,]),'u_expr':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[49,49,49,49,49,49,99,49,49,49,127,49,49,49,49,49,49,49,49,49,164,49,49,49,177,178,179,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,]),'if_else_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[33,33,33,33,33,33,33,33,33,33,33,33,33,33,]),'parenth_form':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[11,
11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'literal':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,]),'attributeref':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'call':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,]),'argument_list':([133,143,],[183,183,]),'statement':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[55,78,82,82,82,82,82,192,221,82,82,82,82,82,]),'string_conversion':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,56,]),'with_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'input':([0,],[3,]),'loop_head':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'do_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'next_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[57,57,57,57,57,57,57,57,57,57,57,57,57,57,57,]),'empty':([0,13,43,46,81,136,158,162,203,219,222,223,231,255,265,],[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,]),'listmaker2':([160,],[202,]),'short_slice':([121,206,],[167,167,]),'power':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,]),'a_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[41,41,41,41,41,41,41,41,41,41,41,41,41,41,154,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,]),'print_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[58,58,58,58,58,58,58,58,58,58,58,58,58,58,58,]),'and_test':([2,3,8,14,21,29,39,53,84,86,90,92,93,114,121,133,134,135,137,138,143,146,150,168,174,188,191,19
6,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[70,70,70,70,70,70,70,70,70,70,70,147,148,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,70,]),'maybe_nline':([0,13,43,46,81,136,158,162,203,219,222,223,231,255,265,],[2,84,114,116,138,188,201,205,232,244,245,246,252,266,271,]),'tablemaker2':([233,],[254,]),'slicing':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,]),'for_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[19,19,19,19,19,19,19,19,19,19,19,19,19,19,]),'m_expr':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,104,105,107,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,155,156,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,]),'table_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,]),'restricted_comp_operator':([41,247,],[108,261,]),'atom':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,]),'funcdef':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[61,61,61,61,61,61,61,61,61,61,61,61,61,61,]),'expr_stmt':([2,3,8,14,39,84,86,90,138,191,196,228,244,246,248,],[20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,]),'slice_list':([121,],[166,]),'subscription':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,75,]),'comparison':([2,3,8,14,21,29,39,45,53,84,86,90,92,93,114,121,128,129,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,]),'attribute_tag':([50,],[118,]),'if_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[22,22,22,22,22,22,22,22,22,22,22,22,22,22,]),'id_list':([31,97,],[96,152,]),'proper_slice':([121,206,],[170,235,]),'list_display':([2,3,8,14,21,29,32,39,45,53,67,84,86,90,92,93,104,105,107,114,119,121,128,129,130,131,132,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,229,232,240,242,244,246,248,252,
263,267,268,276,277,],[23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,251,23,23,23,23,23,23,23,270,23,23,23,23,]),'loop_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[72,72,72,72,72,72,72,72,72,72,72,72,72,72,]),'or_test':([2,3,8,14,21,29,39,53,84,86,90,114,121,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,]),'compound_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[37,37,37,37,37,37,37,37,37,37,37,37,37,37,]),'with_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[25,25,25,25,25,25,25,25,25,25,25,25,25,25,]),'tablemaker':([116,138,],[162,162,]),'long_slice':([121,206,],[169,169,]),'suite':([8,14,39,84,90,196,228,244,246,248,],[80,85,101,140,145,225,250,259,260,262,]),'simple_stmt':([2,3,8,14,39,84,90,138,191,196,228,244,246,248,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'testlist_star_expr':([2,3,8,14,21,39,53,84,86,90,135,137,138,150,191,196,226,228,244,246,248,],[77,77,77,77,89,77,123,77,77,77,187,189,77,196,77,77,248,77,77,77,77,]),'slice_item':([121,206,],[171,236,]),'expression':([2,3,8,14,21,29,39,53,84,86,90,114,121,133,134,135,137,138,143,146,150,168,174,188,191,196,204,206,208,212,217,226,228,232,240,242,244,246,248,252,267,268,276,277,],[47,47,47,47,47,95,47,47,47,47,47,160,172,185,186,47,47,47,185,194,47,209,213,220,47,47,233,237,238,239,185,47,47,253,256,258,47,47,47,264,273,274,278,279,]),} _lr_goto = { } for _k, _v in _lr_goto_items.items(): for _x,_y in zip(_v[0],_v[1]): if not _x in _lr_goto: _lr_goto[_x] = { } _lr_goto[_x][_k] = _y del _lr_goto_items _lr_productions = [ ("S' -> input","S'",1,None,None,None), ('input -> maybe_nline statement','input',2,'p_input','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',19), ('input -> input statement','input',2,'p_input','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',20), ('statement -> simple_stmt newlines','statement',2,'p_statement','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',36), ('statement -> simple_stmt ; newlines','statement',3,'p_statement','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',37), ('statement -> compound_stmt','statement',1,'p_statement','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',38), ('simple_stmt -> small_stmt','simple_stmt',1,'p_simple_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',44), ('simple_stmt -> simple_stmt ; small_stmt','simple_stmt',3,'p_simple_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',45), ('statements -> statement','statements',1,'p_statements','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',55), ('statements -> statements statement','statements',2,'p_statements','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',56), ('small_stmt -> expr_stmt','small_stmt',1,'p_small_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',61), ('small_stmt -> print_stmt','small_stmt',1,'p_small_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',62), ('small_stmt -> break_stmt','small_stmt',1,'p_small_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',63), ('small_stmt -> 
next_stmt','small_stmt',1,'p_small_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',64), ('break_stmt -> BREAK','break_stmt',1,'p_break_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',68), ('next_stmt -> NEXT','next_stmt',1,'p_next_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',72), ('print_stmt -> PRINT expression','print_stmt',2,'p_print_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',76), ('expr_stmt -> testlist_star_expr','expr_stmt',1,'p_expr_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',84), ('expr_stmt -> testlist_star_expr AUGOP testlist_star_expr','expr_stmt',3,'p_expr_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',85), ('expr_stmt -> testlist_star_expr = testlist_star_expr','expr_stmt',3,'p_expr_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',86), ('expr_stmt -> fancy_drel_assignment_stmt','expr_stmt',1,'p_expr_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',87), ('testlist_star_expr -> expression','testlist_star_expr',1,'p_testlist_star_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',96), ('testlist_star_expr -> testlist_star_expr , maybe_nline expression','testlist_star_expr',4,'p_testlist_star_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',97), ('expression -> or_test','expression',1,'p_expression','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',107), ('or_test -> and_test','or_test',1,'p_or_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',115), ('or_test -> or_test OR and_test','or_test',3,'p_or_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',116), ('or_test -> or_test BADOR and_test','or_test',3,'p_or_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',117), ('and_test -> not_test','and_test',1,'p_and_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',122), ('and_test -> and_test AND not_test','and_test',3,'p_and_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',123), ('and_test -> and_test BADAND not_test','and_test',3,'p_and_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',124), ('not_test -> comparison','not_test',1,'p_not_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',129), ('not_test -> NOT not_test','not_test',2,'p_not_test','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',130), ('comparison -> a_expr','comparison',1,'p_comparison','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',135), ('comparison -> a_expr comp_operator a_expr','comparison',3,'p_comparison','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',136), ('comp_operator -> restricted_comp_operator','comp_operator',1,'p_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',142), ('comp_operator -> IN','comp_operator',1,'p_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',143), ('comp_operator -> NOT IN','comp_operator',2,'p_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',144), ('restricted_comp_operator -> <','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',150), ('restricted_comp_operator -> 
>','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',151), ('restricted_comp_operator -> GTE','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',152), ('restricted_comp_operator -> LTE','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',153), ('restricted_comp_operator -> NEQ','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',154), ('restricted_comp_operator -> ISEQUAL','restricted_comp_operator',1,'p_restricted_comp_operator','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',155), ('a_expr -> m_expr','a_expr',1,'p_a_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',159), ('a_expr -> a_expr + m_expr','a_expr',3,'p_a_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',160), ('a_expr -> a_expr - m_expr','a_expr',3,'p_a_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',161), ('m_expr -> u_expr','m_expr',1,'p_m_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',168), ('m_expr -> m_expr * u_expr','m_expr',3,'p_m_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',169), ('m_expr -> m_expr / u_expr','m_expr',3,'p_m_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',170), ('m_expr -> m_expr ^ u_expr','m_expr',3,'p_m_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',171), ('u_expr -> power','u_expr',1,'p_u_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',178), ('u_expr -> - u_expr','u_expr',2,'p_u_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',179), ('u_expr -> + u_expr','u_expr',2,'p_u_expr','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',180), ('power -> primary','power',1,'p_power','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',187), ('power -> primary POWER u_expr','power',3,'p_power','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',188), ('primary -> atom','primary',1,'p_primary','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',196), ('primary -> attributeref','primary',1,'p_primary','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',197), ('primary -> subscription','primary',1,'p_primary','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',198), ('primary -> slicing','primary',1,'p_primary','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',199), ('primary -> call','primary',1,'p_primary','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',200), ('atom -> ID','atom',1,'p_atom','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',205), ('atom -> item_tag','atom',1,'p_atom','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',206), ('atom -> literal','atom',1,'p_atom','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',207), ('atom -> enclosure','atom',1,'p_atom','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',208), ('item_tag -> ITEM_TAG','item_tag',1,'p_item_tag','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',213), ('literal -> stringliteral','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',217), ('literal -> 
INTEGER','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',218), ('literal -> HEXINT','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',219), ('literal -> OCTINT','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',220), ('literal -> BININT','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',221), ('literal -> REAL','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',222), ('literal -> IMAGINARY','literal',1,'p_literal','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',223), ('stringliteral -> STRPREFIX SHORTSTRING','stringliteral',2,'p_stringliteral','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',228), ('stringliteral -> STRPREFIX LONGSTRING','stringliteral',2,'p_stringliteral','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',229), ('stringliteral -> SHORTSTRING','stringliteral',1,'p_stringliteral','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',230), ('stringliteral -> LONGSTRING','stringliteral',1,'p_stringliteral','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',231), ('enclosure -> parenth_form','enclosure',1,'p_enclosure','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',236), ('enclosure -> string_conversion','enclosure',1,'p_enclosure','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',237), ('enclosure -> list_display','enclosure',1,'p_enclosure','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',238), ('enclosure -> table_display','enclosure',1,'p_enclosure','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',239), ('parenth_form -> OPEN_PAREN testlist_star_expr CLOSE_PAREN','parenth_form',3,'p_parenth_form','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',243), ('parenth_form -> OPEN_PAREN CLOSE_PAREN','parenth_form',2,'p_parenth_form','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',244), ('string_conversion -> ` testlist_star_expr `','string_conversion',3,'p_string_conversion','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',251), ('list_display -> [ maybe_nline listmaker maybe_nline ]','list_display',5,'p_list_display','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',256), ('list_display -> [ maybe_nline ]','list_display',3,'p_list_display','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',257), ('listmaker -> expression listmaker2','listmaker',2,'p_listmaker','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',265), ('listmaker2 -> , maybe_nline expression','listmaker2',3,'p_listmaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',270), ('listmaker2 -> listmaker2 , maybe_nline expression','listmaker2',4,'p_listmaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',271), ('listmaker2 -> ','listmaker2',0,'p_listmaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',272), ('table_display -> { maybe_nline tablemaker maybe_nline }','table_display',5,'p_table_display','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',282), ('table_display -> { maybe_nline }','table_display',3,'p_table_display','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',283), ('tablemaker -> stringliteral : expression 
tablemaker2','tablemaker',4,'p_tablemaker','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',290), ('tablemaker2 -> , maybe_nline stringliteral : expression','tablemaker2',5,'p_tablemaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',294), ('tablemaker2 -> tablemaker2 , maybe_nline stringliteral : expression','tablemaker2',6,'p_tablemaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',295), ('tablemaker2 -> ','tablemaker2',0,'p_tablemaker2','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',296), ('attributeref -> primary attribute_tag','attributeref',2,'p_attributeref','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',310), ('attribute_tag -> . ID','attribute_tag',2,'p_attribute_tag','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',314), ('attribute_tag -> REAL','attribute_tag',1,'p_attribute_tag','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',315), ('subscription -> primary [ expression ]','subscription',4,'p_subscription','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',322), ('slicing -> primary [ proper_slice ]','slicing',4,'p_slicing','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',326), ('slicing -> primary [ slice_list ]','slicing',4,'p_slicing','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',327), ('proper_slice -> short_slice','proper_slice',1,'p_proper_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',331), ('proper_slice -> long_slice','proper_slice',1,'p_proper_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',332), ('short_slice -> :','short_slice',1,'p_short_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',343), ('short_slice -> expression : expression','short_slice',3,'p_short_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',344), ('short_slice -> : expression','short_slice',2,'p_short_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',345), ('short_slice -> expression :','short_slice',2,'p_short_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',346), ('long_slice -> short_slice : expression','long_slice',3,'p_long_slice','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',355), ('slice_list -> slice_item','slice_list',1,'p_slice_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',362), ('slice_list -> slice_list , slice_item','slice_list',3,'p_slice_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',363), ('slice_item -> expression','slice_item',1,'p_slice_item','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',370), ('slice_item -> proper_slice','slice_item',1,'p_slice_item','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',371), ('call -> ID OPEN_PAREN CLOSE_PAREN','call',3,'p_call','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',375), ('call -> ID OPEN_PAREN argument_list CLOSE_PAREN','call',4,'p_call','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',376), ('argument_list -> func_arg','argument_list',1,'p_argument_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',386), ('argument_list -> argument_list , func_arg','argument_list',3,'p_argument_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',387), ('func_arg -> 
expression','func_arg',1,'p_func_arg','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',394), ('fancy_drel_assignment_stmt -> ID OPEN_PAREN dotlist CLOSE_PAREN','fancy_drel_assignment_stmt',4,'p_fancy_drel_assignment_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',398), ('dotlist -> . ID = expression','dotlist',4,'p_dotlist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',405), ('dotlist -> dotlist , . ID = expression','dotlist',6,'p_dotlist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',406), ('exprlist -> a_expr','exprlist',1,'p_exprlist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',413), ('exprlist -> exprlist , a_expr','exprlist',3,'p_exprlist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',414), ('id_list -> ID','id_list',1,'p_id_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',421), ('id_list -> id_list , ID','id_list',3,'p_id_list','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',422), ('compound_stmt -> if_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',433), ('compound_stmt -> if_else_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',434), ('compound_stmt -> for_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',435), ('compound_stmt -> do_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',436), ('compound_stmt -> loop_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',437), ('compound_stmt -> with_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',438), ('compound_stmt -> repeat_stmt','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',439), ('compound_stmt -> funcdef','compound_stmt',1,'p_compound_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',440), ('if_else_stmt -> if_stmt ELSE suite','if_else_stmt',3,'p_if_else_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',447), ('if_stmt -> IF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',6,'p_if_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',453), ('if_stmt -> if_stmt ELSEIF OPEN_PAREN expression CLOSE_PAREN maybe_nline suite','if_stmt',7,'p_if_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',454), ('suite -> statement','suite',1,'p_suite','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',473), ('suite -> { maybe_nline statements } maybe_nline','suite',5,'p_suite','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',474), ('for_stmt -> FOR id_list IN testlist_star_expr suite','for_stmt',5,'p_for_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',481), ('for_stmt -> FOR [ id_list ] IN testlist_star_expr suite','for_stmt',7,'p_for_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',482), ('loop_stmt -> loop_head suite','loop_stmt',2,'p_loop_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',489), ('loop_head -> LOOP ID AS ID','loop_head',4,'p_loop_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',495), ('loop_head -> LOOP ID AS ID : 
ID','loop_head',6,'p_loop_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',496), ('loop_head -> LOOP ID AS ID : ID restricted_comp_operator ID','loop_head',8,'p_loop_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',497), ('do_stmt -> do_stmt_head suite','do_stmt',2,'p_do_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',508), ('do_stmt_head -> DO ID = expression , expression','do_stmt_head',6,'p_do_stmt_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',515), ('do_stmt_head -> DO ID = expression , expression , expression','do_stmt_head',8,'p_do_stmt_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',516), ('repeat_stmt -> REPEAT suite','repeat_stmt',2,'p_repeat_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',525), ('with_stmt -> with_head maybe_nline suite','with_stmt',3,'p_with_stmt','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',529), ('with_head -> WITH ID AS ID','with_head',4,'p_with_head','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',533), ('funcdef -> FUNCTION ID OPEN_PAREN arglist CLOSE_PAREN suite','funcdef',6,'p_funcdef','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',537), ('arglist -> ID : list_display','arglist',3,'p_arglist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',541), ('arglist -> arglist , ID : list_display','arglist',5,'p_arglist','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',542), ('maybe_nline -> newlines','maybe_nline',1,'p_maybe_nline','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',549), ('maybe_nline -> empty','maybe_nline',1,'p_maybe_nline','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',550), ('newlines -> NEWLINE','newlines',1,'p_newlines','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',557), ('newlines -> newlines NEWLINE','newlines',2,'p_newlines','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',558), ('empty -> ','empty',0,'p_empty','/home/jrh/programs/CIF/pycifrw-git/pycifrw/drel/drel_ast_yacc.py',562), ]
pycifrw-4.4/src/yapps3_compiled_rt.py000066400000000000000000000333111345362224200200110ustar00rootroot00000000000000
#
# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#
# Modified for PyCIFRW by JRH to allow external scanner
#
# To maximize python3/python2 compatibility
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

"""
Detail of JRH modifications.

The compiled module handles all token administration by itself, but
does not deal with restrictions.  It also effectively removes the
context-sensitivity of Yapps, as it ignores restrictions, but these
restrictions turn out to be unnecessary for CIF.

Interestingly, the module scan function is never called directly
from python.
"""

"""Run time libraries needed to run parsers generated by Yapps.

This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.
""" # TODO: it should be possible to embed yappsrt into the generated # grammar to make a standalone module. import sys, re # For normal installation this module is "CifFile.yapps3_compiled_rt" # and StarScan is an extension module within the parent CifFile module. if __name__.startswith('CifFile.'): try: from . import StarScan have_star_scan = True except ImportError: have_star_scan = False # Otherwise assume this is imported from the yapps3/yapps2.py script # that is executed from Makefile to generate YappsStarParser sources. else: assert __name__ == 'yapps3_compiled_rt', "Unexpected module name." assert sys.argv[0].endswith('yapps2.py'), ( "This should be reached only when running yapps2.py in Makefile.") have_star_scan = False class YappsSyntaxError(Exception): """When we run into an unexpected token, this is the exception to use""" def __init__(self, charpos=-1, msg="Bad Token", context=None): Exception.__init__(self) self.charpos = charpos self.msg = msg self.context = context def __str__(self): if self.charpos < 0: return 'SyntaxError' else: return 'SyntaxError@char%s(%s)' % (repr(self.charpos), self.msg) class NoMoreTokens(Exception): """Another exception object, for when we run out of tokens""" pass class Scanner: """Yapps scanner. The Yapps scanner can work in context sensitive or context insensitive modes. The token(i) method is used to retrieve the i-th token. It takes a restrict set that limits the set of tokens it is allowed to return. In context sensitive mode, this restrict set guides the scanner. In context insensitive mode, there is no restriction (the set is always the full set of tokens). """ def __init__(self, patterns, ignore, input, scantype="standard"): """Initialize the scanner. Parameters: patterns : [(terminal, uncompiled regex), ...] or None ignore : [terminal,...] input : string If patterns is None, we assume that the subclass has defined self.patterns : [(terminal, compiled regex), ...]. Note that the patterns parameter expects uncompiled regexes, whereas the self.patterns field expects compiled regexes. """ self.tokens = [] # [(begin char pos, end char pos, token name, matched text), ...] 
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        self.scantype = scantype
        self.first_line_number = 1
        if self.scantype == "flex" and have_star_scan:
            StarScan.prepare(input)
            self.scan = self.compiled_scan
            self.token = self.compiled_token
            self.__del__ = StarScan.cleanup
        elif self.scantype == "flex":
            print("WARNING: using Python scanner although C scanner requested")
            self.scantype = "standard"
        if self.scantype != "flex":
            self.scan = self.interp_scan
            self.token = self.interp_token
        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = []
            for terminal, regex in patterns:
                self.patterns.append( (terminal, re.compile(regex)) )

    def get_token_pos(self):
        """Get the current token position in the input text."""
        return len(self.tokens)

    def get_char_pos(self):
        """Get the current char position in the input text."""
        return self.pos

    def get_prev_char_pos(self, i=None):
        """Get the previous position (one token back) in the input text."""
        if self.pos == 0:
            return 0
        if i is None:
            i = -1
        return self.tokens[i][0]

    def get_line_number(self):
        """Get the line number of the current position in the input text."""
        # TODO: make this work at any token/char position
        return self.first_line_number + self.get_input_scanned().count('\n')

    def get_column_number(self):
        """Get the column number of the current position in the input text."""
        s = self.get_input_scanned()
        i = s.rfind('\n')  # may be -1, but that's okay in this case
        return len(s) - (i+1)

    def get_input_scanned(self):
        """Get the portion of the input that has been tokenized."""
        return self.input[:self.pos]

    def get_input_unscanned(self):
        """Get the portion of the input that has not yet been tokenized."""
        return self.input[self.pos:]

    def interp_token(self, i, restrict=None):
        """Get the i'th token in the input.

        If i is one past the end, then scan for another token.

        Args:
          restrict : [token, ...] or None; if restrict is None, then
          any token is allowed.  You may call token(i) more than once.
          However, the restrict set may never be larger than what was
          passed in on the first call to token(i).
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted.  This
            # invariant is needed to avoid ruining tokenization at
            # position i+1 and higher.
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def compiled_token(self, i, restrict=0):
        try:
            return StarScan.token(i)
        except IndexError:
            raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        if self.scantype != "flex":
            for t in self.tokens[-10:]:
                output = '%s\n (@%s) %s = %s' % (output, t[0], t[2], repr(t[3]))
        else:
            out_tokens = StarScan.last_ten()
            for t in out_tokens:
                output = '%s\n (~line %s) %s = %s' % (output, t[0], t[2], repr(t[3]))
        return output

    def interp_scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions"""
        # Prepare accepted pattern list
        if restrict:
            # only patterns in the 'restrict' parameter or in self.ignore
            # are accepted
            accepted_patterns = []
            for p_name, p_regexp in self.patterns:
                if p_name not in restrict and p_name not in self.ignore:
                    pass
                else:
                    accepted_patterns.append((p_name, p_regexp))
        else:
            # every pattern is good
            accepted_patterns = self.patterns
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in accepted_patterns:
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))
            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of ' + ', '.join(restrict)
                raise YappsSyntaxError(self.pos, msg)
            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos + best_match, best_pat,
                         self.input[self.pos:self.pos + best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored...
                self.pos = self.pos + best_match

    def compiled_scan(self, restrict):
        token = StarScan.scan()
        print("Calling compiled scan, got %s" % repr(token))
        if token[2] not in restrict:
            msg = "Bad Token"
            if restrict:
                msg = "Trying to find one of " + ", ".join(restrict)
            raise YappsSyntaxError(self.pos, msg)
        self.tokens.append(token)
        self.restrictions.append(restrict)
        return
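
# ---------------------------------------------------------------------
# Editor's illustration (not part of the original runtime): a minimal
# sketch of how the pure-python Scanner above tokenizes its input.  The
# terminal names and regexes below are invented for this example only.
_toy_patterns = [
    ('NUMBER', r'[0-9]+'),
    ('PLUS', r'\+'),
    ('WS', r'[ \t]+'),
    ('END', r'$'),   # grammars conventionally include an end-of-input
                     # terminal along these lines
]

def _example_scanner_usage():
    s = Scanner(_toy_patterns, ['WS'], "12 + 34")
    # token(i) scans on demand.  With restrict=None any terminal is
    # allowed, i.e. the scanner behaves context insensitively; 'WS'
    # matches are consumed silently because 'WS' is in the ignore list.
    first = s.token(0)    # -> (0, 2, 'NUMBER', '12')
    second = s.token(1)   # -> (3, 4, 'PLUS', '+')
    # Calling token(i) again with a *narrower* restrict set is fine;
    # widening it would raise NotImplementedError (see interp_token).
    return first, second
# ---------------------------------------------------------------------
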
class Parser:
    """Base class for Yapps-generated parsers.
    """
    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        tok = self._scanner.token(self._pos, types)
        return tok[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token"""
        tok = self._scanner.token(self._pos, [type])
        if tok[2] != type:
            raise YappsSyntaxError(tok[0], 'Trying to find ' + type)
        self._pos = 1 + self._pos
        return tok[3]
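
# ---------------------------------------------------------------------
# Editor's illustration (not part of the original runtime): Yapps emits
# subclasses of Parser whose rule methods walk the token stream through
# _peek and _scan, roughly as sketched below for the hypothetical
# grammar  addition -> NUMBER (PLUS NUMBER)* END.
class _ExampleParser(Parser):
    def addition(self):
        total = int(self._scan('NUMBER'))
        # _peek looks ahead without consuming; _scan consumes one token
        while self._peek('PLUS', 'END') == 'PLUS':
            self._scan('PLUS')
            total = total + int(self._scan('NUMBER'))
        return total

def _example_parser_usage():
    p = _ExampleParser(Scanner(_toy_patterns, ['WS'], "12 + 34"))
    return p.addition()   # -> 46
# ---------------------------------------------------------------------
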
class Context:
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.
    """

    def __init__(self, parent, scanner, tokenpos, rule, args=()):
        """Create a new context.

        Args:
          parent: Context object or None
          scanner: Scanner object
          pos: integer (scanner token position)
          rule: string (name of the rule)
          args: tuple listing parameters to the rule
        """
        self.parent = parent
        self.scanner = scanner
        self.tokenpos = tokenpos
        self.rule = rule
        self.args = args

    def __str__(self):
        output = ''
        if self.parent:
            output = str(self.parent) + ' > '
        output += self.rule
        return output

#
# Note that this sort of error printout is useless with the
# compiled scanner
#

def print_line_with_pointer(text, p):
    """Print the line of 'text' that includes position 'p', along with a
    second line with a single caret (^) at position p"""

    # TODO: separate out the logic for determining the line/character
    # location from the logic for determining how to display an
    # 80-column line to stderr.

    # Now try printing part of the line
    text = text[max(p-80, 0):p+80]
    p = p - max(p-80, 0)

    # Strip to the left
    i = text[:p].rfind('\n')
    j = text[:p].rfind('\r')
    if i < 0 or (0 <= j < i):
        i = j
    if 0 <= i < p:
        p = p - i - 1
        text = text[i+1:]

    # Strip to the right
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print('> ', text, file=sys.stderr)
    print('> ', ' '*p + '^', file=sys.stderr)

def print_error(input, err, scanner):
    """Print error messages, the parser stack, and the input text -- for
    human-readable error messages."""
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    line_number = scanner.get_line_number()
    column_number = scanner.get_column_number()
    print('%d:%d: %s' % (line_number, column_number, err.msg), file=sys.stderr)
    context = err.context
    if not context:
        print_line_with_pointer(input, err.charpos)
    while context:
        # TODO: add line number
        print('while parsing %s%s:' % (context.rule, tuple(context.args)), file=sys.stderr)
        print_line_with_pointer(input, context.scanner.get_prev_char_pos(context.tokenpos))
        context = context.parent

def wrap_error_reporter(parser, rule):
    try:
        return getattr(parser, rule)()
    except YappsSyntaxError as e:
        input = parser._scanner.input
        print_error(input, e, parser._scanner)
    except NoMoreTokens:
        print('Could not complete parsing; stopped around here:', file=sys.stderr)
        print(parser._scanner, file=sys.stderr)
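
# ---------------------------------------------------------------------
# Editor's illustration (not part of the original runtime): wiring the
# error-reporting helpers above to the hypothetical parser sketched
# earlier in this file.
def _example_error_reporting():
    parser = _ExampleParser(Scanner(_toy_patterns, ['WS'], "12 + +"))
    # The stray second '+' makes the scanner fail to find a NUMBER;
    # wrap_error_reporter catches the resulting YappsSyntaxError and
    # prints a line:column message plus a caret under the bad character.
    return wrap_error_reporter(parser, 'addition')
# ---------------------------------------------------------------------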