pax_global_header00006660000000000000000000000064141240021420014500gustar00rootroot0000000000000052 comment=fb0125ebc74a31cbbaeaf95e2e066765c30b9ee8 patsy-0.5.2/000077500000000000000000000000001412400214200126445ustar00rootroot00000000000000patsy-0.5.2/.coveragerc000066400000000000000000000001471412400214200147670ustar00rootroot00000000000000[run] branch=True source=patsy [report] exclude_lines = pragma: no cover ^def test_ precision = 1 patsy-0.5.2/.github/000077500000000000000000000000001412400214200142045ustar00rootroot00000000000000patsy-0.5.2/.github/workflows/000077500000000000000000000000001412400214200162415ustar00rootroot00000000000000patsy-0.5.2/.github/workflows/publish.yml000066400000000000000000000011641412400214200204340ustar00rootroot00000000000000name: Publish tagged releases to PyPI on: push: tags: - "v*" jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Set up Python uses: actions/setup-python@v1 with: python-version: '3.7' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel twine upload dist/* patsy-0.5.2/.github/workflows/tox.yml000066400000000000000000000017721412400214200176050ustar00rootroot00000000000000name: Run Tox Tests on: push: branches: - "*" jobs: build: runs-on: ubuntu-latest strategy: max-parallel: 4 matrix: python-version: [2.7, 3.6, 3.7, 3.8, 3.9] pandas-presence: ['with_pandas', 'without_pandas'] env: PYTHON_VERSION: ${{ matrix.python-version }} PANDAS_PRESENCE: ${{ matrix.pandas-presence }} steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install tox - name: Test with tox run: | 
PYTHON_ENV="py${PYTHON_VERSION//./}" tox -e "${PYTHON_ENV}-${PANDAS_PRESENCE}" - name: Upload coverage to Codecov uses: codecov/codecov-action@v1.0.10 with: file: ./coverage.xml flags: unittests env_vars: PYTHON_VERSION,PANDAS_PRESENCE patsy-0.5.2/.gitignore000066400000000000000000000020211412400214200146270ustar00rootroot00000000000000# Project specific files # ########################## .coverage htmlcov/ .tox # Generated by doc build doc/_static/basis-*.png doc/savefig/ # Cribbed from numpy's .gitignore: # Editor temporary/working/backup files # ######################################### .#* [#]*# *~ *$ *.bak *.diff *.org .project *.rej .settings/ .*.sw[nop] .sw[nop] *.tmp # Compiled source # ################### *.a *.com *.class *.dll *.exe *.o *.py[ocd] *.so # Packages # ############ # it's better to unpack these files and commit the raw source # git has its own built in compression methods *.7z *.bz2 *.bzip2 *.dmg *.gz *.iso *.jar *.rar *.tar *.tbz2 *.tgz *.zip # Python files # ################ # setup.py working directory build # sphinx build directory _build # setup.py dist directory dist doc/build doc/cdoc/build # Egg metadata *.egg-info # The shelf plugin uses this dir ./.shelf # Logs and databases # ###################### *.log *.sql *.sqlite # OS generated files # ###################### .gdb_history .DS_Store? ehthumbs.db Icon? Thumbs.db patsy-0.5.2/CODE_OF_CONDUCT.md000066400000000000000000000045171412400214200154520ustar00rootroot00000000000000# Contributor Code of Conduct As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. 
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality. Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery * Personal attacks * Trolling or insulting/derogatory comments * Public or private harassment * Publishing other's private information, such as physical or electronic addresses, without explicit permission * Other unethical or unprofessional conduct Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. By adopting this Code of Conduct, project maintainers commit themselves to fairly and consistently applying these principles to every aspect of managing this project. Project maintainers who do not follow or enforce the Code of Conduct may be permanently removed from the project team. This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting a project maintainer at njs@pobox.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. Maintainers are obligated to maintain confidentiality with regard to the reporter of an incident. 
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.3.0, available at [http://contributor-covenant.org/version/1/3/0/][version] [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/3/0/ patsy-0.5.2/LICENSE.txt000066400000000000000000000103311412400214200144650ustar00rootroot00000000000000The bulk of Patsy is distributed under a simple 2-clause BSD license: Copyright (C) 2011-2012, Patsy Developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The module patsy.compat contains code derived from the Python standard library, and is covered by the following license: PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. 
This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. 
Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. As per item (3), we are required to provide a brief summary of changes. For this, see comments in patsy/compat.py. patsy-0.5.2/MANIFEST.in000066400000000000000000000002461412400214200144040ustar00rootroot00000000000000include setup.cfg .coveragerc tox.ini include TODO LICENSE.txt README.md CODE_OF_CONDUCT.md recursive-include tools *.py *.R recursive-include doc * prune doc/_build patsy-0.5.2/README.md000066400000000000000000000041121412400214200141210ustar00rootroot00000000000000# Patsy **Notice:** `patsy` is no longer under active development. As of August 2021, Matthew Wardrop (@matthewwardrop) and Tomás Capretto (@tomicapretto) have taken on responsibility from Nathaniel Smith (@njsmith) for keeping the lights on, but no new feature development is planned. The spiritual successor of this project is [Formulaic](https://github.com/matthewwardrop/formulaic), and we recommend those interested in new feature development contribute there. Those whose use-cases continue to be met by `patsy` can continue using this package with increased confidence that things will continue to work as is for the foreseeable future. --- Patsy is a Python library for describing statistical models (especially linear models, or models that have a linear component) and building design matrices. Patsy brings the convenience of [R](http://www.r-project.org/) "formulas" to Python. 
[![PyPI - Version](https://img.shields.io/pypi/v/patsy.svg)](https://pypi.org/project/spec-classes/) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/patsy.svg) ![https://patsy.readthedocs.io/](https://img.shields.io/badge/docs-read%20now-blue.svg) ![PyPI - Status](https://img.shields.io/pypi/status/patsy.svg) ![https://travis-ci.org/pydata/patsy](https://travis-ci.org/pydata/patsy.png?branch=master) ![https://coveralls.io/r/pydata/patsy?branch=master](https://coveralls.io/repos/pydata/patsy/badge.png?branch=master) ![https://doi.org/10.5281/zenodo.592075](https://zenodo.org/badge/DOI/10.5281/zenodo.592075.svg) - **Documentation:** - **Downloads:** - **Code and issues:** - **Mailing list:** () ## Dependencies * Python (2.6, 2.7, or 3.3+) * six * numpy * Optional: * pytest/pytest-cov: needed to run tests * scipy: needed for spline-related functions like ``bs`` ## Installation ``pip install patsy`` (or, for traditionalists: ``python setup.py install``) ## License 2-clause BSD, see LICENSE.txt for details. patsy-0.5.2/TODO000066400000000000000000000346411412400214200133440ustar00rootroot00000000000000* Add missing data handling to the just-pass-in-a-matrix bit of the high-level API * Add parallel array handling to build_design_matrices * Add parallel array handling of some sort to high-level API... 
* Refactor build so that there are two stages - first stage takes a set of factor evaluators, and returns a set of evaluated columns - second stage handles interactions and categorical coding and assembles these together into design matrices use case: any model where you actually want to get categorical data out (like multinomial regression with a factor on the LHS, or CART with factors on the right-hand side) ** first stage should also handle other "parallel" data, like weights, which need to participate in the missingness calculations ** possibly also support a "subset=" argument at this stage ** and for parallel vectors and subset=, allow a string as a value, and if seen then evaluate it as python code in the same context as formula data (like R's subset=(MyCol > 10)) ** And do NaN/mask/missing data handling at this stage *** Imputation? *** numpy.ma * Better NaN/masks/missing data handling in transforms. I think the current ones will just blow up if there are any NaNs. (The previous entry is about handling the term "x" where x has NAs; this entry is about handling "center(x)" where x has NAs.) R's solution to this is that scale(x) simply unconditionally ignores NAs when computing the mean, regardless of the overall setting of na.action. That seems reasonable... * Advocacy Potential users? - statsmodels - PyMC has a (closed) ticket requesting such features: http://code.google.com/p/pymc/issues/detail?id=162 - nipy, though they have their own thing... - sklearn, which has regression (and might find it useful otherwise!) * Do something smarter with mismatched pandas indexes Right now we're conservative -- if you do ~ x + y and x and y don't have identical indexes, then that's an error. It's possible we should do something cleverer, though. Perhaps we should merge them somehow (pandas.concat(..., join="outer")?). (This of course would require that *all* items have indexes, though; right now you can mix plain ndarrays and pandas objects.) 
* Improve EvalFactor's stateful transform handling to follow . lookups right now it can only detect stateful transforms when they are called directly like scale(x) but not if referenced through some module like mylib.scale(x) In general we don't even want to try handling every possible function lookup syntax (see next item for a safety check for that), but we should allow for people to distribute non-builtin stateful transforms. * As a safety check for non-stateful transforms, we should always evaluate each formula on just the first row of data alone, and make sure the result matches what we got when evaluating it vectorized (i.e., confirm f(x[0]) == f(x)[0], where f is our transform. However, this is kind of tricky given that x might be pulled out of the environment, the 'data' dict might have arbitrary objects, etc. Hmm. Maybe intercept variable lookups and just munge those? This is easy to do if someone's passing in a structured array or dataframe and pulling all their data from it, or even if they use a dict with well-behaved columns. But the problem is when people do things like: In [1]: logx = np.log(data["x"]) # refers to data["y"] and logx together In [2]: lm("y ~ logx", data) * More contrast tools - Some sort of symbolic tools for user-defined contrasts -- take the comparisons that people want to compute in terms of linear combinations of level names, convert that to a matrix and do the pinv dance? We have the linear_contrast code already, but that's for describing constraints in terms of the coefficients you have -- it seems like people want to be able to describe constraints in terms of... I'm not sure what. Group means? The coefficients they could have had if they'd fit some other model? (Presumably the all-full-rank-dummy-coding-all-the-time model.) If I can ever figure out what this is (it has something to do with "estimable contrasts") then I'll implement it. 
- Short-hands for Type II, Type III, and "remove this term and everything marginal to it" contrast tests? Might need to figure out the trick that car::Anova uses to do efficient Type II tests with two contrast matrices. - Understand how coding matters for Type-III ANOVA. The tabs I had open last time I was looking at this: http://goanna.cs.rmit.edu.au/~fscholer/anova.php http://www.mail-archive.com/r-help@stat.math.ethz.ch/msg69781.html https://stat.ethz.ch/pipermail/r-help/2007-October/143047.html http://www.uni-kiel.de/psychologie/dwoll/r/ssTypes.php * A good way to support magic functions like mgcv's s(). statsmodels wants this for things like y ~ arima(2, 3) y ~ garch(1, 1) the cheap trick way of doing it is: class ArimaModelType(object): __patsy_magic__ = True ... def arima(n, m): return ArimaModelType(n, m) and then in the factor type sniffing code detect these things and separate them out from "real" factors. * make sure that pickling works - And make sure that if we allow it at all, then it's sustainable! i.e. we'll be able to guarantee that if people pickle a ModelDesc or Design or whatever now, then they'll be able to get it back later. * Should EvalEnvironment.capture make a copy of the scope dictionaries? - The effect would be to prevent later changes in the enclosing scope from affecting predictions. Of course, we probably don't want to make a *deep* copy of the scope, so there's still no guarantees -- changes to mutable objects within that scope would still be visible. Perhaps we *could* get away with making a deep copy of all mutable objects that are accessed during the initial build, though... I think we'd need to special-case and ignore any READONLY ndarrays, as a safety valve for people who have a giant data-set they're referring to. of course, even a deep copy isn't enough -- they could call an immutable function which accesses mutable state. 
- Josef points out that in long-running REPLs people often need to del local variables to let memory be released, and if all their formulas are going and making shallow copies of the environment then this will be impossible. So making a shallow copy is probably out. - The other approach would be to extend the state dependency checking that we already want to do (to catch undeclared stateful transforms), and have it not only check that building an isolated row of data gives the same result as building the full list, but also that re-building that same row later at prediction time gives the same result as it did in the first place. * Export information on which terms are marginal to which other ones Marginality only makes sense within a numeric-interaction "bucket", so this has to be computed in patsy.build and exported as part of DesignMatrixColumnInfo. Then it can be used for Type II tests. * Some way to specify the default contrast * Support for R's magic "." term - The "y ~ everything else" form - The "what I had in this other ModelDesc" form (e.g., "y ~ . - a" to drop the 'a' predictor from an old model) - This will require that the formula->ModelDesc have access to the data or previous formula... * More stateful transforms: - Splines - 'cut': numeric->factor by quantile dichotimization - Orthogonal polynomials - 'code': takes a Categorical (or coerces to one), and optionally a contrast, and and does the standard contrast-coding. And possibly this should replace _CatFactorEvaluator... * Support for building sparse model matrices directly. (This should be pretty straightforward when it comes to exploiting the intrinsic sparsity of categorical factors; numeric factors that evaluate to a sparse matrix directly might be slightly more complicated.) * Real testing/support for formula syntax extensions The tricky part here is making sure we produce something useful. 
Use cases: - multinomial log-linear modelling - see below Prior art: - R package "lmer" interpets formulas like y ~ x1 + x2 + (1 | foo) + (1 + x | bar) - The R [[http://cran.r-project.org/web/packages/Formula/vignettes/Formula.pdf][Formula]] package, which has two features: - you can write multivariate responses, like y1 + y2 ~ ... (in stock R, this is interpreted as addition (!)). - you can write multiple "parts" on each side, separated by |. Basically these are treated as a list of design matrix specifications, and there are ways to pull out the first, second etc. on each side. - R package "plm" uses Formula to allow formulas like: y ~ x1 + x2 y ~ x1 + x2 | x3 y ~ x1 + x2 | . + x3 where the second part specifies "instrumental variables". I can't tell if the second part has an implicit intercept. - R package "frontier" uses Formula in a similar way, allowing formulas like y ~ x1 + x2 y ~ x1 + x2 | x3 where the first form computes a "error components frontier" and the latter computes an "efficiency effects frontier" (where the part after the | are regresses "used to explain the efficiency levels (Z variables)"). The part after the bar does have an implicit intercept. - package AER uses this in its "ivreg" command, which seems similar to plm. An example makes clear that "y ~ . | x1 + x2" works, and presumably the "." means the same thing as it would in "y ~ ." for lm. - package betareg does "beta regression", and a formula like "y ~ x1 | x2" states that "x1" should be used for the "mean submodel" and "x2" should be used for the "precision submodel". Its betatree function extends this further to "y ~ x1 | x2 | c1 + c2" where "c1", "c2" are "partitioning variables". AFAICT this means that it does basically does a CART-style tree division of the data based on c1, c2, and then fits beta regression models x1 | x2 on each subset. 
- package "fdaMixed" uses formulas like Y | id ~ fixed | random where Y is a response variable, id is "a factor separating the samples", and fixed and random are linear models for the fixed and random effects. The 'id' part seems to be used to match multiple samples from the same random effects group? - package "growcurves" allows "y ~ fixed | random". If there is no |, then there is a second argument (random.only) which is consulted to determine whether the sole RHS argument is fixed or random. (Maybe 'y ~ x1 + x2 + random(x3 + x3)' would be a better syntax?) - package "games" uses a syntax like "y ~ x1 + x2 | 0 | x3 | z". There is another version with 8 entries instead of 4. - package "metafor" does effect-size calculations using the syntax "outcome ~ group | study" where each entry has to be a 2-level factor. (And the 'weights' argument gives the actual numbers.) - package "mhurdle" seems to describe a kind of multi-step process via three-part formulas y ~ x1 | x2 | x3 where "the first part describes the selection process if any, the second part the regression equation, and the third part the purchase infrequency process". You can fill in 0 if you want to assume that some process doesn't actually apply (or leave out the last one altogether). - package "mlogit" uses three-part RHS formulas to specify different parts of a multinomial logit model. "the first one contains the alternative specific variables with generic coefficient, i.e. a unique coefficient for all the alternatives; the second one contains the individual specific variables for which one coefficient is estimated for all the alternatives except one of them ; the third one contains the alternative specific variables with alternative specific coefficients...If a standard formula is writen, it is assumed that there are only alternative specific variables with generic coefficients." The second RHS termlist has an intercept by default; for the other two termlists any intercept is ignored in any case. 
- package "polywog" does some clever polynomial basis function fitting thing, and uses formulas like y ~ x1 + x2 | z1 + z2 to mean basically the equivalent of y ~ x1*x2 + z1 + z2 i.e., the first termlist gets a super-rich non-linear interaction between all its entries, and the second is just entered linearly. * Currently we don't distinguish between ordered and unordered categorical data. Should that change? * how should redundancy elimination and explicit factor matrices interact? Example: If you do 1 + C(a, mat):C(b, mat), then currently it will expand that to 1 + C(a, mat) + C(a, mat):C(b, mat), which is going to be weird. Probably we should notice that the .contrast attribute in these cases does not give us the option of full- versus reduced-rank coding, and in redundancy.py we should note that such factors cannot be "expanded". * Profiling/optimization. There are lots of places where I use lazy quadratic algorithms (or even exponential, in the case of the non-redundant coding stuff). Perhaps worse is the heavy multiplication used unconditionally to load data into the model matrix. I'm pretty sure that at least most of the quadratic stuff doesn't matter because it's n^2 where n is something like the number of factors in an interaction term (and who has hundreds of factors interacting in one term?), but it wouldn't hurt to run some profiles to check. I think really what I mean is just, run timeit on a 10-variable interaction to make sure it isn't completely annoying. * Possible optimization: let a stateful transform's memorize_chunk function raise Stateless to indicate that actually, ha-ha, it turns out that it doesn't need to memorize anything after all (b/c the relevant data turns out to be specified explicitly in *args, **kwargs). Actually, this would be really useful for things like splines, which need to do expensive quantile estimation, but not if knots are specified. Another use case: C(something_that's_already_categorical, contrast=...). 
Note that this can't be detected until we do the first round of evaluation. A better interface would be memorize_needed(self, *args, **kwargs). I guess we could even have memorize_passes_needed, but eh... * Wacky idea: make factors into an actual stateful transform (one that takes a dict-like object and returns a matrix or Categorical) This would require: - adding memorize_passes support to stateful transforms - moving the factor memorization state inside an object (so it wouldn't be factors that would be stateful transforms, factors would be factories for stateful transforms) patsy-0.5.2/coverage.xml000066400000000000000000004414171412400214200151740ustar00rootroot00000000000000 /home/matthew/Repositories/github/patsy/patsy patsy-0.5.2/doc/000077500000000000000000000000001412400214200134115ustar00rootroot00000000000000patsy-0.5.2/doc/API-reference.rst000066400000000000000000000130321412400214200165070ustar00rootroot00000000000000``patsy`` API reference ========================== This is a complete reference for everything you get when you `import patsy`. .. module:: patsy .. ipython:: python :suppress: from patsy import * Basic API --------- .. autofunction:: dmatrix .. autofunction:: dmatrices .. autofunction:: incr_dbuilders .. autofunction:: incr_dbuilder .. autoexception:: PatsyError :members: Convenience utilities --------------------- .. autofunction:: balanced .. autofunction:: demo_data Design metadata --------------- .. autoclass:: DesignInfo Here's an example of the most common way to get a :class:`DesignInfo`: .. ipython:: python mat = dmatrix("a + x", demo_data("a", "x", nlevels=3)) di = mat.design_info .. attribute:: column_names The names of each column, represented as a list of strings in the proper order. Guaranteed to exist. .. ipython:: python di.column_names .. attribute:: column_name_indexes An :class:`~collections.OrderedDict` mapping column names (as strings) to column indexes (as integers). 
Guaranteed to exist and to be sorted from low to high. .. ipython:: python di.column_name_indexes .. attribute:: term_names The names of each term, represented as a list of strings in the proper order. Guaranteed to exist. There is a one-to-many relationship between columns and terms -- each term generates one or more columns. .. ipython:: python di.term_names .. attribute:: term_name_slices An :class:`~collections.OrderedDict` mapping term names (as strings) to Python :func:`slice` objects indicating which columns correspond to each term. Guaranteed to exist. The slices are guaranteed to be sorted from left to right and to cover the whole range of columns with no overlaps or gaps. .. ipython:: python di.term_name_slices .. attribute:: terms A list of :class:`Term` objects representing each term. May be None, for example if a user passed in a plain preassembled design matrix rather than using the Patsy machinery. .. ipython:: python di.terms [term.name() for term in di.terms] .. attribute:: term_slices An :class:`~collections.OrderedDict` mapping :class:`Term` objects to Python :func:`slice` objects indicating which columns correspond to which terms. Like :attr:`terms`, this may be None. .. ipython:: python di.term_slices .. attribute:: factor_infos A dict mapping factor objects to :class:`FactorInfo` objects providing information about each factor. Like :attr:`terms`, this may be None. .. ipython:: python di.factor_infos .. attribute:: term_codings An :class:`~collections.OrderedDict` mapping each :class:`Term` object to a list of :class:`SubtermInfo` objects which together describe how this term is encoded in the final design matrix. Like :attr:`terms`, this may be None. .. ipython:: python di.term_codings .. attribute:: builder In versions of patsy before 0.4.0, this returned a ``DesignMatrixBuilder`` object which could be passed to :func:`build_design_matrices`. 
Starting in 0.4.0, :func:`build_design_matrices` now accepts :class:`DesignInfo` objects directly, and writing ``f(design_info.builder)`` is now a deprecated alias for simply writing ``f(design_info)``. A number of convenience methods are also provided that take advantage of the above metadata: .. automethod:: describe .. automethod:: linear_constraint .. automethod:: slice .. automethod:: subset .. automethod:: from_array .. autoclass:: FactorInfo .. autoclass:: SubtermInfo .. autoclass:: DesignMatrix .. automethod:: __new__ .. _stateful-transforms-list: Stateful transforms ------------------- Patsy comes with a number of :ref:`stateful transforms ` built in: .. autofunction:: center .. autofunction:: standardize .. function:: scale(x, center=True, rescale=True, ddof=0) An alias for :func:`standardize`, for R compatibility. Finally, this is not itself a stateful transform, but it's useful if you want to define your own: .. autofunction:: stateful_transform .. _categorical-coding-ref: Handling categorical data ------------------------- .. autoclass:: Treatment .. autoclass:: Diff .. autoclass:: Poly .. autoclass:: Sum .. autoclass:: Helmert .. autoclass:: ContrastMatrix Spline regression ----------------- .. autofunction:: bs .. autofunction:: cr .. autofunction:: cc .. autofunction:: te Working with formulas programmatically -------------------------------------- .. autoclass:: Term .. data:: INTERCEPT This is a pre-instantiated zero-factors :class:`Term` object representing the intercept, useful for making your code clearer. Do remember though that this is not a singleton object, i.e., you should compare against it using ``==``, not ``is``. .. autoclass:: LookupFactor .. autoclass:: EvalFactor .. autoclass:: ModelDesc Working with the Python execution environment --------------------------------------------- .. autoclass:: EvalEnvironment :members: Building design matrices ------------------------ .. autofunction:: design_matrix_builders .. 
autofunction:: build_design_matrices Missing values -------------- .. autoclass:: NAAction :members: Linear constraints ------------------ .. autoclass:: LinearConstraint Origin tracking --------------- .. autoclass:: Origin :members: patsy-0.5.2/doc/Makefile000066400000000000000000000056771412400214200150700ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d _build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf _build/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html @echo @echo "Build finished. The HTML pages are in _build/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) _build/dirhtml @echo @echo "Build finished. The HTML pages are in _build/dirhtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) _build/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) _build/json @echo @echo "Build finished; now you can process the JSON files." 
htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) _build/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in _build/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) _build/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in _build/qthelp, like this:" @echo "# qcollectiongenerator _build/qthelp/scikitssparse.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile _build/qthelp/scikitssparse.qhc" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) _build/latex @echo @echo "Build finished; the LaTeX files are in _build/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) _build/changes @echo @echo "The overview file is in _build/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) _build/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in _build/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) _build/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in _build/doctest/output.txt." patsy-0.5.2/doc/R-comparison.rst000066400000000000000000000157661412400214200165330ustar00rootroot00000000000000.. _R-comparison: Differences between R and Patsy formulas =========================================== .. currentmodule:: patsy Patsy has a very high degree of compatibility with R. Almost any formula you would use in R will also work in Patsy -- with a few caveats. .. note:: All R quirks described herein were last verified with R 2.15.0. Differences from R: - Most obviously, we both support using arbitrary code to perform variable transformations, but in Patsy this code is written in Python, not R. - Patsy has no ``%in%``. In R, ``a %in% b`` is identical to ``b:a``. Patsy only supports the ``b:a`` version of this syntax. 
- In Patsy, only ``**`` can be used for exponentiation. In R, both ``^`` and ``**`` can be used for exponentiation, i.e., you can write either ``(a + b)^2`` or ``(a + b)**2``. In Patsy (as in Python generally), only ``**`` indicates exponentiation; ``^`` is ignored by the parser (and if present, will be interpreted as a call to the Python binary XOR operator). - In Patsy, the left-hand side of a formula uses the same evaluation rules as the right-hand side. In R, the left hand side is treated as R code, so a formula like ``y1 + y2 ~ x1 + x2`` actually regresses the *sum* of ``y1`` and ``y2`` onto the *set of predictors* ``x1`` and ``x2``. In Patsy, the only difference between the left-hand side and the right-hand side is that there is no automatic intercept added to the left-hand side. (In this regard Patsy is similar to the R enhanced formula package `Formula `_.) - Patsy produces a different column ordering for formulas involving numeric predictors. In R, there are two rules for term ordering: first, lower-order interactions are sorted before higher-order interactions, and second, interactions of the same order are listed in whatever order they appeared in the formula. In Patsy, we add another rule: terms are first grouped together based on which numeric factors they include. Then within each group, we use the same ordering as R. - Patsy has more rigorous handling of the presence or absence of the intercept term. In R, the rules for when deciding whether to include an intercept are somewhat idiosyncratic and can ignore things like parentheses. To understand the difference, first consider the formula ``a + (b - a)``. In both Patsy and R, we first evaluate the ``(b - a)`` part; since there is no ``a`` term to remove, this simplifies to just ``b``. We then evaluate ``a + b``: the end result is a model which contains an ``a`` term in it. Now consider the formula ``1 + (b - 1)``. 
In Patsy, this is analogous to the case above: first ``(b - 1)`` is reduced to just ``b``, and then ``1 + b`` produces a model with intercept included. In R, the parentheses are ignored, and ``1 + (b - 1)`` gives a model that does *not* include the intercept. This can be slightly more confusing when it comes to the implicit intercept term. In Patsy, this is handled exactly as if the right-hand side of each formula has an invisible ``"1 +"`` inserted at the beginning. Therefore in Patsy, these formulas are different:: # Python: dmatrices("y ~ b - 1") # equivalent to 1 + b - 1: no intercept dmatrices("y ~ (b - 1)") # equivalent to 1 + (b - 1): has intercept In R, these two formulas are equivalent. - Patsy has a more accurate algorithm for deciding whether to use a full- or reduced-rank coding scheme for categorical factors. There are two situations in which R's coding algorithm for categorical variables can become confused and produce over- or under-specified model matrices. Patsy, so far as we are aware, produces correctly specified matrices in all cases. It's unlikely that you'll run into these in actual usage, but they're worth mentioning. To illustrate, let's define ``a`` and ``b`` as categorical predictors, each with 2 levels: .. code-block:: rconsole # R: > a <- factor(c("a1", "a1", "a2", "a2")) > b <- factor(c("b1", "b2", "b1", "b2")) .. ipython:: python :suppress: a = ["a1", "a1", "a2", "a2"] b = ["b1", "b2", "b1", "b2"] from patsy import dmatrix The first problem occurs for formulas like ``1 + a:b``. This produces a model matrix with rank 4, just like many other formulas that include ``a:b``, such as ``0 + a:b``, ``1 + a + a:b``, and ``a*b``: .. code-block:: rconsole # R: > qr(model.matrix(~ 1 + a:b))$rank [1] 4 However, the matrix produced for this formula has 5 columns, meaning that it contains redundant overspecification: .. 
code-block:: rconsole # R: > mat <- model.matrix(~ 1 + a:b) > ncol(mat) [1] 5 The underlying problem is that R's algorithm does not pay attention to 'non-local' redundancies -- it will adjust its coding to avoid a redundancy between two terms of degree-n, or a term of degree-n and one of degree-(n+1), but it is blind to a redundancy between a term of degree-n and one of degree-(n+2), as we have here. Patsy's algorithm has no such limitation: .. ipython:: python # Python: a = ["a1", "a1", "a2", "a2"] b = ["b1", "b2", "b1", "b2"] mat = dmatrix("1 + a:b") mat.shape[1] To produce this result, it codes ``a:b`` uses the same columns that would be used to code ``b + a:b`` in the formula ``"1 + b + a:b"``. The second problem occurs for formulas involving numeric predictors. Effectively, when determining coding schemes, R assumes that all factors are categorical. So for the formula ``0 + a:c + a:b``, R will notice that if it used a full-rank coding for the ``c`` and ``b`` factors, then both terms would be collinear with ``a``, and thus each other. Therefore, it encodes ``c`` with a full-rank encoding, and uses a reduced-rank encoding for ``b``. (And the ``0 +`` lets it avoid the previous bug.) So far, so good. But now consider the formula ``0 + a:x + a:b``, where ``x`` is numeric. Here, ``a:x`` and ``a:b`` will not be collinear, even if we do use a full-rank encoding for ``b``. Therefore, we *should* use a full-rank encoding for ``b``, and produce a model matrix with 6 columns. But in fact, R gives us only 4: .. code-block:: rconsole # R: > x <- c(1, 2, 3, 4) > mat <- model.matrix(~ 0 + a:x + a:b) > ncol(mat) [1] 4 The problem is that it cannot tell the difference between ``0 + a:x + a:b`` and ``0 + a:c + a:b``: it uses the same coding for both, whether it's appropriate or not. (The alert reader might wonder whether this bug could be triggered by a simpler formula, like ``0 + x + b``. 
It turns out that R's code ``do_modelmatrix`` function has a special-case where for first-order interactions only, it *will* peek at the type of the data before deciding on a coding scheme.) Patsy always checks whether each factor is categorical or numeric before it makes coding decisions, and thus handles this case correctly: .. ipython:: python # Python: x = [1, 2, 3, 4] mat = dmatrix("0 + a:x + a:b") mat.shape[1] patsy-0.5.2/doc/_examples/000077500000000000000000000000001412400214200153665ustar00rootroot00000000000000patsy-0.5.2/doc/_examples/add_predictors.py000066400000000000000000000006171412400214200207320ustar00rootroot00000000000000def add_predictors(base_formula, extra_predictors): desc = ModelDesc.from_formula(base_formula) # Using LookupFactor here ensures that everything will work correctly even # if one of the column names in extra_columns is named like "weight.in.kg" # or "sys.exit()" or "LittleBobbyTables()". desc.rhs_termlist += [Term([LookupFactor(p)]) for p in extra_predictors] return desc patsy-0.5.2/doc/_examples/example_lm.py000066400000000000000000000032671412400214200200730ustar00rootroot00000000000000import numpy as np from patsy import dmatrices, build_design_matrices class LM(object): """An example ordinary least squares linear model class, analogous to R's lm() function. 
Don't use this in real life, it isn't properly tested.""" def __init__(self, formula_like, data={}): y, x = dmatrices(formula_like, data, 1) self.nobs = x.shape[0] self.betas, self.rss, _, _ = np.linalg.lstsq(x, y) self._y_design_info = y.design_info self._x_design_info = x.design_info def __repr__(self): summary = ("Ordinary least-squares regression\n" " Model: %s ~ %s\n" " Regression (beta) coefficients:\n" % (self._y_design_info.describe(), self._x_design_info.describe())) for name, value in zip(self._x_design_info.column_names, self.betas): summary += " %s: %0.3g\n" % (name, value[0]) return summary def predict(self, new_data): (new_x,) = build_design_matrices([self._x_design_info], new_data) return np.dot(new_x, self.betas) def loglik(self, new_data): (new_y, new_x) = build_design_matrices([self._y_design_info, self._x_design_info], new_data) new_pred = np.dot(new_x, self.betas) sigma2 = self.rss / self.nobs # It'd be more elegant to use scipy.stats.norm.logpdf here, but adding # a dependency on scipy makes the docs build more complicated: Z = -0.5 * np.log(2 * np.pi * sigma2) return Z + -0.5 * (new_y - new_x) ** 2/sigma2 patsy-0.5.2/doc/_examples/example_treatment.py000066400000000000000000000013261412400214200214600ustar00rootroot00000000000000import numpy as np class MyTreat(object): def __init__(self, reference=0): self.reference = reference def code_with_intercept(self, levels): return ContrastMatrix(np.eye(len(levels)), ["[My.%s]" % (level,) for level in levels]) def code_without_intercept(self, levels): eye = np.eye(len(levels) - 1) contrasts = np.vstack((eye[:self.reference, :], np.zeros((1, len(levels) - 1)), eye[self.reference:, :])) suffixes = ["[MyT.%s]" % (level,) for level in levels[:self.reference] + levels[self.reference + 1:]] return ContrastMatrix(contrasts, suffixes) 
patsy-0.5.2/doc/_static/000077500000000000000000000000001412400214200150375ustar00rootroot00000000000000patsy-0.5.2/doc/_static/closelabel.png000066400000000000000000000002501412400214200176470ustar00rootroot00000000000000PNG  IHDRtEXtSoftwareAdobe ImageReadyqe<JIDATxb```@ŀ@ *įP+4.0 I&/6X H7uѼ 0 na,IENDB`patsy-0.5.2/doc/_static/facebox.css000066400000000000000000000022071412400214200171610ustar00rootroot00000000000000#facebox { position: absolute; top: 0; left: 0; z-index: 100; text-align: left; } #facebox .popup{ position:relative; border:3px solid rgba(0,0,0,0); -webkit-border-radius:5px; -moz-border-radius:5px; border-radius:5px; -webkit-box-shadow:0 0 18px rgba(0,0,0,0.4); -moz-box-shadow:0 0 18px rgba(0,0,0,0.4); box-shadow:0 0 18px rgba(0,0,0,0.4); } #facebox .content { display:table; width: 370px; padding: 10px; background: #fff; -webkit-border-radius:4px; -moz-border-radius:4px; border-radius:4px; } #facebox .content > p:first-child{ margin-top:0; } #facebox .content > p:last-child{ margin-bottom:0; } #facebox .close{ position:absolute; top:5px; right:5px; padding:2px; background:#fff; } #facebox .close img{ opacity:0.3; } #facebox .close:hover img{ opacity:1.0; } #facebox .loading { text-align: center; } #facebox .image { text-align: center; } #facebox img { border: 0; margin: 0; } #facebox_overlay { position: fixed; top: 0px; left: 0px; height:100%; width:100%; } .facebox_hide { z-index:-100; } .facebox_overlayBG { background-color: #000; z-index: 99; }patsy-0.5.2/doc/_static/facebox.js000066400000000000000000000221751412400214200170130ustar00rootroot00000000000000/* * Facebox (for jQuery) * version: 1.2 (05/05/2008) * @requires jQuery v1.2 or later * * Examples at http://famspam.com/facebox/ * * Licensed under the MIT: * http://www.opensource.org/licenses/mit-license.php * * Copyright 2007, 2008 Chris Wanstrath [ chris@ozmm.org ] * * Usage: * * jQuery(document).ready(function() { * jQuery('a[rel*=facebox]').facebox() * }) * * Terms * Loads the #terms div 
in the box * * Terms * Loads the terms.html page in the box * * Terms * Loads the terms.png image in the box * * * You can also use it programmatically: * * jQuery.facebox('some html') * jQuery.facebox('some html', 'my-groovy-style') * * The above will open a facebox with "some html" as the content. * * jQuery.facebox(function($) { * $.get('blah.html', function(data) { $.facebox(data) }) * }) * * The above will show a loading screen before the passed function is called, * allowing for a better ajaxy experience. * * The facebox function can also display an ajax page, an image, or the contents of a div: * * jQuery.facebox({ ajax: 'remote.html' }) * jQuery.facebox({ ajax: 'remote.html' }, 'my-groovy-style') * jQuery.facebox({ image: 'stairs.jpg' }) * jQuery.facebox({ image: 'stairs.jpg' }, 'my-groovy-style') * jQuery.facebox({ div: '#box' }) * jQuery.facebox({ div: '#box' }, 'my-groovy-style') * * Want to close the facebox? Trigger the 'close.facebox' document event: * * jQuery(document).trigger('close.facebox') * * Facebox also has a bunch of other hooks: * * loading.facebox * beforeReveal.facebox * reveal.facebox (aliased as 'afterReveal.facebox') * init.facebox * afterClose.facebox * * Simply bind a function to any of these hooks: * * $(document).bind('reveal.facebox', function() { ...stuff to do after the facebox and contents are revealed... }) * */ (function($) { $.facebox = function(data, klass) { $.facebox.loading() if (data.ajax) fillFaceboxFromAjax(data.ajax, klass) else if (data.image) fillFaceboxFromImage(data.image, klass) else if (data.div) fillFaceboxFromHref(data.div, klass) else if ($.isFunction(data)) data.call($) else $.facebox.reveal(data, klass) } /* * Public, $.facebox methods */ $.extend($.facebox, { settings: { opacity : 0.2, overlay : true, /* I don't know why absolute paths don't work. If you try to use facebox * outside of the examples folder these images won't show up. 
*/ loadingImage : '../../_static/loading.gif', closeImage : '../../_static/closelabel.png', imageTypes : [ 'png', 'jpg', 'jpeg', 'gif' ], faceboxHtml : '\ ' }, loading: function() { init() if ($('#facebox .loading').length == 1) return true showOverlay() $('#facebox .content').empty() $('#facebox .body').children().hide().end(). append('
') $('#facebox').css({ top: getPageScroll()[1] + (getPageHeight() / 10), left: $(window).width() / 2 - 205 }).show() $(document).bind('keydown.facebox', function(e) { if (e.keyCode == 27) $.facebox.close() return true }) $(document).trigger('loading.facebox') }, reveal: function(data, klass) { $(document).trigger('beforeReveal.facebox') if (klass) $('#facebox .content').addClass(klass) $('#facebox .content').append(data) $('#facebox .loading').remove() $('#facebox .body').children().fadeIn('normal') $('#facebox').css('left', $(window).width() / 2 - ($('#facebox .popup').width() / 2)) $(document).trigger('reveal.facebox').trigger('afterReveal.facebox') }, close: function() { $(document).trigger('close.facebox') return false } }) /* * Public, $.fn methods */ $.fn.facebox = function(settings) { if ($(this).length == 0) return init(settings) function clickHandler() { $.facebox.loading(true) // support for rel="facebox.inline_popup" syntax, to add a class // also supports deprecated "facebox[.inline_popup]" syntax var klass = this.rel.match(/facebox\[?\.(\w+)\]?/) if (klass) klass = klass[1] fillFaceboxFromHref(this.href, klass) return false } return this.bind('click.facebox', clickHandler) } /* * Private methods */ // called one time to setup facebox on this page function init(settings) { if ($.facebox.settings.inited) return true else $.facebox.settings.inited = true $(document).trigger('init.facebox') makeCompatible() var imageTypes = $.facebox.settings.imageTypes.join('|') $.facebox.settings.imageTypesRegexp = new RegExp('\.(' + imageTypes + ')$', 'i') if (settings) $.extend($.facebox.settings, settings) $('body').append($.facebox.settings.faceboxHtml) var preload = [ new Image(), new Image() ] preload[0].src = $.facebox.settings.closeImage preload[1].src = $.facebox.settings.loadingImage $('#facebox').find('.b:first, .bl').each(function() { preload.push(new Image()) preload.slice(-1).src = $(this).css('background-image').replace(/url\((.+)\)/, '$1') }) $('#facebox 
.close').click($.facebox.close) $('#facebox .close_image').attr('src', $.facebox.settings.closeImage) } // getPageScroll() by quirksmode.com function getPageScroll() { var xScroll, yScroll; if (self.pageYOffset) { yScroll = self.pageYOffset; xScroll = self.pageXOffset; } else if (document.documentElement && document.documentElement.scrollTop) { // Explorer 6 Strict yScroll = document.documentElement.scrollTop; xScroll = document.documentElement.scrollLeft; } else if (document.body) {// all other Explorers yScroll = document.body.scrollTop; xScroll = document.body.scrollLeft; } return new Array(xScroll,yScroll) } // Adapted from getPageSize() by quirksmode.com function getPageHeight() { var windowHeight if (self.innerHeight) { // all except Explorer windowHeight = self.innerHeight; } else if (document.documentElement && document.documentElement.clientHeight) { // Explorer 6 Strict Mode windowHeight = document.documentElement.clientHeight; } else if (document.body) { // other Explorers windowHeight = document.body.clientHeight; } return windowHeight } // Backwards compatibility function makeCompatible() { var $s = $.facebox.settings $s.loadingImage = $s.loading_image || $s.loadingImage $s.closeImage = $s.close_image || $s.closeImage $s.imageTypes = $s.image_types || $s.imageTypes $s.faceboxHtml = $s.facebox_html || $s.faceboxHtml } // Figures out what you want to display and displays it // formats are: // div: #id // image: blah.extension // ajax: anything else function fillFaceboxFromHref(href, klass) { // div if (href.match(/#/)) { var url = window.location.href.split('#')[0] var target = href.replace(url,'') if (target == '#') return $.facebox.reveal($(target).html(), klass) // image } else if (href.match($.facebox.settings.imageTypesRegexp)) { fillFaceboxFromImage(href, klass) // ajax } else { fillFaceboxFromAjax(href, klass) } } function fillFaceboxFromImage(href, klass) { var image = new Image() image.onload = function() { $.facebox.reveal('
', klass) } image.src = href } function fillFaceboxFromAjax(href, klass) { $.get(href, function(data) { $.facebox.reveal(data, klass) }) } function skipOverlay() { return $.facebox.settings.overlay == false || $.facebox.settings.opacity === null } function showOverlay() { if (skipOverlay()) return if ($('#facebox_overlay').length == 0) $("body").append('
') $('#facebox_overlay').hide().addClass("facebox_overlayBG") .css('opacity', $.facebox.settings.opacity) .click(function() { $(document).trigger('close.facebox') }) .fadeIn(200) return false } function hideOverlay() { if (skipOverlay()) return $('#facebox_overlay').fadeOut(200, function(){ $("#facebox_overlay").removeClass("facebox_overlayBG") $("#facebox_overlay").addClass("facebox_hide") $("#facebox_overlay").remove() }) return false } /* * Bindings */ $(document).bind('close.facebox', function() { $(document).unbind('keydown.facebox') $('#facebox').fadeOut(function() { $('#facebox .content').removeClass().addClass('content') $('#facebox .loading').remove() $(document).trigger('afterClose.facebox') }) hideOverlay() }) })(jQuery); patsy-0.5.2/doc/_static/loading.gif000077500000000000000000000053171412400214200171540ustar00rootroot00000000000000GIF89a 򺺺444ėTTT! NETSCAPE2.0! , H *\p hp"8G>D)R4CIË\9p:ȹs1_2`p` u< uSYڐkǞ`Fhvƴ6S>u+ryJ/QM.0@p_ ++/KY&]9ى Mr `ixr\˪ vfjMO&*Z؇o>;ܦŝ",,@CPؼrSE.ٴjTWYR Y+ѫKb ڌ! 
,H*/g, ">") .replace(/"/g, """) .replace(/'/g, "'")) } function scrapeText(codebox){ /// Returns input lines cleaned of prompt1 and prompt2 var lines = codebox.split('\n'); var newlines = new Array(); $.each(lines, function() { if (this.match(/^In \[\d+]: /)){ newlines.push(this.replace(/^(\s)*In \[\d+]: /,"")); } else if (this.match(/^(\s)*\.+:/)){ newlines.push(this.replace(/^(\s)*\.+: /,"")); } } ); return newlines.join('\\n'); } $(document).ready( function() { // grab all code boxes var ipythoncode = $(".highlight-ipython"); $.each(ipythoncode, function() { var code = scrapeText($(this).text()); // give them a facebox pop-up with plain text code $(this).append('View Code'); $(this,"textarea").select(); }); }); patsy-0.5.2/doc/builtins-reference.rst000066400000000000000000000005671412400214200177400ustar00rootroot00000000000000``patsy.builtins`` API reference =================================== This module defines some tools that are automatically made available to code evaluated in formulas. You can also access it directly; use ``from patsy.builtins import *`` to import the same variables that formula code receives automatically. .. automodule:: patsy.builtins :members: :undoc-members: patsy-0.5.2/doc/categorical-coding.rst000066400000000000000000000057401412400214200176670ustar00rootroot00000000000000.. _categorical-coding: Coding categorical data ======================= .. currentmodule:: patsy Patsy allows great flexibility in how categorical data is coded, via the function :func:`C`. :func:`C` marks some data as being categorical (including data which would not automatically be treated as categorical, such as a column of integers), while also optionally setting the preferred coding scheme and level ordering. Let's get some categorical data to work with: .. 
ipython:: python from patsy import dmatrix, demo_data, ContrastMatrix, Poly data = demo_data("a", nlevels=3) data As you know, simply giving Patsy a categorical variable causes it to be coded using the default :class:`Treatment` coding scheme. (Strings and booleans are treated as categorical by default.) .. ipython:: python dmatrix("a", data) We can also alter the level ordering, which is useful for, e.g., :class:`Diff` coding: .. ipython:: python l = ["a3", "a2", "a1"] dmatrix("C(a, levels=l)", data) But the default coding is just that -- a default. The easiest alternative is to use one of the other built-in coding schemes, like orthogonal polynomial coding: .. ipython:: python dmatrix("C(a, Poly)", data) There are a number of built-in coding schemes; for details you can check the :ref:`API reference `. But we aren't restricted to those. We can also provide a custom contrast matrix, which allows us to produce all kinds of strange designs: .. ipython:: python contrast = [[1, 2], [3, 4], [5, 6]] dmatrix("C(a, contrast)", data) dmatrix("C(a, [[1], [2], [-4]])", data) Hmm, those ``[custom0]``, ``[custom1]`` names that Patsy auto-generated for us are a bit ugly looking. We can attach names to our contrast matrix by creating a :class:`ContrastMatrix` object, and make things prettier: .. ipython:: python contrast_mat = ContrastMatrix(contrast, ["[pretty0]", "[pretty1]"]) dmatrix("C(a, contrast_mat)", data) And, finally, if we want to get really fancy, we can also define our own "smart" coding schemes like :class:`Poly`. Just define a class that has two methods, :meth:`code_with_intercept` and :meth:`code_without_intercept`. They have identical signatures, taking a list of levels as their argument and returning a :class:`ContrastMatrix`. Patsy will automatically choose the appropriate method to call to produce a full-rank design matrix without redundancy; see :ref:`redundancy` for the full details on how Patsy makes this decision. 
As an example, here's a simplified version of the built-in :class:`Treatment` coding object: .. literalinclude:: _examples/example_treatment.py .. ipython:: python :suppress: with open("_examples/example_treatment.py") as f: exec(f.read()) And it can now be used just like the built-in methods: .. ipython:: python # Full rank: dmatrix("0 + C(a, MyTreat)", data) # Reduced rank: dmatrix("C(a, MyTreat)", data) # With argument: dmatrix("C(a, MyTreat(2))", data) patsy-0.5.2/doc/changes.rst000066400000000000000000000164721412400214200155650ustar00rootroot00000000000000Changes ======= .. currentmodule:: patsy All Patsy releases are archived at Zenodo: .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.592075.svg :target: https://doi.org/10.5281/zenodo.592075 v0.5.2 ------ * Fix some deprecation warnings associated with importing from the `collections` module (rather than `collections.abc`) in Python 3.7+. v0.5.1 ------ * The Python 3.6.7 and 3.7.1 point releases changed the standard tokenizer module in a way that broke patsy. Updated patsy to work with these point releases. (See `#131 `__ for details.) v0.5.0 ------ * Dropped support for Python 2.6 and 3.3. * Update to keep up with ``pandas`` API changes * More consistent handling of degenerate linear constraints in :meth:`DesignInfo.linear_constraint` (`#89 `__) * Fix a crash in ``DesignMatrix.__repr__`` when ``shape[0] == 0`` v0.4.1 ------ New features: * On Python 2, accept ``unicode`` strings containing only ASCII characters as valid formula descriptions in the high-level formula API (:func:`dmatrix` and friends). This is intended as a convenience for people using Python 2 with ``from __future__ import unicode_literals``. (See :ref:`py2-versus-py3`.) Bug fixes: * Accept ``long`` as a valid integer type in the new :class:`DesignInfo` classes. 
In particular this fixes errors that arise on 64-bit Windows builds (where ``ndarray.shape`` contains ``long`` objects), like ``ValueError: For numerical factors, num_columns must be an int.`` * Fix deprecation warnings encountered with numpy 1.10 v0.4.0 ------ Incompatible changes: * :class:`EvalFactor` and :meth:`ModelDesc.from_formula` no longer take an ``eval_env`` argument. * The :func:`design_matrix_builders` function and the :meth:`factor_protocol.memorize_passes_needed` method now require an ``eval_env`` as an additional argument. * The :class:`DesignInfo` constructor's arguments have totally changed. In addition to the changes needed to support the new features below, we no longer support "shim" DesignInfo objects that have non-trivial term specifications. This was only included in the first place to provide a compatibility hook for competing formula libraries; four years later, no such libraries have shown up. If one does, we can re-add it, but I'm not going to bother maintaining it in the mean time... * Dropped support for Python 3.2. Other changes: * Patsy now supports Pandas's new (version 0.15 or later) categorical objects. * Formulas (or more precisely, :class:`EvalFactor` objects) now only keep a reference to the variables required from their environment instead of the whole environment where the formula was defined. (Thanks to Christian Hudon.) * :class:`DesignInfo` has new attributes :attr:`DesignInfo.factor_infos` and :attr:`DesignInfo.term_codings` which provide detailed metadata about how each factor and term is encoded. * As a result of the above changes, the split between :class:`DesignInfo` and :class:`DesignMatrixBuilder` is no longer necessary; :class:`DesignMatrixBuiler` has been eliminated. So for example, :func:`design_matrix_builders` now returns a list of :class:`DesignInfo` objects, and you can now pass :class:`DesignInfo` objects directly to any function for building design matrices. 
For compatibility, :class:`DesignInfo` continues to provide ``.builder`` and ``.design_info`` attributes, so that old code should continue to work; however, these attributes are deprecated. * Ensured that attempting to pickle most Patsy objects raises an error. This has never been supported, and the interesting cases failed in any case, but now we're taking a more systematic approach. (Soon we will add real, supported pickling support.) * Fixed a bug when running under ``python -OO``. v0.3.0 ------ * New stateful transforms for computing natural and cylic cubic splines with constraints, and tensor spline bases with constraints. (Thanks to `@broessli `_ and GDF Suez for contributing this code.) * Dropped support for Python 2.5 and earlier. * Switched to using a single source tree for both Python 2 and Python 3. * Added a fast-path to skip NA detection for inputs with boolean dtypes (thanks to Matt Davis for patch). * Incompatible change: Sometimes when building a design matrix for a formula that does not depend on the data in any way, like ``"1 ~ 1"``, we have no way to determine how many rows the resulting matrix should have. In previous versions of patsy, when this occurred we simply returned a matrix with 1 row. In 0.3.0+, we instead refuse to guess, and raise an error. Note that because of the next change listed, this situation occurs less frequently in 0.3.0 than in previous versions. * If the ``data`` argument to :func:`build_design_matrices` (or derived functions like :func:`dmatrix`, :func:`dmatrices`) is a :class:`pandas.DataFrame`, then we now check its number of rows and index, and insist that the output design matrices match. This also means that if ``data`` is a DataFrame, then the error described in the first bullet above cannot occur -- we will simply return a column of 1s that is the same size as the input dataframe. * Worked around some more limitations in py2exe/py2app and friends. 
v0.2.1 ------ * Fixed a nasty bug in missing value handling where, if missing values were present, ``dmatrix(..., result_type="dataframe")`` would always crash, and ``dmatrices("y ~ 1")`` would produce left- and right-hand side matrices that had different numbers of rows. (As far as I can tell, this bug could not possibly cause incorrect results, only crashes, since it always involved the creation of matrices with incommensurate shapes. Therefore there is no need to worry about the accuracy of any analyses that were successfully performed with v0.2.0.) * Modified ``patsy/__init__.py`` to work around limitations in py2exe/py2app/etc. v0.2.0 ------ Warnings: * The lowest officially supported Python version is now 2.5. So far as I know everything still works with Python 2.4, but as everyone else has continued to drop support for 2.4, testing on 2.4 has become so much trouble that I've given up. New features: * New support for automatically detecting and (optionally) removing missing values (see :class:`NAAction`). * New stateful transform for B-spline regression: :func:`bs`. (Requires scipy.) * Added a core API to make it possible to run predictions on only a subset of model terms. (This is particularly useful for e.g. plotting the isolated effect of a single fitted spline term.) See :meth:`DesignMatrixBuilder.subset`. * :class:`LookupFactor` now allows users to mark variables as categorical directly. * :class:`pandas.Categorical` objects are now recognized as representing categorical data and handled appropriately. * Better error reporting for exceptions raised by user code inside formulas. We now, whenever possible, tag the generated exception with information about which factor's code raised it, and use this information to give better error reporting. * :meth:`EvalEnvironment.capture` now takes a `reference` argument, to make it easier to implement new :func:`dmatrix`-like functions. Other: miscellaneous doc improvements and bug fixes. 
v0.1.0 ------ First public release. patsy-0.5.2/doc/conf.py000066400000000000000000000171471412400214200147220ustar00rootroot00000000000000# -*- coding: utf-8 -*- # General information about the project. project = u'patsy' copyright = u'2011-2015, Nathaniel J. Smith' import sys print "python exec:", sys.executable print "sys.path:", sys.path try: import numpy print "numpy: %s, %s" % (numpy.__version__, numpy.__file__) except ImportError: print "no numpy" try: import matplotlib print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__) except ImportError: print "no matplotlib" try: import IPython print "ipython: %s, %s" % (IPython.__version__, IPython.__file__) except ImportError: print "no ipython" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. import sys, os sys.path.insert(0, os.getcwd() + "/..") import patsy version = patsy.__version__ # The full version, including alpha/beta/rc tags. release = version # # scikits.sparse documentation build configuration file, created by # sphinx-quickstart on Sat Dec 12 22:10:41 2009. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.append(os.path.abspath('sphinxext')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. 
They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.imgmath', 'sphinx.ext.intersphinx', 'IPython.sphinxext.ipython_directive', 'IPython.sphinxext.ipython_console_highlighting', ] # Undocumented trick: if we def setup here in conf.py, it gets called just # like an extension's setup function. def setup(app): app.add_javascript("show-code.js") app.add_javascript("facebox.js") app.add_stylesheet("facebox.css") # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. #unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_use_modindex = True # If false, no index is generated. 
#html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'patsydoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'patsy.tex', u'patsy Documentation', u'Nathaniel J. Smith', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True # -- Custom extra options autoclass_content = "both" intersphinx_mapping = {"python": ("http://docs.python.org", None), "numpy": ("http://docs.scipy.org/doc/numpy", None), "pandas": ('http://pandas.pydata.org/pandas-docs/stable/', None), } autodoc_member_order = "source" patsy-0.5.2/doc/expert-model-specification.rst000066400000000000000000000250031412400214200213660ustar00rootroot00000000000000.. 
_expert-model-specification: Model specification for experts and computers ============================================= .. currentmodule:: patsy While the formula language is great for interactive model-fitting and exploratory data analysis, there are times when we want a different or more systematic interface for creating design matrices. If you ever find yourself writing code that pastes together bits of strings to create a formula, then stop! And read this chapter. Our first option, of course, is that we can go ahead and write some code to construct our design matrices directly, just like we did in the old days. Since this is supported directly by :func:`dmatrix` and :func:`dmatrices`, it also works with any third-party library functions that use Patsy internally. Just pass in an array_like or a tuple ``(y_array_like, X_array_like)`` in place of the formula. .. ipython:: python from patsy import dmatrix X = [[1, 10], [1, 20], [1, -2]] dmatrix(X) By using a :class:`DesignMatrix` with :class:`DesignInfo` attached, we can also specify custom names for our custom matrix (or even term slices and so forth), so that we still get the nice output and such that Patsy would otherwise provide: .. ipython:: python from patsy import DesignMatrix, DesignInfo design_info = DesignInfo(["Intercept!", "Not intercept!"]) X_dm = DesignMatrix(X, design_info) dmatrix(X_dm) Or if all we want to do is to specify column names, we could also just use a :class:`pandas.DataFrame`: .. ipython:: python import pandas df = pandas.DataFrame([[1, 10], [1, 20], [1, -2]], columns=["Intercept!", "Not intercept!"]) dmatrix(df) However, there is also a middle ground between pasting together strings and going back to putting together design matrices out of string and baling wire. Patsy has a straightforward Python interface for representing the result of parsing formulas, and you can use it directly. 
This lets you keep Patsy's normal advantages -- handling of categorical data and interactions, predictions, term tracking, etc. -- while using a nice high-level Python API. An example of somewhere this might be useful is if, say, you had a GUI with a tick box next to each variable in your data set, and wanted to construct a formula containing all the variables that had been checked, and letting Patsy deal with categorical data handling. Or this would be the approach you'd take for doing stepwise regression, where you need to programatically add and remove terms. Whatever your particular situation, the strategy is this: #. Construct some factor objects (probably using :class:`LookupFactor` or :class:`EvalFactor` #. Put them into some :class:`Term` objects, #. Put the :class:`Term` objects into two lists, representing the left- and right-hand side of your formula, #. And then wrap the whole thing up in a :class:`ModelDesc`. (See :ref:`formulas` if you need a refresher on what each of these things are.) .. ipython:: python import numpy as np from patsy import (ModelDesc, EvalEnvironment, Term, EvalFactor, LookupFactor, demo_data, dmatrix) data = demo_data("a", "x") # LookupFactor takes a dictionary key: a_lookup = LookupFactor("a") # EvalFactor takes arbitrary Python code: x_transform = EvalFactor("np.log(x ** 2)") # First argument is empty list for dmatrix; we would need to put # something there if we were calling dmatrices. desc = ModelDesc([], [Term([a_lookup]), Term([x_transform]), # An interaction: Term([a_lookup, x_transform])]) # Create the matrix (or pass 'desc' to any statistical library # function that uses patsy.dmatrix internally): dmatrix(desc, data) Notice that no intercept term is included. Implicit intercepts are a feature of the formula parser, not the underlying representation. If you want an intercept, include the constant :const:`INTERCEPT` in your list of terms (which is just sugar for ``Term([])``). .. 
note:: Another option is to just pass your term lists directly to :func:`design_matrix_builders`, and skip the :class:`ModelDesc` entirely -- all of the highlevel API functions like :func:`dmatrix` accept :class:`DesignMatrixBuilder` objects as well as :class:`ModelDesc` objects. Example: say our data has 100 different numerical columns that we want to include in our design -- and we also have a few categorical variables with a more complex interaction structure. Here's one solution: .. literalinclude:: _examples/add_predictors.py .. ipython:: python :suppress: with open("_examples/add_predictors.py") as f: exec(f.read()) .. ipython:: python extra_predictors = ["x%s" % (i,) for i in range(10)] desc = add_predictors("np.log(y) ~ a*b + c:d", extra_predictors) desc.describe() The factor protocol ------------------- If :class:`LookupFactor` and :class:`EvalFactor` aren't enough for you, then you can define your own factor class. The full interface looks like this: .. class:: factor_protocol .. method:: name() This must return a short string describing this factor. It will be used to create column names, among other things. .. attribute:: origin A :class:`patsy.Origin` if this factor has one; otherwise, just set it to None. .. method:: __eq__(obj) __ne__(obj) __hash__() If your factor will ever contain categorical data or participate in interactions, then it's important to make sure you've defined :meth:`~object.__eq__` and :meth:`~object.__ne__` and that your type is :term:`hashable`. These methods will determine which factors Patsy considers equal for purposes of redundancy elimination. .. method:: memorize_passes_needed(state, eval_env) Return the number of passes through the data that this factor will need in order to set up any :ref:`stateful-transforms`. If you don't want to support stateful transforms, just return 0. In this case, :meth:`memorize_chunk` and :meth:`memorize_finish` will never be called. 
`state` is an (initially) empty dict which is maintained by the builder machinery, and that we can do whatever we like with. It will be passed back in to all memorization and evaluation methods. `eval_env` is an :class:`EvalEnvironment` object, describing the Python environment where the factor is being evaluated. .. method:: memorize_chunk(state, which_pass, data) Called repeatedly with each 'chunk' of data produced by the `data_iter_maker` passed to :func:`design_matrix_builders`. `state` is the state dictionary. `which_pass` will be zero on the first pass through the data, and eventually reach the value you returned from :meth:`memorize_passes_needed`, minus one. Return value is ignored. .. method:: memorize_finish(state, which_pass) Called once after each pass through the data. Return value is ignored. .. method:: eval(state, data) Evaluate this factor on the given `data`. Return value should ideally be a 1-d or 2-d array or :func:`Categorical` object, but this will be checked and converted as needed. In addition, factor objects should be pickleable/unpickleable, so as to allow models containing them to be pickled/unpickled. (Or, if for some reason your factor objects are *not* safely pickleable, you should consider giving them a `__getstate__` method which raises an error, so that any users which attempt to pickle a model containing your factors will get a clear failure immediately, instead of only later when they try to unpickle.) .. warning:: Do not store evaluation-related state in attributes of your factor object! The same factor object may appear in two totally different formulas, or if you have two factor objects which compare equally, then only one may be executed, and which one this is may vary randomly depending on how :func:`build_design_matrices` is called! Use only the `state` dictionary for storing state. The lifecycle of a factor object therefore looks like: #. Initialized. #. :meth:`memorize_passes_needed` is called. #. 
``for i in range(passes_needed):`` #. :meth:`memorize_chunk` is called one or more times #. :meth:`memorize_finish` is called #. :meth:`eval` is called zero or more times. Alternative formula implementations ----------------------------------- Even if you hate Patsy's formulas all together, to the extent that you're going to go and implement your own competing mechanism for defining formulas, you can still Patsy-based interfaces. Unfortunately, this isn't *quite* as clean as we'd like, because for now there's no way to define a custom :class:`DesignMatrixBuilder`. So you do still have to go through Patsy's formula-building machinery. But, this machinery simply passes numerical data through unchanged, so in extremis you can: * Define a special factor object that simply defers to your existing machinery * Define the magic ``__patsy_get_model_desc__`` method on your formula object. :func:`dmatrix` and friends check for the presence of this method on any object that is passed in, and if found, it is called (passing in the :class:`EvalEnvironment`), and expected to return a :class:`ModelDesc`. And your :class:`ModelDesc` can, of course, include your special factor object(s). Put together, it looks something like this: .. code-block:: python class MyAlternativeFactor(object): # A factor object that simply returns the design def __init__(self, alternative_formula, side): self.alternative_formula = alternative_formula self.side = side def name(self): return self.side def memorize_passes_needed(self, state): return 0 def eval(self, state, data): return self.alternative_formula.get_matrix(self.side, data) class MyAlternativeFormula(object): ... def __patsy_get_model_desc__(self, eval_env): return ModelDesc([Term([MyAlternativeFactor(self, side="left")])], [Term([MyAlternativeFactor(self, side="right")])], my_formula = MyAlternativeFormula(...) dmatrix(my_formula, data) The only downside to this approach is that you can't control the names of individual columns. 
(A workaround would be to create multiple terms each with its own factor that returns a different pieces of your overall matrix.) If this is a problem for you, though, then let's talk -- we can probably work something out. patsy-0.5.2/doc/figures/000077500000000000000000000000001412400214200150555ustar00rootroot00000000000000patsy-0.5.2/doc/figures/formula-structure.png000066400000000000000000000451121412400214200212710ustar00rootroot00000000000000PNG  IHDRX~sBIT|d pHYs tEXtSoftwarewww.inkscape.org< IDATxw\EߟPBUJP#`D EQC_z khCh MjB{s{뵯=gfsfygfdfAHZ60 `q3\`]@҉t3{Ly [RA4W+ @o I$G4:A i*Յfhf)WAo$,XA i'`,h 7f)p`e`.9j3;/g(tz1>tz)PGf]A,2H:0>H^4#ڹUDfnj,}>XAH: 8Ffmajv234x  HsKkdL 3{<\INK))fUGS'Y9Ok'51`! .ί4p\}Ii\umX\9R? pI:`7.QmNkfpGB\j CA`f%:IfvJ ?RlUD3[/̮4~XXbn.^_໒gIV}e Vlv|_v>P25|J_, Z/vsEydA1DAGBEyurىAv\}ʇ4E:(r)>9ظAtP 與w֒(pd=$NqA0P h3/`pR8SqgI= j}AC   >23<Ε 3dfŌ<^QY5 |L 3{\n/{~ ,Z%wŭV{ HԐ9`u  L AA4P  L(XAA &   VAA + :XAЂHZ_3`3XjJ)RǘC3QWgם].J3t:}x9[w$- =XWwJbw˒Vm7e}`A~n~f+t>0( 'no'3>JA [nfcjo4dx>piVނWe ufdL :כKU\U OXIwG}AЂvqSہq\㖯Yᒶ6RcgW$i!`0*l3PM|p#IlS؂Y$lf7^*Ӏ2W‡OJ7pfv~;`m 5! h!$ ,NV`s`t_8p2\[eí8Ki~.>Frŕ{p7$2w׆w?`t/\q:(IvOi-u|U•ӽ~ n)6GҢ3yOA`Ak2t\4Ca[vƝ_4}|{R>ffXpݽ)l|σ+xC~'WORJJ^8dfכYn}hf'-fvd\-՟-" h-dI h[rr!x6`q`tMJ$9S)U(˧へ?XQ^>4}_ IIZݴSjj:R7U} 츦+r;ǓY|US׫_:/u5>v3.i[\wd_X;Y&;~n `A af%[x(} KHbf3|Gҋ|IkUr+`Qy3;4UNܯ)33Ir3+>sew`3lfQIׁ$-dfOgߧk YqP)pdCAЊu]Y2\Q <'|)HNCt|;}j:IwHz$])頪4 >V_sy)M#IpK$% q—^]>",XAz(m.i#-w-p6>oK|cuTI-QC9_Ls1&m`%Y" <~o}*KUsv:d>2 u—8X1}^'8 i~AЇ-C &m%3 8̎bKHlJ;zVwWiKfi;y-[${= 8 AЂ-]qЛWafIa%}E + {AA + AA`B  h0`Ae$!+-K $$iβe  V3$#j~`|f`3.I7I*E,L#<d]'` |!uI$-,lfD=K00=^V@ҊNȳWWvTGe!.}ķ>,Q$-:=;}̞i,e&i `KUsY{VJǷ.h~BH p ݉=Yfؙ9NuL4$ 斸R ^#ߗmlfnu :)m;no7 lVv|3e8 kQ>*X)g@3@nI{{,+hB Z54keU?|eZC(i}܎,Y:3DQ^y$p3;Nz''ed:iQ&vUK;$+Sɒ6LJ@O<sddyfvd>Zefgn}'"V#@gy|gmo0 )~֙m'ڨ@r`:q47InfI? 
>4W*+sjǃ${/mZH*idI%x ɼ&dU`)+%= | 6IkVsC{)Ui4ZIysUNfunC(_Hz w%}9/UIg W-`k,|ΒT{]Rә%-7/ 7[]>0ixǸٿdI[% Cobi`><0M mϕ--Yy[ҟ%PY^$Iץ|'JEQԮ\ہÁq9Jg9 I:>IICy+RC%=|(vU~4 =+l]v[IsI:E$|ޮ7M`vB.&%O>-/iiI[uPwA3`fe`Aܗv|x7ko.>Ji7L+kE`yx|l l STGJˠ, oҍUu1  x#+&L[)~Ǣ55>-Z K5F%_ юwϴ  x] _w2272g4&Mט1KZol ƛ#T~A[{yNKs  !G,l*|.2i٧.wߕ4Z_$DJZVdI/$싿In#~lH߮[yxIC{Q(kٖ6u|fV4߼o_TsCU]`fa1?#p4$[ 4SH΂Q\?Fo+uժT)~_X[ߩ'q婮Aj{.VqUFb6c>, ܝ \ծ|SO9w2x;Q7Nmո xW{r_ސ ^6jxϖT92R`N> d3]/K \+\WL{f]8|x"Q}Q*\ph0FfHZ hTŇ vŭHÁ1^OJYJ[0;>s>;a K LJfgq:b৒<-3;/ w~!`f*cwh3FU݆yTTIGH P~g MXR>y`'PHBAaf~%&m\[&vKXm`f'r|fv}L҃~/id 恪UibhoZҮ^U{E:gmqe530[F.jßO!mjQhzW'L47ڑ]yV̇('NOx*0gcQfЄQ({Jڶ*3;wN=!M~O!xc]܇7WHK-A$-K x#|.M;eW4e>o`/I ᶸn-WΣG~52{ޔtp%lr* ބ;Mĭ b#?#SMpR.pn:J^x1quul)Aܺuڠbm|5䯸 uF9{=S>o/Ѷ4%HZ Qp9>4,lGnݮ>tO'$e}`C|e^d?oO^bO)#Y?gV%piv|k|4`>&ὴZӜ?puYqix)+$dxwTŇW/Ç=o+1_Z!oqa{O^1]wpZ2w^>-X֥.>|՟º#T-V <3{88/J3?ΰOF2t3 ߬)9ނ3ݘe7Kdل[l/ \):nEug۬xoJ^/CS>[_#}\I^ogN~UE+OS$mBgx86KՁk3Ju49aH(6Qt;,, Zxǩnj It=9<&PADŽ=HW{&.e}\Lqp]_qgp/Qf`A7o6=Fɚ~ yHۦ0|kӰcYiZw~ 83@r~2T-)c=H7[[~p~=J% hE?gGX5NZj[\Z wH.S 苄]$ZW׷+U/)[зIN˭DžunA Vi/Jզ9#kH[l[VƧ­[ֿ2 Vt@Z}\Z _g$pKM $ lBź&ɺef6` +jd,6 +A+!i\g4phfJ-z5`A"m3nLşVAJH WW nA0Y#𦸕j q\k AoE T|㣀M A(XABRRu}TlAHm~n2˔-P'Tl(R {Aא2߭/7֭[b +hQ.;㫩Or F`|};܇k<See VmC6­T[V2e F[ [E] 7⃾B(XAEҜ6RpT]CA$ǭpekS)*֭,` IPDyi:\>/S 'm/&߭ypk}\xAPB I+T;lHA0S]•ǨX/S YB I/Vm*3PlAtɺ߭fvgAW+h [▪luACe3;ly`F +( I PDy%|H`M)S  B JAҩ:M쁒E  VP 戥 V%  ӯl  ZP  L(XAA fh$/W0v0K3'Wޅ u&iU#Ik &٪E;cfwup IգL zKl/ Vc?BjˎOډmz6LazX .7R۹X|BGu[zΦ+9zX>I(X?Ub)݁ۀ[RX7=7ZupO dfJAm?:o+1='Za,MVgf))׀zZJ(X݈݈+ H+ 1i% ~ offei-HVw^>y_l5:Tv\\;: `a*3j_Y|a~[!A#kG`ϫer^q'5:Eq33f6Pu}Iϙٔ8>,q'fVXJˮr9aA>%2,I5gQ+ WsPQX^3wSxޮ\l(wh|ipќ=AEѪ_LaUss U3 ;,nl:A@`:of؏HAͲ7疤3"t:B*N -i>R(h&u=aEۤgQһxjvDbx_ki13;WD|9ER[ikr/#x2y _#e sjs^8;YYxny iŷyfoIPrsIz=YeNcfwHZ _`s5ܢ>,)zClv26/گWɫ-*,|Bv^]+omO5331L_g7fI_:Fo4Uo 2eHo3]˕6'f6W|^VHZW ,]k mHޛ+ͬ=TKED/5=P¸r>~e6 t|B3۽4aj vof5oEs|O(W/ ۔ħT9v uí&4iy3zBJll۔T*WL)1‚AA`= AA`B r~[AWtxZ8h4l9 V!BIs-Gbpʖ%ZI_n_9$H;mt᛫Aн f1?`vď< l!  
I_&=eJ˖%PICʖI i"#i6WeҢߏ1hrBjbb0#ͬ\X`%m~bђ4߿o3lyƘYXHZ X'mtv61˖%OX{\f0Ae-Ɖ\u;KlAU"i&-K$ Hofח-Kcf&lhjbD$ |̾U,} IweI HZjfS˖'`ˡZ1AcfK^,A H3v([6`a9e\ VAЛ 7p\ٲABP#Ws%cBA/8wfaق5A`puhBB*I /$=Ik-KFҳ]X,} 6%`ifyق1L]A)[>fdC{Iǀaf6ly:ic% <=lY:~|bf'-KP!,X=7kBjl28>dA'4epwZ[1hyg~C Kfvق`fow_.[B(X=//sej`SA;HZC,dճ;M<lP,A 8890a$$Z,A1~V-[L(X݈ul\ٲ:`b ~h3{lA&l(PXXafw-Kc˖%WIlA*`uije fa 88!m2D%J#fv^4-K0cH<֕ >5!-^dX5fF*[+V Vc8_-7hMnF-%>h*`OaЍ5Hx̞)[{0ue@R?$*C$3%PfÈE{{챲 J!lB $- cwkڂ"inruLɢ%afc]ʖ k8w Z]ѯ\,A0r<aA7 V4ٞ_,AÄA&m.al5˖U lcf-Hc |QaF#AtI'c켲e lqffa # h %3l&\YvaZi0$zEl`޸+U=f$= kf-K_Dҹ\Yf,~%`/`(0ѣB8`fVuIgQiw2#ʒ4p}V<,LM[hkj)VণW#-fI_8hTUD[KbFynقQΧ=fh$}"?0]Fs{U؁.DMf gtS9M#i4d%%RIf&3j["@wX#g I RQn6) .:^yO绫&bb8/YfaPQ۬ûE pL׍h;`~3;k5ѻ0$--i`_H4 on5*< fvc[#+ßђv%IRVKfP(fIX mfw6tOAΑ, ~s/m5'ˤ6k ^SbY5M㕀9g:~9+cQp;0_bg93;:c>4,[VD3#=ϮğW?4&N&˙iz2?:t25M{,_Q']` |V#?JtxC8|#+YʀWG]|l9Wv 7z @,k,N^S?y-EYc,/ˮ:u@~3şZ'`몼~+i׫O)Y9eqgUܕ.Kmw,[VԴ`? )쏸$)pxo=$if7'xĀp G@U\! 8ؿJeE߬Oo}]Q࿸'xgN67GRϟ4O;/ito{^sgH̞\;+.?lAioKSXܟ<nny%q8? OŪ~Wnv.n) ɒ^ KU[N۰Qm`,Wt0 Wj-H?Ja+m\0 xl]|n]íX/4cҨu0WIw֘\֏/p_%odC(4/%R?e)~,lT;YIap% 2Kgkg>oA!jl;7pE/X,u3(KsAUWr cuki Vǀʖ}cJqՈ_ )))1 ێ qUMYܚYxnm'~Rvvd>F\-YS)뽔斪 r ֫dUyi VÁ-G|YEd:II߹c](V3f*Mt;PƇ >. 
IֈKd*u4̺2&9d:>Byx' '\MźU͢{ڬ<;?cpK措.byMᖩ$Y*u>1tͺ&uݒNXaBR?wjV_L ZbF 1_t:r<-X0;^y3<56~+WwC ij:7 (y-X֚OtVb4?\[5kYw"9q낅j)x2ވqQڽ>m;<;nQ:fR$'Of*XKWVzd{HZ8IKuclUR̖[+OgXVpIϏiI/o/ ' ߬2a3ZFڢgH~$H38/| fǻfV#`>raI -K!E3[)*i6k\XIgu晴,l,fkdGd;g]]d3j-pYJʗ)\%4OQ`ƈ%DW͒n~R|i(Y?KzCRqQTܼJCK6]U { =ʎ(+5ofsYxp cS}^8|fv xwV+#լeDo7PY%NNdf''%'A,i)9Uқ~Ee!)h3YfH; %_>w^=wu*tOm r@5wR̞'<սqE!AqcIӃNU)nj_s 7 nM3 )8fv?>g\JFe`3oapv\= (>gmXǕ=ʋ"O;WLA J3u>хgm|'xC$9[[>mbwP<;5h]v:_ U8H`rQx;9̗& Yxwwxg!/Kdj#g{Euڗu o$9(ih|C_`ܿ馽MdA3S{ϝ03[LzKj17uj I͓%[o%fvd[x>D) fIT_W;i׺nɻ̪cj)UuGٵ/'N^;'MH 5]u0n}yӉk_pkDFyfݎw"eN/L f}^۬ɪOŭ|[J=fVkXϐQ\QmVS}\*nӫt7n1[L7_4y`،*8ISi`_P~efԃƓ)XwXrz5 \z`U}_uUKD+#X˱+X×]ٲV{}JAV *7+J '`43ۤlY5cSg̤I<'?(?>5F!Ē ]X2@X`A4;N 7蘮:AЖK}: ZXa&+Ŵ^AЫ1'%([H(XAIJBٲA4X# h5֒`)6bB H ~ٲ6B  h]m`AcpvH["=A $Y.1u˖%Qp0 ʖ h i% 0qA/̞ޗVٲB kbAЊ *g" #i`N3{lY r+ aAA + AA`f)[5;[U!fb ̓ffO~` `p_R8GΏl||:3]aV^6zVGvvfvGR8B ŊNUa̎TP jtz7;6nm> 3V( f? .;ff|I ,?h$ NO1s]F3#@oUwC9'_>5bڬ" !f&3{ ``a3{58ש<_rRϳuCٱf&;ݑd<ګ}bʳX7잾?rVR݂}5Y'aa YЧV;EV6ؽmM]ڮa3\I[h̟-XVfh~``y` p e:lofGw&ty03pYfv[=W1fv=aYoHz Aw}kf7W>\p[j] i`,?ކ=,i\X4pyNgsI`2 0 N[ :Yu_8, |NB%k5}H.&k۠!3*/i~,G^;:~̮o^n3~1}kZ,>kn\u):TqYLJjc ݲ㍢]u^P#d IDATׁۉ?*x^|+HsC,IvYogˮW|Vv|fvԞ{.7Ky| xNYee>p]\sZqGU3F)gL~nSqe^Umy[X+nnNIwGcxl5dr!a7GpK֣4O %}Gtn8 x 80u{mz'p\i8lf137b #@<U/X3dݻU\7Rۥ|[KX{_-UL܁resY.K#t_XP ۣ'9l*,7b| pw:~Qr3o[Le'i53J/wf{`_`^ 9=Ž[pQxgג1W$- j[>I ,+hJmx -VɿeTGԢlM6>+ŭWqs^ؔ*['a\X,,,[f/~;\=FpP="ҶbUYܢ)h1??ݫNYaO|cJqg׈_0w +_ԑajLަgkk5_Mm=nN9EU@nWo?֬SVX@oqӬ5gHD23#Vʎ9^uW=^E _EY$}i闕%3*6"0+ޓ|&S*V hZHRGEX$NU^=?of g_ܯQvp_YZey,/X'M#],8W&un*(9oKRyMWIPYP2Y2;>.LJ֪=p`z4)S| |윮F4!Ok+z~8nIIKerRկcnIYuwTv|‡,`fj/$-fAۗ UfIsk-j,+~}!mIaT)_-ǎʬ#`/ |tYj$?;O|Y>eqE=L0 _^ǃq;L3 VP&~VJ:w<P# x`o|B89*F gZYVAIЈ\I`pDmj}-)P-ʝ(d*N5AS;{̈߼;{ssG$ {S< =JzFٓ9Ev)[Dm Vg#5[5=ۀϐX}I ;ST!~&9w4,%xOt>0s53h-9WC(|y9k^roШR7%xV5l 6 =Ȳ^3 .cT{^С1J;5wr.`M\.^ɵ!S5%s8[Z=['(jݫí[,9NS^R9!ץ fQ׮{E,|՜  K\sw[ /S.ND2n;tX image/svg+xml formula (ModelDesc) [Term, ..., Term] [Term, ..., Term] [factor, ..., factor] [factor, ..., factor] [factor, ..., 
factor] [factor, ..., factor] Left-hand side Right-hand side patsy-0.5.2/doc/figures/redundancy-1-a-b-ab.png000066400000000000000000000141031412400214200210710ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{\TeƟ 3 wA$%ZeYVnYfWL)(]-ifn7xGa~~9@ρ̙9>@ t4\ Lx3fr^ի/''[oL& NM>q>Y,NIIɼAJZ{Q0|pBp޽[4i{A-]TjjAAAXzɴ=O.\1Z)--=YXX8ܹsqiu:]#GV+x<B!v;@$tB,vtB"nC$j"00<v\.BEA$rrA"p0ㆆo߾^X,H$^@ @PPF#@&jrAPnfAPl6#88\.:"bF\.Rf>2 6 v!!!p\X,|0LJxza2BZU^]|>$I N!!!?t"99˟8qbM9s&3)K/G'}m+6eFnn4hRHP칼 #1)l[!Y1}ѢEgnH$㿢5:SO}}= hJ5˗* v\=2 K,yzL&v\=׮]Cjj*Z32Lp\"t{bLjqEp%`l"t{Ck&Xuuu!^/]Hkp0h40\ЀQņ`z&t"##Q\\Mk_Ο\ Q;mv (f*g%4pQn-uY ,X0ԩS2C r㘶VKpP7Wݡ{}CW@՝bVL n4@|=.~}ADS\1j78UT'Zxv=, 6l̏fX{zu!#7 㿥.`KКbq Wau`i\+Ӛ ֋/t̙3uE\ykryPA( \n#.j:L/^SO㪋qܸ kx e W{gshp?M<9 hvC Q9 "_rSȆ.OXXL233$XqOZ|;N8Ok&Xiii5MgeDF>8Y tm/<bZڵkرcW]'٨k>嚽P|<=/m!{Lj=߁m> \ɡ# 77ũGnn͛74 {Ѐ5FDD GQQQ%֭IE Zt:222Қ ֥KfWnlff0 SRRn<,$E=PRRNkU[[+z:@L&M5r>$k1LP*hͼ&$$xd2J豲@qm+6{AFFĞ V||-(((9a >P^>dÒʇҞp8φΚ5k ,X[n 8q"LÞ[Q^踁ڟm+6ZUUUc=Vll,%JqFDDD 11q+`͞=LM& ÁL&kHg̙Fv\=ؽ{7| Vddd]`[7rn5 =nD"&O| &tR__q8%zR)zqLV\YDvM&L&ҥKҚ ŋ v\= 6mZX!C,3{4ALM&Khh(V^DVE`Μ95x,x^na3y歍$F:Z3:x rNiԩSddX,h4Ghk̘1Q L\b`ʕn^ Vm,6(Ak&X/˻" B"** ?&Z30aBWnACCaL25/u d1_nu.و3swp$6X_~f5w̦M ҙ};ed\v ;wd{`b#k,6d2ݛ` V^^&M _T*… Gk&XYYYu:;X-㭷ZHxm4>o*bqp8۟pv]__+Ҭ={0(!!-fzx"{n|A=0S"3.X,\.of͚Um=uEH>}`rL%&\T޲ͪS6r/ />8Z ;򳼭1㫙x'%0O2x10] V\oZ3W+VMv; 9Bxg8Rг$}=;ݧKo2QPTD>'qK|{?c7szu|~wGY7tEj5rssgO]T*U};g *0˕xS-^8`4gW>kqΐ!XW^is7„ B_ M?ls3x<5s^=iq,N~r9 0x|k`k{}l'ZfROAvq}[kkP(W"""EEE%fURR2jĈ xmTj=fփu0F RpjRٯ{ Kz},NWVZ=,X  j;U ׅa+=!`'wI|/{mٻuz݊˺0::<zJz2ccYZ=qwW;hlڴi9`544(9n.t8#B,eO4ʪ{=5{Zk5X1yb|cwnu_cۻJgMM%j?8}nS07o~ ?:K=fNk>@x6f;F.=ׅ _AFZ7/8P(&;pH÷zD@꠲Z1<:$nޗY,m0py]8T"A.yz p\u^{5d c!#bG;DGGckh\.ƓQQXLLyv<e1]qD=€b 衭KLX:x ? 
Ep+/wNKkE]`:k֬'F;a {w @R`UjOCnnǥ/7|O aÆ}@ed;_\.BBBV r*(x֤@!B@ ) t @)B@ ) t @)Bf3JK!_,xv1GkR @f3LFlcD6 $;Vb R @z=Ν;rW||q2m/m$zLkR @_3<1LSOE `Ȑ!Wh!J† ަ5,Ph `<{f7^BR0uTfkh&XNSHLvL W@pwsk+Vj6 AV#---Z @ 0Z~}nTT^ݘ[ `"t{t:[>+N0E ܆N+ tjS w/4YCN) n-`U\\VqEhZTUU5) t@)Y 0{lRH6 UBWnB@\\&A,cҤI-L#i@@hRᕴ&A&aٲeК ;:6C6 N';Pb !,, k׮BOSK.'˿xt:yW@!DEEaƍfu@/m?~25/@)`h+**JO6^#f֭[5;v<& ) t HNNy)))m6ol5HUc-2V5 pw >:N( Ea̙ /_|>T*)HQESZ(jll)(NGQEQ&R*v;T*)NnJTRf(R*^((ZMQEi4JRQEQR*磬V+T*)E9NJTR6GJxTՌG^x4ͭzx<t2V+`005 㱡ouuuFƣVe<Fƣfc<\.<*ʛ<<*Jjʔ)<,Ye˖@q%,,xOTWWJLLNOO|ĉg}vNvv_~n̘1//cǎ[AApϞ=8PshѸa͛/^aÆ-7oްӧOW>3ϟ?~ԨQ3rrr8p4eŊO?T4tiu׮]<۶m04445???zKbbbzFFF9rzvvߏ=ܦ?\=vW֮][]VVH$m߾=`0,7n|ŒaÆ͟7o^TeeO? G5eѢE$%%ܻw)>>]<6m3go֭˗/XZUUވ#o925;;W_U=z?*++O䗗ѴIII\޳g5>>;w}6mA]]{_q߾}cǾL{iǎۻ}+***'Lp 8Eq$NN?pIENDB`patsy-0.5.2/doc/figures/redundancy-1-ar-br-arbr.png000066400000000000000000000163171412400214200220120ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<LIDATxw@gǿ!aaVբVj[Z^PkuU"V*hvV[=nEKE$`}>{sw=_EQ K pDEE5XWRRұɓ''x۩ۧM6W\>..nRlaÆE ,' GEE}ΎٳV={&iرc͞=;O:3.+^| .k֬9)nڴiƊ+ܺuHR_5cǎ{uvuݺu˓M2e111r ry'(RRRvO:3{{Ҵbcc't:u֭HLL$;.Z(%==˭[ 8𛒒ɓ'Gkno޽]Swlƅ }#**SNҳgϭ@s)mڴ9gOM6V̙3Ǩjjl޼9s2el˖-9k֬Y0L&mܸq!qo߾,++kHnݶܹsJ``O}ZѼ<899A$())xxx@.eeePTL&;lll J D"899Rnnnd0 PT(++74  <==a0rѪU+prrX,}5 :4lx5zyyABPh4<pvv6K^Gii)f!H GGGרQ.C?F;;;UӨjUFB:thQv+::zWdd1Ѳ E-Vak?D]?̐4}زe)wnڴiPTy3aϏ2P_`P^pw*Be @PPAzy;w777(#X=4hf?I,[FDze1.gXQEz\\\{c-^82Vϟc}CKKK-`"222crr˨"X=vvvprr*kX+W2V75cKQ*QEzb1bbb&5sT(vn,c(W`ö($<Fz1V\\W...nY(<ė !-e=f5k֬kTxĉ Ng/қbػ;>L뤼\  OA&0{9b?~@.]p؞p 􀠃/d盧j=p.Xteds잂XPݳARA"kX?yzz}*|ecS*hV+Eǐipsk #kvc3(i=2lڴi]3bXH<:uڽ2?腿^VCbvU*֮];!! PwChr`bGBʃJ=Cd2ݻ]3G#f h%y'ކRA 34gڟ9zh=}{CnD|||{1c :*yc=|ۀJ},KΖSfėP^BL;PDǎ31VBBoQetoh4/|(,<%Y[[[1W(bbb-ʊte?G@QMu8vVl(,,Ķmۦ5rqqZFNg4? 
|p8P8ػJ&Klrx<O5c ,ɢ?oC|.z8JB[%0m~/ޛ@ݺ΅ۍxx;mA;{_WsvvFBB?n֏LJ5X|!-"jo[@ѳ%??I7nT2Vyyh}zA_Goo5c+Vld0'x[>Vx<-Z+]3WreTHiӦ1W$ɘ2˨"X=, 'c}W~~dy\r]3ZjU89b̥5c{u"},t:(Jf5r^^^QEz|}}~8fm0,`( ,X`,]3/J-`( Nt =GheՀF\ط1YV,e\ )"TEYfC/c|!u‚׶@۶mr0kwӂUqpVŁzDGGT2Vzzx|||SC3%//c^! {Nk5cƌ]*}sd0шzXތ~p,@Q`5pssc&`/22/"Z o XbBTP. >pΚ_2cϞ=!_MeN^^^wZvx<˨"X=7nE&F!??gNkX,K&<7rL'/ɣB H;c5f߇`@EE ]3Ɗ2VOT `u%%%QEz0z `Uj# @0JTfV @h0z `UkΜ9ZF @hj 9sfH$*S5@1ԼPnnnc-Xg˨"X=...HJJ:F׌Gi撟ɓ''5c,VB&^# ǻM׌VXLI0wwwnkXK, S(5E CU*eT^8;cիWxyyaʕ1ֆ j˨"X=%%%E׌]L&*գjT*[5cѣG\6lXD׌ },< G׌?>y'ˣtk׮|eT???DEE}I׌BCCQEzt:t]>VdQQeTTfժUd)\z칕c͘1i!b*<ŃN\)>eVs/]L+ŕs(h D666`3Ɗ+_[[,f};B3?VWcwl2,6]d|#*рIai 0 0x>x r$w/Ď߸ ZdZ)6^\j6/Ӻ$߸ zqs{kf (7` ޒ^űɵ̓`WѵYo RVD5>7†ѫSAQ֟_\o>@`۵oA#jmC="Aá7:*DmS?:_Qʫ  kp8O5~;D[M}K|$Z 77W|e.0P_nÆekܶVAYxG~パ75>އ2blwd2eeaC|86߸s2jP%D_4m0$x :;k'X|2-AvS rs },9-S:'H?]Ks]4v6vйps1>8׺S@3!egz VZL&{/$`anL-pt-[uv6t{4Gg|u[RY Ӗ(hB}u1>5_MjLGزqZ4JC)|t^myt"7{{^}]tO:dj7/2j$V `t>#G}a4y/GGE dܻsz=6^$Ҿ=|տX1=%)#R3g!f: ذl0TRV)'P3Nx!m65Q*N(RA]S\'Gפ~#H2!---d֒/a|TH>{m6}LX%~5:tÔ xٳ] W1^(:\s۞LHshp$ޣ)WfJxLd7mXlZ-1VUӧ ]j;h<{{Vjh[0н`?VqrS ,X]l!x9=|S>k\ngVfqf?d/f?jjO~u~AAY^a0`#U㎨>k>l7!]ܐ\6Yx[-M{$gw.x>Qmxwj}%YEW#4g/ ѯlEejDhkmrP\KJII\.Ǝ[}: /B2 @ < ...YtBM!%%1… a3s)**ԩScZ2VÁ})]3Zj՗>>>Xnٳ+*,3"HLLdffu̙p24`.O @ FfͰhѢf$EV#== ]3ڲex2U$\*h`P8p7tm `cc+j5=Gg(OL_#+\X, a4QRR8(PiӦɡF#% )\NQESR(*..)*//(R(P(z=h(PHi4 R(B!UZZJQEQ"(H$TAAEQ%(PHF Tyy9(PHj "XZZhT*5jQ12(HEEEfkc43R)Q.3j5,BDcEEFPHM4){޼yT/^̿qw{8{~E%&&N8q),,lyJ~∈Q˖-gsԱcǎn۶%D7n|={5sL}5k}fff&͙3ѣG'-Yo>=zL222gŋOo޼90//oϏ1>>ٳi48qJXXdCDnڵKyp˖-lL6tݺu\8w\ѣGxbdzzZNOOIJYx1?;;zx3gׯ_dbb gƸ8Ϭ_`yFő#G hҥ9{- m۶S;v0~شiI55t{jh~$&{`h`IENDB`patsy-0.5.2/doc/figures/redundancy-1-ar-br.png000066400000000000000000000126061412400214200210630ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATxyXSw/BIXdStıiuںNV>թ[1SEmDXQ@umGg:Ҫ㸌S6.mEQKزAHB¹\?Ms'9'9z0 m@=8 .jmm},++  /iii84337ogggT*G,\G^t鵑#G~ܼ{[._|Exx~,X|dɒYYYjz֭[׭[7ʕ+u:رcw'%%]ܴiӊ肬, _222RT*N7 4D"9 
i\~%K̵ZA6mZbŊ&Q.n߾+W4iҤxǏ=!!aϨQ'cNNθ.\QyyysM9rgXVV+w>`3sܦo[t͒Ur)))|222Rv;H*Nq[la3Θ1"ܹsSv5JTq`uu5V@444a0` H`6a2 H ^ xzzB!00PG@@t:8z=D"L&f3BCCX } h)hjQF///.3bh4B$  rh4p*nGcc#V@ARu`0n(7A!!!wh41tP|Wɯ5ŋHMMm{>__9?l~.NFt{yde[n tx+<Aai*=?`0 TTG3F\\~1rjժ5(ɓ'_`kX˖-h\r{555ϗ5i,///kRQn#<<[&f͚b5('Ic^zZcckRQnؚ4`g\r{gkXCCC]r{"5iqMMMIE=FylM> U*Vcٛ[a3Y _WI=`4VffF>?s+uWh1c鴞f@hh(75y+<|pjuzǧAŎ hA tO6[˨+Gs^+ӧ;%# 30ԣl=>n܏3gC>i#x߸0a2j5i'Y,;V8xOMiw:;=Eġi2. | Nwu>[~֤<<<+X]lMK*~TnjJKK?aGr .8rB+##cJrM*7"I'nx?V pxyy8QWW;w.bkr\.5X``4MLǙ+&S5b_ \1P(DLLa&C/tv??u[WIXZpzQo 6lAH4܃] -3c*anrQ"NFT4h Plפ8p )M{ ?0u'_߅X,ؿccc:4-[ GvY0WǠP]]K.@c tq[Y!!!=zv&X_L_?QP90Β%B݅[ٚ4ŋ_JMMϹ\,<:X\tN&T*޽;v[k~~O*1Tϵ8֤ IE=???̙3|DP4&AϡA.M&P(Ivʽ^,:@z,XVDKGd RrR ~UB9}q^jA!՛' Hceffn wM*텆ޞlckXOuCCkRQn}@ [j Tk oJe2P("`t+$5iIE0$''B:@vjX+W|5(jQ^^w:@+r|uҥKjפ^]]vz,???z+@WfggvM*|Z5+VM1USSwKbkx<Ba ׭[.I9+(( k׮}h4vEC lMfdrM*pȱi" @9+$$ٚ4͛777&$5i˗/]oEQ`XD!5{?гBY͛;OcQN3dٚ4֡CO)gFTWW'5iÇDIE~a_5iaÆ)\kRQnjرc XIE=N/va'$ FyIc^N96R?,^x)KrV_Bm@޽c\}n85@ppJu @; 6^?^;c8R!7']֣t:X{ oh_^EY_}Sk]ow X3@C0o`|0|88дƁ*=ԣGH$~~ӄ /]b#+//oܬYɳBjUYoU*v;\S hZ*,, th ͬϿ1l:fН_pѢEbxbכ沚oo1r/w uɟM0՚ŀT|>y 㢢zr yyy p/~F/yb555At^{رq|~.txF"󡡽rxzzvUvBٳUY H9t^^/063'#O1֞={ވ^xE#Yo $:D؈W>С|D(vyVods(xGq\jC=Y 0s̳;@ ՁVek:@Ǐ֤""".қ)(gu;@w>z,ooo$%%5WbӦM,45(O O"%%e*[ƲZB,???j&UPPPBEP}BV###c<[T^AP} @ :@t'OT>AP}Dߥo HvECPPbccO} {x;wM&%J @Q=!H*ؚ4B&H\r{ؚ4VNNN2lr֝Hci4-tvId @Q(,,<Ez3,VQlMڵkI/Yv6NkѢErVXX6mڴIc:u*d0bŊD&7L&SNDD֬Y9[ o Qjnnƶm5iO?t]*rV:4Vפ^hh(&M5[6@]>0 ž{O\ ? 
d --mי6FT2aat:0 RJ0 4440 0hdJ%cۙFT2---Lkk+T*&aFT2 0L}}=VaV2 0zQ*L[[c6Rl6j2JinndT$ccc#eF+V+h6IF^O2jZ$J"u:h0Hff9QTޖJ7o^:OKRٳg/]~~4**|ʕd2r>3??3\j9Ek׮lْaÆqUUU/̘1"ܹsSv5JGe2b\.[vvv:i.,,5k4ay}ѫuuu?VTT|x|\\ܩ)ST+߯^L&+((8.vߢNf&n2l˵gL2L!WWW'H] xbҘ1c|ݻ'*11x||h4?CQ^AJEo-dIENDB`patsy-0.5.2/doc/figures/redundancy-1-ar.png000066400000000000000000000111561412400214200204610ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{Tw/n@ 㖛+=U,;"Z;g*uEt9lWZ:v#ԩ+BByFl6|OJs{}!/t`(,,׷#99TIIɧ-*J-eeeYdRH䨬S\\bػwoƍyfVaaOIUUђ|F3f477O˫SN=Y7eʔw^ŋZVV|ܣP(LԟQ.7m޼ÂwVXroooݻYfV,--Nlnn~277ↆcƌ9!F>{;~翬@<\~qXHO N7ƚ9scbbx/_gSyElzd_G ^wyO\H'tLJJJBMMؚk,???'}c#;kA4`p~?߈Թ{p/صZصkTBqtu_ӗűDD/gbe`h4ڵk9lͽc?QQQ‡։o  N `2}W"Wo#-u6fLٳgsƚ;wEc|1xG8H“~>m#{W$|@ؚۣkk6??_T>ׄ.y}l6ycǼc^_1N_z饃cÅK4گ`"5%ϭ`"@/&&ؚk,Bq,""BT>֩>ZߡVߑt XEEE:::Ibqpn5 j\1D" ʇED$8e4!2DX !QpF^'JR\cmܸh,7@w5U{$3#=Iޝ@S&C|^]]mIHg B]-<< kժU/+_Euc/gc @6ç RTP*%AeC~, gGy>y L&ھ}^@Fwl6cƍO5X z03͸yf[s'H'O|D&켪Az]@pmuh>GWw@$&&}kG}M*¤J 0rH;󭀁fѣ5־}^W(6m`x |0tsRSSO涄XL6]ޱ/_7a""""ؚ{ZtDxOGGؚkb1eE pݤ:wB"r3I&h4%#>MRaժUsz'K&=DDdZ '%hn 0UGOގn"FcfVOr_ 5C>t(**t:t:3R0LZbbb mpKRx :x' x:d450@ رc z86 #^jkU$k`@&[s5nܸ/IE|^||<5XcBCqp9h4<X%%%d—NCccfD"L*45} MMVch+22IIIؚBIg׬YKD|J¢EұlaK,C*r{KRH<}),++O<\c1 fXIE";xRwGhk`ˏ aRg`Xؚ[ O>=vĉD"ѐO&/j:cf3Jh~V&yf Ìak&OCR ?Æ CAAl5֤I:/Woo/N:M<sf[[0xԋ4@%ˑ]փ4 <mjK.toܿ^4@x  ^A7Y4@x  DgChiiO&/^f2fmiZێ[ :4g=,X8ܗBDGW.s#Oh+Wi=zQQQ¤">o? Џt:Bؚjhhha  ^ߏ .˄ 4@xH[[v= F/ @jkkŋ!L*BCCɰ5-ēh,FlMWRH<Ϣ+h& @<+h  @+ |Y U¤">/** iiiؚ&qinM"@.5-ēܿ޴it kZ6GTD"q׶RH<})ܱcGLArիW_/E'/gϞC>r3ghS  ^1w ݍwyg[shH  ^clMGׯ_0`nh^e0X,`N6 fpaXV00oͣ{^Zpc+0p\0n0q˨VZh4VNG2v.cww7=Vj`ɨRe. 
===\ƾ>^--p7Sttt>rߞyuYYٜK֭T*H$uyy#[o5h4&WWW_QQ1aq ,8,mjj:<ҤP(ZJk6mH$UTToٲ%KgϞ+++ǩTQ[ly߾}---rssY,cǎ=}5smݺq^?]v_v-sGZ[[%SN&''[80OFBq6eRNN ~vLYYٜo֯_AT o۶4pܹ(όse2sJ@@)V5v ha(Cqŋ䜐H$κ~jFNRSSs͘wTшfff6dddf3oЄx }CPIENDB`patsy-0.5.2/doc/figures/redundancy-1-br-arb-combined.png000066400000000000000000000145361412400214200230070ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATxy\SWƟ$%RVVֶVnցRZ@].VѺVZj֭4$,IM?½!"&P/~77χn\2URĹ(d2ծ =|}LJ \ST*UWW/{.T*agg'''\]]Q^^^BJhZ(  j@(BR vvvP*pppL&JMpvvZ˲pssVEee%<==Q]] eY'yr%ʍF8(fHB8 (eYFBMhh`fYPT, 'vB! 2*@NQP,XnшJ4F#( Fe, 9EAe! ^˅@(( q߂rLbUOcMhD_oo|IoGu5s̢H߇ϫZhUZ9L~/MB MZ‘J#tx0##x)T(^A^~K``&yr '˹ 4ZpZ6 Z (}ۓ{a?:RRRcQۨ"{) >^^9\߱VFLXh&O.++*BHǴiӖq5orw^oUvX } WJOOO*BM$Bjjj2WJNNh4QEh j‚2Se^;EIhM&0 cռ͛qF"od4.hfB;S,ƚ5ks5#E%yc-kNS(p\x4ކ8ևSnSm.EhH) Q36Y?/ d޼u n,,į^=<&X=s-o,uECDdMxc̋͟4+ ys s@o "X kw6)SzRX Nxc]rRb!Cr'M!J,@ `DX ؘb2NOyE(l,, vގdr@Zڜ"b޿ vħgxW\Z@REKVP8>x=N|%>$JJm ),B&[] !֭ b@trrOM[pDF>z֢[7g>jhRM qL!^Nb<)0 W_nmg_Wx֬O];pSƎ hB~CN UUwgV { Ьki4de݇ő0֊Uj E_U b |1xa~)'Rմ#wp>o˗-*Ws {+wGj5yNDD 槌VtRnʺEghq`F삾}$CIqÞݷpz)N~O-ۅj&iغ:f[珀C1`9$i D"!'O!{y:2Y/(8;9jX)~J={vjb;Գ_\./&յn씈ޘ9{l~{Ƅo>CHsgLZTT9vܵykR{æ̙Ңz|Si؊h ib1UTO~ iji )B*p#2gn )OYUX//kubYnk@Bx6-Iш >37Q{t?e+羠?cT=W?H1@zio;ooT^-><> 6lP?~qcJJ^zJn׳0 Οjz.hwZ ܊e) ?i!^đo!%x>55_пXߝ=JҴw WqT!^x'Wrq`q2[|~ZFnnvM>[7'Lx&R j! I_% 0 $ۑ#ʧy'26^s'uZy99*d"Fu=cEu>U{E5o(aûUTJ#:K w˯63FghZ[ '/jX=Y99&b=/91kq^361lxz%Kk-t&233 @k-#q K&;[y@S]^uR*FV۠S';   n" ycM8kuVV6g w>9h!*11OM&X ռ &;o#Ѿqit޽i=k~1O   $@*4;@-5W\Ԉ:f\Qa_!yi6]+Şݷ5;ȧ?? :&;p[GZQ6ӬHHOU(زH(lޔӪmx'mȴ{xH1bDݛYaUUU"TJ2%槧0t9-L#!BNx1V;9ifcucKd2|ZsT\n 9po/]1{aa%z|%Ş5_1]P0X`ĺt'@왆|>m^bͪŸ BTpXC<Ȯ_nVr7 t<,W xSn:d/R}{saLzb1lB[eX{j ^}˗]͟y.Mƍ/p5orrXC':7ֶ'(;J/@ K5p `MX\rM TVL{aX᯾)8wnUW*+6PY#L쀳grM}K Ga8h2jO?h"N4 'FN;"z="eXvv0  &JzŠ  (ܭ p_B,`hDrakgjEO& Jn"("Zz=L*k5jYjF@!6x*zDjti8j ph55pը!Xj0@WaPQe(p_kF-ˢ$0h,G|IIaFF/@@\}0(pBqii,T*=%4'& 'IgY. Sks? 
0;W:x)\ii S,e"--< --%bxHK _z4a s##[5a w& Mgu~~[9+\]r9 M5]N'& NR99( BO2_Ghhx>&&ŋc|ݤ-[RRRDFFQՃRSSE"Ν;[XX85 633ݡC~?o޼k׮qo.]d۶m#"";ܹdLLLX,bݹsgwd2g92sܸq7w=pm111g,YKJJ*<<<RRRDEET*FPX|ҥq} t:ݻGGGK$ ܹ8mڴOr݅ lohfeӧHG^5w)X+`PwUs\\\0dȐ6~Lge71իJ$"X(<P(DJJt? "W,rDFFΡkXEEE'},t:h4of5{nnnQEx<==gϞ-tĽ<JB\\\ ]3}Y&GQTD 3f@0W~O 3Q;2пDFF&5c,??|>I ̣#\aÆ[pɿ^+==}ի1qD#X.2 S|4c  'NLk抵bŊc^z z1("R NNNt+,,,0$$Fޢkhs3Ah*dffЬ)tssgkKV!G111|@0[[[z2]6[6cI$%%N; PYYKG׌ AJX[[Ύ7"X<$@`R)"""5c  =P__)3Ɗ<}\2Y.Gi AyT, @0 $@`H$I @:[f9+\:]R p`οlHCu)BCr ֚B7[$_F]޷ր|u:UVb-ҥ_T ÿ_žO ] 0@Vp߫_r>sl̦c'a0Pرz_Ex< 8<]3Ɗ?ǫ'Lhxfھ:ǾU0+j4zd/?_J<^_#GaF~:W9sZ.t_qXܿg am‰)?&e Cg&فߥ d/5u k}⧰:.v}\|> llgy"b HJJpST*U(+\-a1ͻ 44B-X,+TTԠÐ#qXzc*`nXVH[z)FcvmlϖLpFBjlxyh2υ\^~lx#˜1m̃JJhMr8殆1֎; EPఛH -Cjk x5._ |\plX=֫,<8uIuea%%jDjRRR5cԱӦMѓY tQJBe\\7MZo)1fԗmhϼ-=C#*=8V4(Ϝ{~N.#11q֭[oXnݚPXhoO1p`䒊 mΗo6xYiSǻM.Rvܪ%jp8,x>hr{:Atkܹqf']Im4ךƨf2:xͶV8///&5c>;/իb46yZI@,n X4KM~E6uK96_S0.\_Dnj1L+;;{С}s=u fԛ8&[7gKqh2LJ?P1kNu)de݇@vœ0g)چR X,+l};ƞcǞ}/W2zl.>B1sVG= =ۅ娬AX(ɯ =ƌmΆ-;|dp9&O~ l;MkOy2ں6,__>5G33SrwC|vZ3uuX̿'&X\.g*t:Ν;hڱcGHe M/U1~+W= вn 'srrM!x{;SP)\z [PCt<@৾:՜LJX,\\\@) [EfG ~?$ڗFohJ @0 $@`Z0wIII B# N66xo YOӭB ǎaի-oaO?aSwoi \%@`Qv6 cǂbAR[7N%%v6kBL.bӸqచv/fBL7oлi \l2ivlmAQTUUXӃO72ܒmoHR/HNNQ|:Ȋ}0%j6G#6?Fe`~˗quu}Ch^qiz\٬r)]ȷ1߂i +//_~;`ԦWaafcҀWLJ%lrΣvBylm}Z Dg/~-hjԜ۷[{pS&M>5a}YO m׮!QX 6q|#GZ캪ɓS΢5؄IHN!?hgш<}/?ɼNO'N-$cǎs̹ܺu~Tj4 s*Pijj:USSs7ܻwdǎSpiOryEEE[o}o߾ڜ>|W$QG={axmmzѣ%ïR%%%#'NFif۶m% i0TWWhjjy<Mkky^$+++?|r V-Ck l2wM*G VVVKMM555TWW Bɓ'/SUpso޼t.\_>zBnn.LLLJ`ZA&H$AD"Q T*nC$ٳgj\.ΆI(  ̚53|>x< Pv;T*0:: 2 b1$dZ @Tp8333!33f3\.@V3srr nR BFGGA$AWW7oqڵkW'+@aC+s=IۏL JQ$rly|w05_0~r9^g཮S"p ^zz:,WEq d%BIIIܤBGoYX S*ܤB'HYXrIxb3<On(&TzfVSSoIxOP#GzYX~q EQgϞRzfN޻woIV$ɐ=$D"4zfVMMoX XF;LMN$O;63lhnnpbb4^׷̒?eL7n۷O.]BfTs;O_SFE|n3֭[/JVW]7뀈4 0)))t[~ C!=3 ?^Fln}v1=3 ^LM*{gφ 60 cM6 ;H$69i|b}X,ܤBgۡo%=3 K|M*{JctLnR!KII"<wYVnR!޳X,pY<(p T*Azfaf/CzeddN33or[߄k~*3d2^0maBIoxW #bM,Bgfa=zvJ]>R)3s<7l6عszfEQƸIx/)) RRRۼl&=Jǎgfa?~ t:f=3 ֭[Z*5cEܤB`^u2=;3߿zT0AܤB`^FqF'ժU}bkGGGKxmXĖD"Fsq+D{潶Vk9Kd2֭[c 
b|bK$T*Kϸx  p[֌$I|>nRgP(dp+D [3ܸq'.T |zfK.& g|1BZ,4<ؚ@{%EQܤB`/_ާTbC*b'77*++ct-[[[pufTCX RTjDBςd屝UUUxb PB`J,@ (!@%X  P|aJ Fog|1B&&&8K@mmmm@= E1f/}׮̚5pbB x|F P\\.yflIӂ [&q ތ7o?ClEQi(.(Rz^ p [X brr]w/'fNt bkܤB' !55m;n(bǏ nݺoBlE"Bs@SVxy(=ʌ_~e >(!fV%CsYX||T$b PBdggCYYY=cIp:m^$ t۪lf-t$!B0$! B$$IvGmZApbᄊ$ SSS$I $IhXE1n7?4c4KP(d LFb({d4LLFdt:Lo )g2aVI/c$/V .W_ z]] ^ Kccc}}H$"hii㏗9mmm;6$Eڒ@GGڼANgo(Z+ ǎnhhЎ)N:YSS"4ܙ3geeee]>Oݽz…ɸ>##}p8u%cn[[۹'N޾}ݻw%%%=s={v:~1JKK/tww.,,R^^N j\bȑ#H$ 7)^&%%%}~? H|^ӝxbЊKAD;;;dg%$1k| ?Ok׮}t6mW[n5klH$*֭[/\GKKKW[ǎصk===9k׮CSSSƕ+W^=0𻬬/^yolRΝ;?-..$ upƍ9NqMM|N]WWr:::.((8bWgggɹq،[T*՟KJJBXi65\.o۽{'ś"##߸qk###G9fxzҥd4ͫgΜyf…5cyySO]---ږ析¬/6mof\n݆pkUUoohٶm[b%%%E.KPSSsHP4Ll_|PgggJ[[ 2e@}}}aZZZB8 !Qn0 D"Z- HHHl\. l6r9N'L& XDEEAB$!&&qqq0LpݐJlDRRaX v100xt:@$A!22#b}e D"cDvX,HRb``rq߰0z"::wh6r+cDD<2v$&&hFł9s?_zOn[7|㢢߾DTI~>L RY_WWpdNP!L}$\EBB233ϱ]o߾!..`$EGG#??2;s;^=ZJ;s ' zb߰3X{@&8ܹSvkǎ+IEFAQQ;-lp8IE^DDD"Q;sU]]^RR?HГJؿ-־}Z"AOӡ-VOOcX[p\Bvd~R-Vccc?(jzC;Nߑd–-[{.]ܹsI`46m͛0l7ox yJl6z,vkɒ%*ɖ+\øU_L"F4#_?k]0K+wԀwٙ[7ҹ%<"#:JzϞ˷pqOI<<% <6 ^P(.Μ9tRfO)L&\~=g9s%>> z& {14017dpfʔ)(((8bXs'gWӊP%@ BȘ pyNX\.ٙ[m۶UTTOs^jZ`*<^DF -n|/sxLz=Z[[_|WORSS_0_rۖ L.>"Z Rv%0pD5HT=;suctǞxG"ؙ[,RlwwZ-֭[ǽ*K=488O*BBB -֡CTSN' zrdgn<8=coFfgnrxkdd6-z嗏ӫBd=zt;suO*, v4;sܜo0(Bb'[:?+J ]O:̖^TX?pD"^k#A+*j v;Ν;pFF:~ -- ^ ʕ+/8LJOOo%JLLĢE>`gkÆ _ 663;?GCCC;s# (Bb@zď&O@_%^hLJjZd$jl޼y;O+%bpMR2>lW!ܯqe2;?P(43ė<ʇ- oHp())Ybuwwbnnv(d&#%%~ϣ… tN6^ ;sΧ&o3w?Ѽo#""x GQ:JW[6 **jhhXL?I5^ b͞=~R7e,[;srJ&::T$\.hZ"̭[Gz=ZZZ=矿[8sҥK[ְųfjCx<QSSs~߾} 鵵>|8٫V:-niiY4m4ɓ'xQPO윗wgff^,((`T*򔔔wqYTD"+**\.WԡCyqT*QQQqIT>ϸf%g0LBq>Knoo_wA,׌z>qٲeZeavvvsVVbp&ބ&g 3F)sIENDB`patsy-0.5.2/doc/figures/redundancy-ab.png000066400000000000000000000111741412400214200203030ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{Tupa5kٯ,Գd,l=աfe@ Q ENԬH[21W#/ _aIa44ygl7O30<ϛEQ i~L@#";;{fޖ^fD-Z"((H[UUe^X,ᵵtƒↆ/]F ikk˸[f̘͸q>qĝO'&&D{fN0+Vx 999KUUU\%$njsbڵfffxkrssv;{˖-5yyyt۷oHg|gk._iӚ{7n\k^^6B\>l6P( 88@@DDzZ `pp@$fNj 88 Z l6|>t:p8  a``!** 
nܸ"##pVp`٠T*Bhh((J `0bMA  Q,lBN'hZ2vJ0p8 ˇͨnd4 6`ɓ'áCb-Yd_VVVzKEJ4rJho}}?y)#1O2 Ǐ?HF~x_n]3g&yӿgXVZT*I|^oo/3Y,+b&y<bbb3Y҆HfR!a,֛o9O2 ~ZEdz}je&y*gXb&yB***v3YM66̤B>OTBnnz&%JBr:`C,D"{LL 3ϋ-[lgX- 3tWz&u9'ZRgX)))]P___HdX,?ALKw}7bMB^^Qz&oWT̤B>.\bYVd&y  /3Y:XL*bzfR!P( ''goJr 3cXfL,66TD"Ad6o<4 H$ z&ݝ?c!wY,gX?{[!rWLL l۶m=ź~z`&y6lؐDdj533 J,#왃AҾ'8G#w|;vqz<֬Y9=*((x~4^ƈϿ@fhIIӽAji;xw@B4X7u-D}6ڌ5k怈afJĆYPh!p#Dy_4w'zW <=n. wԦGAnzŝ;a)a%gJS A9 ?2<<>gXoV``&mEp IP;s+lY '=릮Y gJz ;@ѓM?<30'Ԝw@fRFr, x'C릮 Coowf555e:.Hr)) 2+AmVf^9|OS~1Nr[lh,L~UU$owD,Cuu6z&UWWHZX1Xv|\_!p x|ްwe o~x}D/FҌ~X.\xc.m7/y+lJ,'b,VJK{-Nf0w*=1Lz d꯺==..FX+gX#} 2wF~~얫-CnpRvB!pg؝n?/pY _+>o#y guyK`jӣ +92[VXq\V2X܀}Hҭg Fow&8 Ic V P!Z[gu^Ë/0#nCDp8\3\ɑa,w o3yƚ:u4 T ́}0wS{\5\&<>G=cCM ӇnjT;?ZZruCpRNxm`wX,p{Ex▬ KpB*U@֖EPTTi,@" 11q=c??? [a,@^+@y X  W` ,@^+\,^x"w,j2 <^#F#(Jr%YYfK(2 <f??a/Jr2 jpҥM4KOVCWWW*=c,@^@`ҥeH$<0a„O G^;//Uvr~ "@yχLkƍOa3]. JfTl "&X <"22ޥ.cx2rixrnNX <"::jkk+,ӧl6fR!@ԩS)xh2r bz&c !wL&hhhxb566DQQQ=T _Cr{#4~g?8Nh4!F(ir t:)TJz(REQ\.r9EQh(`0PRԍ7(TJݸqlT*F#EQ%J)VKQEd2JPPEQ*(t:%J)I RRZbR)e2d# ɨjIF8lFd$u:ɨRHFLvƞQ.jd$d"V[RMm6MR)`=>￯5_Rŋߝ1cD"D>}@RRRvAAW 5flrٟgMMM,N^Pؾ};{[bb+WFwvv={իWϙ3guL}ՔKKK0$$$cǎ{챳앝zzzo͸jժӧO1WRfTUU]ܷofllltZmmmmm;'N|pzϞ=v7|φ+WTRaWW?fΜ|ժUaN:UPP/_v'D"ٳ1ȑ#SRR^ܴiӕ'tbNFOO?C~mƃjRSS3cLhBB7%%%eeeqqqqGL{}&&&TJJ7|%Eqqq1fgg&''F?;<_ry=]U8blݺ 55uN'|{>}W\+++>|xaHHq޽DGG! @T8 B!2hZB! 
( B!TVVB8bZh$D ql6qtBZ&R\.'qBqL&qihh VXYY١GV=T*걺GF=666RF.zlhhGBHrzTvyԣFУL&kbP ԣF!;vk…IIq3gҒ'+44o;3'o!)!gtt4VtqJR8g QSYYxM%Hq=ވk62I89ꊵjժZV0G qS]]O5 ^6¸bT OOO Pp$¬A0G qR44Xofb؍fbk:dȫlʁ+h?:t(d2 !nt:/_>4XOh:>VyM‚AG>-J3=Bggg|Ű:dff+??BCt:\~} i}ѓ¸b0L:i^|E0b@TzkŊVVV !zj5~Ǒ-+ x~f8Պcƌb%&&WWW !jsx^`h(^]k62I˗TT9bJ̝;wifs&+D``M^H֭s7J .|4X ⍺:a\1D$ ]z(pLW{X7oX {jHIIk8a/fuuuQCǟ !J f'4`-^8y@`t5 ֐!C8V ٌ^ǚ/˅q=?[ V2%88qqq;xMdɒ5QYhk(N^k.܀3 ; ŋ]:@88;RW^Z~c"h/+\|Y@+'/w"fS!qwCIڟy}{a?#2N0=|ЌAu|t{@7pWUz?$%.z?C/7~ww뽘 , f\a*8 ś@QxdN?'AsEKs;~G^ ƀvݨ탧M4?m%*.}Y~': տF屳;Xf9993Aȶ`?<͆J70E ]9py++RPOmLٰ޷)>=0+~stge *䥗P_IkHjoy?47u>Akv(`#s(pwދWT0Fn"G`srJ/,Z~ؗ_~YW]l(N݆K_m+[S=.]IϨU=>=CN <==MοsoŃ0ex~h/F7pĄ1?> bDH}O+ļ0 n^maOoN<U$n^׾*wc۝cƍf d߾}FEEu|]H)q5%bm[o3:}cճ:-3i LzA5T*$22a/,[;v^R)~׬@Gk,777c2{@ ===m °V 0o޼弦2~l5׷4X6lmɰV J%0V p@-F +`t @-F +`t @-Fi%KbC!^:-ѣ0=0fΜ~;٢-`Ν^q2^@qoxMnݺl ^r95 Je2OH$pss{`m޼ð0a\1DOPP6m[)v3^j5f|a/fٟ4X- { CNNF^`$+@ĉli2^,j)CDFFO5 Vxxx-xa/F۷o4Xyyyo"Ҫ@=P ; ɓkZy2}3Ψ?{ʬ`tZVA$  6pGz=!Jh!( P(!hZ"! qX,pGHSS8B8BDTBQդBN#F qfq1O.W|5&H```FFFFK>9rdJJJJ@ii/HKKs:z&NիI&ʺoZpCvrt)O?%'';ve˖O0aʕ+ZFF~_OOٳ"Hvo߾M&eqҥ>Z}׿tϞ=ŋwn߾ݭbSHHHzFFF2z跖.]sĉǏfO8q=SRRʾ;vT[<>W۷O>oڵݻwیF;2/۶mh'M4(J??//E.`kIIIٴiӮ!D' et 7IENDB`patsy-0.5.2/doc/figures/redundancy-ar-arbr.png000066400000000000000000000151321412400214200212450ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{\Te?3 a``(fZ&Q\]˵} 2*dJ01/-&uf1WCY2>ssf>g<|XE@iخ6@p?f͚ŧ(jzTTԹ+V\5k[999/s8[^^ޱ%K2 Rs駟Bϟ??*..8((1cƍe˖M;wn"ծ_$%%%fq7mt:==}Rcǎ6lJ4iRR ()):tk>}~a|~uNNW)))s //oX,gZ*A"Λ7ǏLd***9`ӦMeB"++kܹsy{{krsswΰX,[n=fggg<[UU0a„sKJJ<@||z\z[n0o޼x5..nP(۷o.Zw]I{LNN^iݹxL&y䑲{ٳx֭۝2jr6o޼I,Oj1۷o_M{|7]|9QFD}͎.XEJb r!P(lFDDz=z="""`XV! 
f!!r!QWWOOOCRf!,, :QQQhhhFH$fRDHHpEt:ATAb@(FBj`P Z Bf!88F  l^Gpp0v;4  R nyZh4JB@@|}}!`Zˣ|~fw1d9rSNPXO]t金N&-[ E jͷصk-233رUPvu }A`FPPbbb˘8[ހ/&NxYaZZXX@*2 ֭Ӛ),b\xK|pLO5 R[[9sњ),N'X,qEp{WGk w+Jk֭[^w+ۣT*ºu),BZ\Z3%?s/aaaؼyFZ3u1lv^BS*Ԗ`yMbњucǎETTk1o3+Nr.W, =F#T*Փf +11_qjJ`61tH2xB >cڦSŽ;VҚ),6m'tǰ".v5_`ϫo2XhĖ-[b91VQQX\:gn  C@]00eDիiXO=/AAAq 0[h.`f}"]g=´?Y3Kѿ$}~ϙAD"&MĘc kʔ)R___׸rs]?fjm75;^#rd;1jkkgϞfzz/' aC\88>p|a0JO\Wt:.5SXe§Q}u?j43@ػ_WX,fog k񩩩 qD,߻уE&!333cSXnw%p|LaÆ\c Dvv״fzիWjqEp{ Bry_Wbጠ5SX6m'C.Byyyhƍ]둖LaUWW!XbX`0Df kԩ߉D":!<<۶m[BYV׸"=zVFkJJJT*k\^D2ґ#GU+~F閫m+eLa 4Hg]`<7W gf3: v- kΝdHr!JQYY91V~|׸"=/5c-\5Ɓƒ3sSXϟtR<3(jzsխ\.GQQѐD-°0 pcn-@k,z>}:BA=L&ir\2t v7@Nsm}~5  <?К4Fn#0k֬ Z3e6l6k\.˴&A  ''+Z3=VNNNWT`0WpIlIG E^^6Z3߷5nO}}=,Y2La]paFq+c6a05  Ϧ5SX>: H5SXJN Kk)@y}HkFQWbi:VbMMk\Z$ZB  La-\W=~7\RswWT?Gfs_`l0[qN k/wa5qN 6FPP5o]Ô_].+>_V3Lo~5%i=cvv"֖.wfҺW|'f]yc'01f5Y(8 Z$?yc' k{[|@ h+7J}?\?!\Lr20);2u}f nƐg1To>5Ea۩5{.| Hsm(PN:vk㢪/`j6")N:s_0<"?::m{uڈCWB/lmWwb7'!^/&u%O۬@AflThT6g)[>፯G=J`(]۵` kڵn??Yy9&<6ayQᲺՁoa9\Q_5%K:GL`H7ڜ!0~=VCQxLﶇ&fk#3އhxO[NxZ4P&zu8,#8s/‡8n7MA㫋;Qq?O}vן`)eX;c||8"yח py[k$>5,8N,+Dt);}H =Ƌ#V`rvHn"M͆'ё>?*6u6 v+j۵`ٸq~y=m=jw`U^EC zPۿDݡ${?܄ù=?_8whjUw ` m2d6Lܪ㴏;zl ́/uF)LFp=tWُQTʚ(,:@ !t+>ė>0(.!O;,P>)|xR;2들xyx3kͷ{Vgqվ%a]Pfy8h[{h mDDĺ.9x fx9ruX`!8HˡH$wu/3j4uo(+  fz+`q~w8k2bi+7aHسKeG;i~.*<1QGXQ,?vl wi(jnT' tHOOH^ LxX`Rqac4ٚPt vB8;eeAtZ_퇿4G}|{)'Y18wN_;/6'}ۣ FF㉐۬;1oo$G7 >t+_وOw~HѠ*@>Т4hЙ،eB}}iٝ7ԑFљ=ޱڭ };Lo-%>wL|9\C'Q:lqc\WJ kR\e +##cX,vf.wZb^1x4?RFJ%%%SzWzζ 9^^^Ak @jjje˖Қ),st UK  8̞=yejm'մn pM!P(2$@HG @N  8 @p $@H)S :@ p*X": XpD2KBPWG `GiSD p\L6tiii px< Z3Cڵk>+p>,fzDZzp t T*#;N Á731qϊK ܍u@^ tJf IXVX,fJȖߒ]D"n$D1U^^t/BK: 8yd,]Z0#22Yf +<<`g%L& 5SXw~LI.-k Ä Ak  Ao&S?,쵰OQڵkn$ ((LFj(J.Sr(R)EQ)DBYVH$Tcc#LI$`0PEQh4EQT]]P(((JEPEQZH$n(DB555QH$dj㱦P(h0hZ{ģba<6440Z-QR1Q*2r9QV3u:d21Q"8xlnnv(Hwy'+W>b,$$dUvv͋//Yzyy )gƏ?gʕ?}weIIIs</رcٳN[VWW}k/^XZQQe̘1-[vtϸqWX:v7׬Y#9x>66͏>Taa,[PP-JYfMKv/IKK (//?0/==V\\|nogff*9HJJ{o1+--=k.VFٺuրmqqqK/^,;f̘K..---7n;+V08p?BdZv/ 
]f͚F״'O~0'==?V?~zw<.Y$1c,ŞewIENDB`patsy-0.5.2/doc/figures/redundancy-ar.png000066400000000000000000000105721412400214200203240ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx}Pw7𐐄@"+OUk:2Z،:nׇ~dwwӋ-&joo_9}OΝ{^3VUUMoo_YQ*EEEg6nkf\vzP训=o^3N:>z-vMM͑~^,w8MMMUl^x'k9seee[ZZVi4nf9ݐdH$D0LJ׋)Srraʔ)|ZP*h\.H$^D"L&C?B! , 233t:vV100ݎ,fddd 11_5N'b1f3 Jap\|P*Rr9aZ!J XTXV$$$ -- @ PTzp\P(nGZZB!f3RSSa!ˑTt:'1112,$J. 3gg}dʕ'ZX֭[od46~VH%SBX'_Vminn^ 333Ad~(dL`(,,<[X۶mXAŋ_fgSo9}/EȽѰ3H\\?H̓H$Pӝ'yd2agcǎGMB`믿&;s t*}>?HKLLX,ggna՝'y {;s g.T$fTVV=;s  (~hNO琟L444ggna={61ίja4u"~.)QZ 1x`Xf3.\;JOkkE̵@]Μ} AQTQV;s +>>~N֣ `q?~?߉e{p/xr8uV;sGG}t~R=^o{|AD2/W{cC AYYY(//q kLJJ ?b~)Μ}~DLHRHJC|w#wG(e"C1{IRG_WhS=8U`C{]_ŌޯQgXp…^~cXj[",FyKB7xHuedd8;sG,FsHs!g6 ;/B! O(r`'U S)ߞ`0Eu/s`0رcؙ;bIR[b?B$Ixx=:'Yd= ~$9._m;fԥ!//j;1ܼu}HOYPTw#9Yyڃ}Zjj*4 wy;l2wÆ P*$qy/BËB~;ur!j555-ƽ|ɣ P(3wڷoi~"AZZt:3wĪzp<Ʉkr e4x<"1/..R[X.Rfgna߿X$\6 6mZºyLzEx-[X+W|X49++ !㧇a\zyc_d#MRᩧ#DqqqJ#֭[W?7U1hmm-`gnaeffD*B~X%vW+oEb_0fS+ =@$Bz=6o޼?K&aH$.MDR6~ Hb!;s p3HI؄B!D"wAm$BB* aXPYY=n HDvn+d ''pK&!t+2cO3:;;g[V~R7V Vx:YdBB* xP]]= HD3@LFVV,YrEd2~?L&Shmd6łoީ@LX@;;Ih@Ν˟7o-`kƍ4G^%p=idP՗ٙBI߼oݺcD¦jժлt^8c* ׇ,fgna%''F$\T @ HdaKy|ļd2i+$D$L&*++3DȢT @ HdQ * QA$*@D_~1m$\(J}JJ ?H̓((($4*G$\?T @H$ Je;VH")$tUUt ׄf9ghhT$ $&&r׶VH")t+ܿLA5a͛7g_$\~s@DRVxҥ _A@ŋ%P.* Q1loBBHLÇbgna9rd=* HTdffbѢEٙ HDfqyU pmA0 mld{͖n[8v0 ^/0x<ap|Gm4 kZa0|aA a`xx `hh($`2f.2z<"qxx888edt6 `kf3ޢp2z.d2l6. 
2a22f ɓ'g5TWW/UWo~M.H$ :nOhhh8{r8MMM>0̣+V8ݝUZRRrNR ~ui4VBFF-NeUU3[[[۶{ҁO=ٽ{G|AnOOϬEv mmmg̘q{2.NMM5e,Ilhh8gϞ'VCMMM566޺uxŊ'{{{%sKJJ溏=0xKKgVYYYhkk_XXx^s}ǎZbXlݻwoN+̸LPt_jBpx,ǣ8a~}ʕ9eeeg%IufX,y،K,9e2Dsۋ.7$4!CoHT-(IxIENDB`patsy-0.5.2/doc/figures/redundancy-arb.png000066400000000000000000000126601412400214200204660ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<-IDATx}\S!!Bxu*2utr٘s֋enV^ٺX((0D >!TiB;uR'B-]Z-Җ#Sсbo!׋/眜EQ GcbxbXt)"##ܼyK ZRRReٶ3iiiӌFcXQQQyVVք։'Ο?W_M 쮨x6**E}q"""k_&'''vQg|wu9###QTN...޲sΘׯϙ3HTWWW'M<<&&={:r󙙙XlY:o[jjr(,,'EEE9[nH$1+VxܹsQcccKMDGGp HԐ}"99y5kY,{{.))4ouuuĉ+l۶m+VQ[[;"pe]vɒ%[SRRV{yy [O&I4{M6}lٲt]~~~ijjr޽{.XL///BT*l6@h4"22j fRDPP8^ ^. R oooCa0`X ىD"@! , j1b \.V @NPtvv"88fBvZ^^^P*RVZJ?||| ڣ^j}9|~f3٣`Ipԩ%%%3X+W\aÆ7"""|S*繺ӧ>ſn|r]ݿ('pu+-Wx 00ƍ;I Vffi|||%]3 ׯ_z_ K*"??_L`tE=zf6999\1|5zצ_5 b۱|t ^Y,tE=.+kf ? sMW رc{t V~~O k"ܞRDjjbf 1dvV՗'9+$$w~:sLlvMWtHOO]3uj5]n* f+!!lppk"^dd$53X,NNCс8f7r5]nO͛'|>005]n/44s)kfΝ㚮gZ!9R5]nOR… < LE YbCaAw #׿Pa*T].|Qr˃^ˊYuػK'tv]9L撦2/Fr.ѣ(|pGJ  r-uurԴaq.Ha񒉰Z)z >#GkfjBkzU^Nň dhlTdr㞞zJqFcr贃g,53Xc8ьH?X,Ǟ {g!")){ơC_#e>4s+ '7^^,|X-Zg]___tM]=便(WGy}'rŧ(-o>[}\;%@xp< *N4@q#ּ:١@@@F̦0//o^wiAg8~CE{eD&G|by5NK&x3PM@F `XQF,z߃gJphv@6ݻms˹u: Gt#< woh4fh>(<>f0eJHG#2--Ftt8esfo5`ڵ|`{#m}obN⇨DT7=qk]o0vYӓ'k6էup/<#$#Əw(,,G`^`\C?Л;Odף^ ~ۦ9uSky3sl6k::>DAsq:Fܤ/\SVt#G#mo7y|`~<2.mя D1Cf}l6 a_yfQtML |~v_JO~e_Ull({J8& Pp53X#X&{eq|wmmq .^tyպK?{fmʨC~l6 y O}ϼv {328{TUU=ѽl^?om`Ӧ)00~> ??քnPY-mA/?z?֮{IOa|{x# h~ct;G=@E?AqQpi./ب8>ϷAމU&9E(s1ej6֋02QQTU f|_wٴ:Jxx8VX]{Pݸqcǎb 1ab5 .9ʊf|ǔ^{/|Dq|6e8hժU}0^is@/v|҂W=6CE3@ 2? o=6 pID"Ɩ5 ݮO31cb!009H S)H$@p  @8  NA S) XrdSH Հ" '^D&CF(J}fO~N$r%#{0@<%NܹjqM0󀀀/E1`'Ce٠T*g!J%oH1Tw0c%b8p8Immmسgϫt ֈ#l6K} LRvvvkZ" @8E[[-[N`f(bxoN}? 
@8B@jjrf Gk"#?L w0UTT45]nO --G9@xp$@p  @8  f ֔)S4ƈGbs c @8ĝ`A) Xzu"C5`H$W0@tttM… q k @ սbx<D"Q]!|>O5ڶm3b P*]]]p{l6̚t}`E)0@ r1TVy$$  "44{I`vww+  pҥPbHɦkf̞p{& %%%?kfJKK%$N-fNBK@ đx=nFQD"z=EQ%J)ZMQErJ.SEQjmm( %H(JuuuQzzz(DBF(J"PZ(dB(T*FQEt:J"Pv$ MY,J"P&_mmmL Q2=ƻhZңbazdztL*Q& VG\Vz=ӣdbzRO===}zH$ŋyܲe wU[,CAAA[srrzꎸ8qZZڷuuuoǧfddb֬Y˷lrӵgs^^+++x<^ʙ3g+++7dljjʍM_n]kCCÞ3fݸq՚3gl޼̙3&$$,͕|Gf544-))nmm k׮K[~]]/g͚rVVԩSٳg'\~~~+jjjN9rSӽjݻ׿i_lluօ6441cƪ 6x̜9s͛Mw8~>\W^^n5L^ג\accӧ~zK.}<##駟6Κ5륡3Vb[=>l߾s/=zn26*..l'O̞=EGzŏ9)rpB /p<(rړS?)mzǑIENDB`patsy-0.5.2/doc/figures/redundancy-arbr.png000066400000000000000000000136071412400214200206520ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{\TƟ ; 4/IFꁬTO&TxL$@ff'ߨ"EM=!Kh> 0!{p]߿{gϵu=bŊNodz-Z@ դO7L۶m;5QV}w?g=>㏇73&&.++@!2l80DB DRBJD*BQ*D$Bh4ab6N# @ 0X,0 QՄBb1Q(Br9H$Bj5aBHkk+attt@!zfգH$գJmr(DEE]`5 )$cYfJr#s#x٬ KфFǸ8=V{s(,**jr,[l*ºv-,X,L&VsCP(u^r=cǎ 987jWjN<9ITP(1£G:uIwww8==B =Jkk+ bYVyytTWGVcYֽS``c\Qp<YܹsqEqzL&d2yR$9Q(Nd5WX1(NOpp0Ǝ[jzbf3?>G tӓbQXO?4\aQ#&&t(ؓWW^(܈D",\iVse4=,c\Qooo]e5 )P~qja(΍L&CZZc K*WsѡbO8 t鱶l2X[QTx\a544X[1hmmf57Ԍ0ai>J? 
]m2hZI 2AP8j`&\aM4:881(NOTT222 X֨QnnnqEqz ><\aڵk}b+JgϞMa5WXÇ?W'44?p k K.w[@!{<t.Zس 2,Yʕ+w7Ipl) +)T*Eyy_ `ޚB \s1(OOƠ7gV]DXzu6)6#ߎD _?j @ YZsRl ^^^] W'((7n4@b -[64@bf3:::yGCm۶ma5 ؅9 0\a%'';441(NOtt4vܹ4@b:zNnK(Cg@ϼB @O`5 Dj @ 555ϰPBg@籲8e0Xnz˗ϔH$qEqzb1JKK+,ooo }Şb+~~~:tuU;e0  +;;NcDVse0fc\Q///_d57nڴc,Q(((\UPP0F8 yV( G8ePIlںuѽoAChh(a5WX;v츳1(NOΝW`0ez珆;Ď;6+,XAcQl K @"##ꫯj&Nrwww+c4qz+1(N  !!!HHHj.]:ygd'!^ҟ`&fLz+oh%NKB[sqqA``%K ti1)_!~[˗bޤü3v\yi _iG}clVle@kwqcxXx%(961&dx Ŝy(z]+<VND r1^$.^LJ'gwB:ZPrjƄM½ >g2>cbi8.V>oہܗhLImuW"'gw#ݣqLN/ _]; ^ߑ e|cD-`&=o}~.<<Dm0QPs> s_L'gw}`9 /~zv*,,tNIk7p_DVy/EYt{%9+ȯ̰/7j3D]GfY{@mTehP`cɑgqK܍)C l7t}77l7~ jyGv r r-3]WѡnzvFSZ5bn5 >§s#J'|q'nxN`nP\6}'zb@φXݯG'B_VX]\61>-ܧy37 #Q?6: >0YL2} e˖wF>^׫rGZ Y[!P- m-{؜1<קbsocQѹ ¸˓bDNboxe Ny\7Y3{WXEEE#.\__[2Gps*B,(Z_Jnƅ ;R2fn=8룣uuw#;V!tr\\[_m?:zk4Fyyy7oޜt),6@ !4F§øX4> #_C ~>)~z'Iqw}VT]qվ+gb[Psj[ot }022nxc9rK>a4C <"Njfg}X$v3mb{qpuY~g@uuuh-fL)"Uݵf_70?1Q_XENCL̸+M ^a#Nh=Ճ*@YIwĢ܎X-_G.a  &ȐM:.٥{L1w#3`kr ĭBli=rRT ۵@.e5GN:xHH]M{bbC}q/]Wf %6pV3_wNiw/'UhmbNKpvb!\U_^#ގa }jݽag@0YLV@klƜ0!r]Wg@ pqqu#gQ/XݪvG26>beU=nw؃w#Dl(8x)#Xs1>^掜x8tn^ ƄMĶYa3qFv#'Yo"!^-2 Ønw3ֆM6qc>cOf8l&bF/1KLJz{7`ڝ)z ^a E.}V̿[EͨOc5،+X*#xVLzdoq,:Č~hGg7b{?o=cV? 3@뱞{_˃~S>e . $|/&cxH#w<W`&iP{3?frVNNΓYYYN?~{A!CJ37 @7^._|s:@`zrSoB􏻻;㯲PBSS~+,>)Qg@^^^c,Q4@2 455aѢEۭ]&S7=)􄟟_} he@dHOO_j @h.ʀ@(  4@2 ʀ@(  4@2 ʀ=@ tzX  Xtl:Rlc\Qu]bu O8e0兗^ztܙ cQb3!!!u[ɕByyy\~ ˣ(1dg k˖-##Bؼy.Vw-[Q(Xl|Vs0EM 5  )۷s Q\a| ҕ>~x:)Vp5(#** 6lc5WXWwf^AӡdڳgctHt 0(!,, ??YtA8>t? 
*"baF!"R$"JT*%RP($j 0d2^O!ztttaHKK !aHss3!DBd2!BA!j0 X,0 Cۉh$ NcSSQ&q9---z4Lvh49mmmGZyT(GDbGPyJGRyh4GNyloo#0V;::<2 C,XP֓ڵk|h4~Ɔ ~=Ĭ+'%%ggg(5kk~ꔔ|u:@vرKKKk4Ud{!?!!a+uuuoO6mUWUUΘ1#-77wر+/3ǿ{(++3uuu{KJJ¢ .JLL=jvvYn:#Gd)))o޼¾} >>>VUU޽{Z~y}IHHXbŊ𺺺ӦM{mʕCf̘ 77Wc\\_?ڲ2N[uOKJJ466n :{GSN{ff?uRRl~{v֬Yl񘑑\WWŴi^rMNN~mqqq---޻wEӭ ܹV=>|X,=~8{nWD~2!nmA #A4IENDB`patsy-0.5.2/doc/figures/redundancy-arrow.png000066400000000000000000000015671412400214200210600ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx1WlX 8w pl  {0c6`yhKH0]GTUcW{i¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT ahqwZ1~c0z2{Eca\.6{fZ\!syɖ$~q{I~Nm3.M<ɯIͶa_+z6{<+W}u[wۻͷ~TX :G:Z̩<BNa-^Nda̟)~Y}چv4Ƹ;g9E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXT aQ!,*E¢BXTVIENDB`patsy-0.5.2/doc/figures/redundancy-b.png000066400000000000000000000105031412400214200201350ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATx{PH$$!W G*՞ʎڅvzj2ўԭ7쑦i`5dwl̴kwZzzVnSE/Bȍm̺Mȉg$fBіĠXjP(h4kjjjJl˥8R՟;wNWZ^^- ]]]/Nzɓ'_[o-2YfnVV_՞ݰaCϋ#++W\r/<Z[[O)NsQ.]zիyeeegRѣszJedP? fsçz=p8X,<@(`0|>x^H$ t@ L '', p8 |>8ND" | b"0L:u*dggh>\.F#X,`X@*အ!dvvL&@ v$ dffl@l6 p\`2`X,p r\~XVD0nܸ_L`l6H$8z:l,`! <x<FP\.3B(b L&`pԩ鯾ꕟ,ڵkVUUW:"%+I@zT*UMggg3=? bq?͆b10u &>' "5ӿwԤBib… L/;jMM* h4EL/VVVV0###5Pr fzM,&J{|>n=5Ӌu#5PL/ԤBi/;;8j&J{"z^]v=xR = ׯfzt:L\,9L/Vmm;(CKKIj8&J{N6m4źtKU R^l6[5Ӌ5o޼7HQ tvv~BbeddD#/ :v?(^NnܸQBb=)R X`W!}ŋp8 M+֖-[HM*l6?ijKP|T(b())PTxIɄ)S8^f95PΣfz\d&J{yyyPTTKbm۶P(LM*rssATS3X7n\fR =+VXLbAHjRp@$ݢfzv!JS _jRL&͋^!:eD{fzX(^P[[[Jb]~B zz> Q 3jH$T(nPTӨ^vBn7L/SO=u\$&J{rJaj72? 'NзӋcxv~WxIR8Mb[++##\.Xk׮݀wxfZ*^gXI^͛7KM*X,w Ш0 qK322D"IM*@%dVN<ŋd͎žyOAAAjR()l6Dc(H"HkX ()@%X  PR`J ,@I()R i1 6lذb/= ;y=bX,;bAPD_A(\.룯y{m417.Ɩ?F`Ÿo߆v;ǃ'¦3ǣf!///D !@bAIaoo/,si4@&N;wv֡!ˢEK/A-Y_PFӏ?&=b '^oBOvI s>Ʉra`0O&E}oHyy0uh(G/T Z6|~Ob0@㴚\.5Y1 \zaxQ( gz0|0G&:5!&ɠҡk:&דF&q3 ͅ?߾ ? D͘gtq@Ν;'1c-{*'=+ufAݬY DX FD"U,@P (X  PR`J ,@I()@%X  PR,XfM;(DY p…_? 
p\PWW}-Dx<Zq?쳇D"QjR@ g(N>+O@%pkעKMv/AY to@+fˡf,@jBOOO5Ӌ%f  +;;JKK3F@!mmm^q9R ;vyGh4ʕ+Q3X@ /уb멙^}ƶb@MM<QF@]I+@cHZےZ Ʈb-HM*~^ @/V"Q|XU&)5Pڻ@@,@I@`ݺuoBfNN <* PRCee)jI@ff&|CCt ! f|D)5227!@իWy&!:\.B!vB!ffB!DBqDӑp8LN#CCC$ NG<!NG!D, !FF#!It:!>t: I  :FhXx1J@ @g|tFIgltFwF^Og4tFNgt\tFOg qet? ~Qӑ7|sO+lwرsFoٲ JjllXVWaFkkkΝ;t:߿;wYdIKwQii'#G,**V*gU*bqZt۶m _KCC>}מ={^Vw|?_n7̙3˦O~_>h)WT*\KccZ] [[[OڵknC+qFҥK߹s{N0syEEEJ_R-7j۷e0{nhh(|ϴZ TRyȑ#/..pBFY$غu*j>ñ755p8r2ΌE"-Z}IRd2w3z^݌ztRyĉ}}}\n_fuvv~Be\`I>%%%=SLqn AɀW_=mc IENDB`patsy-0.5.2/doc/figures/redundancy-br.png000066400000000000000000000105531412400214200203240ustar00rootroot00000000000000PNG  IHDR<qsBIT|d pHYsGGݹtEXtSoftwarewww.inkscape.org<IDATxPSgQH{tX粶Ris(qG:6^lN{u]:R+U-Ķ$!$$yy =O|q@hE8?lڴ)_UTTڴiTڭVDVP^^ߟvС*jN_RR˗sZ|MMMׯ_;[_[ @ pn׮]f9ȑ#5MFgggNQQ925?;;577ظX\\:>>^V)ZR\p86L&먮n+)))ZBX$g،'=:3gʕm65W,Yb:ɒ^֖ג9}]7n|͸e˖QQQnV{iyn[!H$FsUP,Q?T*>f\voV֖xKӧOhS*BBd2f8NB!:l6B!FFB!VzB!0 Cq݄ax# E avB!X,B!fBaB!.0 C! ÐAz~Ԍ63f`0ܓ1 Ҍ>ftݜ2:Nqppf !d- fZftd4 hZG}4'Xblذ'zC\\w!F? u#߾wܙ V/7L%CAVfkX"h(""TH$ [lK!Oi 0rf-67o^ִ X,8+[RSRhp:%Bf6a۶mlM q OfktL\ށ#] /\xxKnΝ;sؚ6֍7YѯBh . ,K[ӥOżCFFF qr@Vg5mFTHv;tuue5m'|/ B EEEؚ6K/'<&)ic/z~R!X,pʕlM+==[DO*$xIIIKh4.\tie˺}b)B_2$h;VִNjÇ+T mlK!OOwܹl6 ^M6akX^wr0'8HLLq)D)t)zn de4׹\.~R!h-ypT"t)ܷo߯ qe٠[Zhљ$~R!T*O5m'x'::TH<={6icq?jB{{{ٳg_'dϿBYY٧. ._3 0g`m|GA$ִnݺ}Dt4lI7 o ;h&'' W# T*'Xn]7[ &#z=ر#tkrDDD`KjQctӎ%>>*++b ^oo/jgUtt4 }wO*$xP__OUWW\??Y,P(1 B^+//ѯBh )))zTdKKK޼#~op^!jX[Wpȑl}ϯtp*Јz6m? @wY}!=v[|Kqu_N2THFT*O:TH𢣣aΜ9oo&THFxHOOAgϞc8q%H"t]^a㈫<$u7 JC+|%''~Bc6m7'z<= HDhc544OO"$xɠjX:v ^|ͳ8@qx? W_}}V>}::t(dϱga\xq >yG\w@bb"?M>JJJ>fkX .dp˗MX{{{Ip)J&A^^^ [*++#"##!!!΄u 4@Hy=p""!4@/|ЄhB @RGv~@g0U,_ksB$HZN7n- >^ӟ544n 6^̓-Qd$P&hFGG LU eCAL}5X=ʍs#O^y^ܹ!/̚'/F#g?:` bZ8zafEg} {ڵQ:w.6 @lr0))pJf[Ѥ&һ? 
^󂂂ѯg`捲'L8* 1#Ap8PWW]]]}!|ԭ==svf98e˖kkoݺ> n?d˔΋"8OddW5?1;yoބ]]3(xQQWq700XD*U > KX, 4 lA?-a] ~ bLfk ƅl|%e UoY| q%!77риF-6֤IQQQBv@uux(  ūٚ6kױ5mxT$  a2@P,gk &@ ЄhB4!p8@M &@ Є;@췸"dw&ONk˖-q)D\BccuڵC8qp8`׮]lM/(ĭɈG P8rjkkO5m~鎎+W|tF'2^xZ^uEE7*jՔ)SLjMMMАxrVÇ8p@VWWWڵktwwK\tŗfΜرŒ6R١R^NJJꪩ^UUB$y}ݖڼă~jP*'O:ѱ0??@KK򬬬k/NV*++RT555~rCC9$&&~_SSs]RDGGfu\w3tJɳgϦ_H$hX29rXTTtd2^reivvv9sN~x9= A&.IUIENDB`patsy-0.5.2/doc/figures/redundancy.svg000066400000000000000000000700261412400214200177370ustar00rootroot00000000000000 image/svg+xml a− (a−):(b−) Export settings: Width: 150 px Export area: Page patsy-0.5.2/doc/figures/term-containment.png000066400000000000000000000145621412400214200210570ustar00rootroot00000000000000PNG  IHDRvesBIT|d pHYss!tEXtSoftwarewww.inkscape.org<IDATxyߟD!]X*E(Ujim--R*h,wRXBC""A}N8sι9g_yݙs{gg.23JJҦ_RJ2TL($J*ɄRJ2T$ ۡy˓7bdBX%P*VI&U bdy À s3+cv2;o XNfVk>:nBQeRU_qөRJ5xx 6V6Yt|9H L[530m2 / \X؎sQ9>PnϨ̬$@UFe~e  [X E2`\=b볣B3 ) Pth{~][qX[&I$ GsEό>3R3x';gf/4(s>06 :ښ*9U=BY&_fS-(bIZލIQ׺Yfn ǒ4>(՝>P`̄wnQk4ZؿP2e:µXdu 4 4'(X+G/% e$&oPfhȍ"*V<ϴJr6BIF|M085nkEXODc%maf$ .?;Gf65:ۖ}{c5`%`m x7wO80$Л[GãN⯪ix>U28nVJݬ(bI:8^ _ ffeTxn.3)p$>h7tOMf9 = ,RpVe ) |!{V+L1\O65w2f6ΐ1U< (pu2?W 5*4w0陂<ٴϾ{3B/P 3F/Ƨ#v2r)u *4?>3c*݆GFEJW,I 㯽Q|j݇\BV,ICqӀ}JQ|-\I,RjtbI܋R8^Fx7̺>Ks* 3gjVLM77PREX>ļ=H-+$[}>lYmYiŸfvO"}+_g/HɡPHnYk=_+xKU*U9>J)gG-Kq϶6%nw-fvG"*׸`ٮZaZ*)&m9AZU yJ2F?ifz MRJ2TL($J*ɄL+tY=+$9:HM4!i$Zi/$m-ISgZWj7&0k9l:jN ;V ,tl9_ҋ{Ja?`;x#1ghq-a#m@2;>@OXʯUfR5+ɋ)p <TI~gfThf}tl+t-~߈sޞwYx6쏒JO+Jm:$\Y5 fP?":KV',PIIE-+Vȇ|)ɈWYN-)Vh*>O/Ls'\?$/^k~+Vs]Ju0jwn-IYfZbbVu.B=%LLVFEW7(b G3ah?ŊCLI*$?>[?;IZ(mp8qVh:B=%17]S 8_8iE(1_/E[$=fGJ:1I"'RPb%s`:IOIBҽx^[$5?H/i<~| h-jur~'|*L“?zJ)`K|JҨd(adw"U^az*\5"L#i ܖjqE81Gޚ]%:^//p ,}>6l̬+ܬu 7Ԁ#/+|X/ikQa#$ >df,R is| }`#3둻{tbH+0`3C"er?<9-:F$+`w3,g2!gط6Jp,m3*l3{cH9:޷ܻ :L̳oy].iEJ IĖƘEJs:N)kQ"LppX8NipJ[Q}j$mBY, /:С-V3_Es+Mw;] nv*iE~~ bѯI{'_WH>p:Rޠ0 iL%`c3{#g p Cef̈́q X~NjZœR4꘥f>V5fv2~eϵ`"}`>Um Xfsd<͒FjnG; NX Xf6Ư\/ipS2%, ,O)ܔ: _ѡbHZWaxZ֚]@谰Ч @b^-g31]);>3*L^,4ܧT*ϷX$-bfpfn2u2bdBdCl0} 
Z=ZA-zڽ!$ֽQgIm$=ou^5!i9p57zR>L($ZzefV|De)QgBjMk l |->?: p}PkL ?a r4cpo'?Li1(3jt99\À%XU)w~<Γ#pX*?ׁQ@Pf 9~|/~ٳѳR o9jn_E&bX_[7P,S':ޗGP㳕dK//U#iXy=`dT﫵[p6*;)ϥTٯyc3>gfֻU!t5\TX']&.i-^M\[XϚcI_R [,$\#>i4saNI K\Jpޏ5sOp0ZMb}gD6(mҜy2Ȝ:2Nx+ި:$3_R\f6=ڍU;+g I#bpIK]t+⃓Zi#_.TISjL%l.[Xm G/vޘ!2pZ46pTISj!dh>qnZs#Z$gkzw $+BޏO ^L|a%-٨@_4NLS 0krݯF=ق,D5?XqgzR]t֑4ObQQ5&,_E%5,گXy+n O,;.\C#'u')><0TX<Xui7ŊGc78pCC蠚H(io*([E|iIƄB1IHooKx]-/wgX_{cjb_~l&e4>ߴ;>5]?la>6/|J 4C\;O}?H'~z77"rLuߵ Z9RkX۫wY̦t-hj|%6*|¦SۑlS}f8!m jTMokQߔgxҹZtMPfjT:תp8'q,-`%W0]#W٥n[a4#p=\j>؏ uPU-9%3{>I;iQ5mMxh%f軁L VYn}T-Ceʧ'?{ra*)[GޏvRJ2TL($ yAEmq 9P?嫰$J*ɄRJ2TL($GEeGIENDB`patsy-0.5.2/doc/figures/term-containment.svg000066400000000000000000000232541412400214200210700ustar00rootroot00000000000000 image/svg+xml a:b a b Intercept patsy-0.5.2/doc/formulas.rst000066400000000000000000000747661412400214200160170ustar00rootroot00000000000000.. _formulas: How formulas work ================= .. currentmodule:: patsy .. ipython:: python :suppress: import numpy as np from patsy import * Now we'll describe the full nitty-gritty of how formulas are parsed and interpreted. Here's the picture you'll want to keep in mind: .. figure:: figures/formula-structure.png :align: center The pieces that make up a formula. Say we have a formula like:: y ~ a + a:b + np.log(x) This overall thing is a **formula**, and it's divided into a left-hand side, ``y``, and a right-hand side, ``a + a:b + np.log(x)``. (Sometimes you want a formula that has no left-hand side, and you can write that as ``~ x1 + x2`` or even ``x1 + x2``.) Each side contains a list of **terms** separated by ``+``; on the left there is one term, ``y``, and on the right, there are four terms: ``a`` and ``a:b`` and ``np.log(x)``, plus an invisible intercept term. And finally, each term is the interaction of zero or more **factors**. A factor is the minimal, indivisible unit that each formula is built up out of; the factors here are ``y``, ``a``, ``b``, and ``np.log(x)``. 
Most of these terms have only one factor -- for example, the term ``y`` is a kind of trivial interaction between the factor ``y`` and, well... and nothing. There's only one factor in that "interaction". The term ``a:b`` is an interaction between two factors, ``a`` and ``b``. And the intercept term is an interaction between *zero* factors. (This may seem odd, but it turns out that defining the zero-order interaction to produce a column of all ones is very convenient, just like it turns out to be convenient to define the `product of an empty list `_ to be ``np.prod([]) == 1``.) .. note:: In the context of Patsy, the word **factor** does *not* refer specifically to categorical data. What we call a "factor" can represent either categorical or numerical data. Think of factors like in multiplying factors together, not like in factorial design. When we want to refer to categorical data, this manual and the Patsy API use the word "categorical". To make this more concrete, here's how you could manually construct the same objects that Patsy will construct if given the above formula:: from patsy import ModelDesc, Term, EvalFactor ModelDesc([Term([EvalFactor("y")])], [Term([]), Term([EvalFactor("a")]), Term([EvalFactor("a"), EvalFactor("b")]), Term([EvalFactor("np.log(x)")]) ]) Compare to what you get from parsing the above formula:: ModelDesc.from_formula("y ~ a + a:b + np.log(x)") :class:`ModelDesc` represents an overall formula; it just takes two lists of :class:`Term` objects, representing the left-hand side and the right-hand side. And each ``Term`` object just takes a list of factor objects. In this case our factors are of type :class:`EvalFactor`, which evaluates arbitrary Python code, but in general any object that implements the factor protocol will do -- for details see :ref:`expert-model-specification`. 
Of course as a user you never have to actually touch :class:`ModelDesc`, :class:`Term`, or :class:`EvalFactor` objects by hand -- but it's useful to know that this lower layer exists in case you ever want to generate a formula programmatically, and to have an image in your mind of what a formula really is. .. _formulas-language: The formula language -------------------- Now let's talk about exactly how those magic formula strings are processed. Since a term is nothing but a set of factors, and a model is nothing but two sets of terms, you can write any Patsy model just using ``:`` to create interactions, ``+`` to join terms together into a set, and ``~`` to separate the left-hand side from the right-hand side. But for convenience, Patsy also understands a number of other short-hand operators, and evaluates them all using a `full-fledged parser `_ complete with robust error reporting, etc. Operators ^^^^^^^^^ The built-in binary operators, ordered by precedence, are: ============ ======================================= ``~`` lowest precedence (binds most loosely) ``+``, ``-`` ``*``, ``/`` ``:`` ``**`` highest precedence (binds most tightly) ============ ======================================= Of course, you can override the order of operations using parentheses. All operations are left-associative (so ``a - b - c`` means the same as ``(a - b) - c``, not ``a - (b - c)``). Their meanings are as follows: ``~`` Separates the left-hand side and right-hand side of a formula. Optional. If not present, then the formula is considered to contain a right-hand side only. ``+`` Takes the set of terms given on the left and the set of terms given on the right, and returns a set of terms that combines both (i.e., it computes a set union). Note that this means that ``a + a`` is just ``a``. ``-`` Takes the set of terms given on the left and removes any terms which are given on the right (i.e., it computes a set difference). 
``*`` ``a * b`` is short-hand for ``a + b + a:b``, and is useful for the common case of wanting to include all interactions between a set of variables while partitioning their variance between lower- and higher-order interactions. Standard ANOVA models are of the form ``a * b * c * ...``. ``/`` This one is a bit quirky. ``a / b`` is shorthand for ``a + a:b``, and is intended to be useful in cases where you want to fit a standard sort of ANOVA model, but ``b`` is nested within ``a``, so ``a*b`` doesn't make sense. So far so good. Also, if you have multiple terms on the right, then the obvious thing happens: ``a / (b + c)`` is equivalent to ``a + a:b + a:c`` (``/`` is rightward `distributive `_ over ``+``). *But,* if you have multiple terms on the left, then there is a surprising special case: ``(a + b)/c`` is equivalent to ``a + b + a:b:c`` (and note that this is different from what you'd get out of ``a/c + b/c`` -- ``/`` is *not* leftward distributive over ``+``). Again, this is motivated by the idea of using this for nested variables. It doesn't make sense for ``c`` to be nested within both ``a`` and ``b`` separately, unless ``b`` is itself nested in ``a`` -- but if that were true, then you'd write ``a/b/c`` instead. So if we see ``(a + b)/c``, we decide that ``a`` and ``b`` must be independent factors, but that ``c`` is nested within each *combination* of levels of ``a`` and ``b``, which is what ``a:b:c`` gives us. If this is confusing, then my apologies... S has been working this way for >20 years, so it's a bit late to change it now. ``:`` This takes two sets of terms, and computes the interaction between each term on the left and each term on the right. So, for example, ``(a + b):(c + d)`` is the same as ``a:c + a:d + b:c + b:d``. Calculating the interaction between two terms is also a kind of set union operation, but ``:`` takes the union of factors *within* two terms, while ``+`` takes the union of two sets of terms. 
Note that this means that ``a:a`` is just ``a``, and ``(a:b):(a:c)`` is the same as ``a:b:c``. ``**`` This takes a set of terms on the left, and an integer *n* on the right, and computes the ``*`` of that set of terms with itself *n* times. This is useful if you want to compute all interactions up to order *n*, but no further. Example:: (a + b + c + d) ** 3 is expanded to:: (a + b + c + d) * (a + b + c + d) * (a + b + c + d) Note that an equivalent way to write this particular expression would be:: a*b*c*d - a:b:c:d (*Exercise:* why?) The parser also understands unary ``+`` and ``-``, though they aren't very useful. ``+`` is a no-op, and ``-`` can only be used in the forms ``-1`` (which means the same as ``0``) and ``-0`` (which means the same as ``1``). See :ref:`below ` for more on ``0`` and ``1``. Factors and terms ^^^^^^^^^^^^^^^^^ So that explains how the operators work -- the verbs in the formula language -- but what about the nouns, the terms like ``y`` and ``np.log(x)`` that are actually picking out bits of your data? Individual factors are allowed to be arbitrary Python code. Scanning arbitrary Python code can be quite complicated, but Patsy uses the official Python tokenizer that's built into the standard library, so it's able to do it robustly. There is still a bit of a problem, though, since Patsy operators like ``+`` are also valid Python operators. When we see a ``+``, how do we know which interpretation to use? 
The answer is that a Python factor begins whenever we see a token which * is not a Patsy operator listed in that table up above, and * is not a parenthesis And then the factor ends whenever we see a token which * is a Patsy operator listed in that table up above, and * is not enclosed in any kind of parentheses (where "any kind" includes regular, square, and curly bracket varieties) This will be clearer with an example:: f(x1 + x2) + x3 First, we see ``f``, which is not an operator or a parenthesis, so we know this string begins with a Python-defined factor. Then we keep reading from there. The next Patsy operator we see is the ``+`` in ``x1 + x2``... but since at this point we have seen the opening ``(`` but not the closing ``)``, we know that we're inside parentheses and ignore it. Eventually we come to the second ``+``, and by this time we have seen the closing parentheses, so we know that this is the end of the first factor and we interpret the ``+`` as a Patsy operator. One side-effect of this is that if you do want to perform some arithmetic inside your formula object, you can hide it from the Patsy parser by putting it inside a function call. To make this more convenient, Patsy provides a builtin function :func:`I` that simply returns its input. (Hence the name: it's the Identity function.) This means you can use ``I(x1 + x2)`` inside a formula to represent the sum of ``x1`` and ``x2``. .. note:: The above plays a bit fast-and-loose with the distinction between factors and terms. If you want to get more technical, then given something like ``a:b``, what's happening is first that we create a factor ``a`` and then we package it up into a single-factor term. And then we create a factor ``b``, and we package it up into a single-factor term. And then we evaluate the ``:``, and compute the interaction between these two terms. When we encounter embedded Python code, it's always converted straight to a single-factor term before doing anything else. ..
_intercept-handling: Intercept handling ^^^^^^^^^^^^^^^^^^ There are two special things about how intercept terms are handled inside the formula parser. First, since an intercept term is an interaction of zero factors, we have no way to write it down using the parts of the language described so far. Therefore, as a special case, the string ``1`` is taken to represent the intercept term. Second, since intercept terms are almost always wanted and remembering to include them by hand all the time is quite tedious, they are always included by default in the right-hand side of any formula. The way this is implemented is exactly as if there is an invisible ``1 +`` inserted at the beginning of every right-hand side. Of course, if you don't want an intercept, you can remove it again just like any other unwanted term, using the ``-`` operator. The only thing that's special about the ``1 +`` is that it's invisible; otherwise it acts just like any other term. This formula has an intercept:: y ~ x because it is processed like ``y ~ 1 + x``. This formula does not have an intercept:: y ~ x - 1 because it is processed like ``y ~ 1 + x - 1``. Of course if you want to be really explicit you can mention the intercept explicitly:: y ~ 1 + x Once the invisible ``1 +`` is added, this formula is processed like ``y ~ 1 + 1 + x``, and as you'll recall from the definition of ``+`` above, adding the same term twice produces the same result as adding it just once. For compatibility with S and R, we also allow the magic terms ``0`` and ``-1`` which represent the "anti-intercept". Adding one of these terms has exactly the same effect as subtracting the intercept term, and subtracting one of these terms has exactly the same effect as adding the intercept term. That means that all of these formulas are equivalent:: y ~ x - 1 y ~ x + -1 y ~ -1 + x y ~ 0 + x y ~ x - (-0) Explore! 
^^^^^^^^ The formula language is actually fairly simple once you get the hang of it, but if you're ever in doubt as to what some construction means, you can always ask Patsy how it expands. Here's some code to try out at the Python prompt to get started:: from patsy import ModelDesc ModelDesc.from_formula("y ~ x") ModelDesc.from_formula("y ~ x + x + x") ModelDesc.from_formula("y ~ -1 + x") ModelDesc.from_formula("~ -1") ModelDesc.from_formula("y ~ a:b") ModelDesc.from_formula("y ~ a*b") ModelDesc.from_formula("y ~ (a + b + c + d) ** 2") ModelDesc.from_formula("y ~ (a + b)/(c + d)") ModelDesc.from_formula("np.log(x1 + x2) " "+ (x + {6: x3, 8 + 1: x4}[3 * i])") Sometimes it might be easier to read if you put the processed formula back into formula notation using :meth:`ModelDesc.describe`:: desc = ModelDesc.from_formula("y ~ (a + b + c + d) ** 2") desc.describe() .. _formulas-building: From terms to matrices ---------------------- So at this point, you hopefully understand how a string is parsed into the :class:`ModelDesc` structure shown in the figure at the top of this page. And if you like you can also produce such structures directly without going through the formula parser (see :ref:`expert-model-specification`). But these terms and factors objects are still a fairly high-level, symbolic representation of a model. Now we'll talk about how they get converted into actual matrices with numbers in. There are two core operations here. The first takes a list of :class:`Term` objects (a **termlist**) and some data, and produces a :class:`DesignMatrixBuilder`. The second takes a :class:`DesignMatrixBuilder` and some data, and produces a design matrix. In practice, these operations are implemented by :func:`design_matrix_builders` and :func:`build_design_matrices`, respectively, and each of these functions is "vectorized" to process an arbitrary number of matrices together in a single operation. 
But we'll ignore that for now, and just focus on what happens to a single termlist. First, each individual factor is given a chance to set up any :ref:`stateful-transforms` it may have, and then is evaluated on the data, to determine: * Whether it is categorical or numerical * If it is categorical, what levels it has * If it is numerical, how many columns it has. Next, we sort terms based on the factors they contain. This is done by dividing terms into groups based on what combination of numerical factors each one contains. The group of terms that have no numerical factors comes first, then the rest of the groups in the order they are first mentioned within the term list. Then within each group, lower-order interactions are ordered to come before higher-order interactions. (Interactions of the same order are left alone.) Example: .. ipython:: python data = demo_data("a", "b", "x1", "x2") mat = dmatrix("x1:x2 + a:b + b + x1:a:b + a + x2:a:x1", data) mat.design_info.term_names The non-numerical terms are `Intercept`, `b`, `a`, `a:b` and they come first, sorted from lower-order to higher-order. `b` comes before `a` because it did in the original formula. Next come the terms that involved `x1` and `x2` together, and `x1:x2` comes before `x2:a:x1` because it is a lower-order term. Finally comes the sole term involving `x1` without `x2`. .. note:: These ordering rules may seem a bit arbitrary, but will make more sense after our discussion of redundancy below. Basically the motivation is that terms like `b` and `a` represent overlapping vector spaces, which means that the presence of one will affect how the other is coded. So, we group to them together, to make these relationships easier to see in the final analysis. 
And, a term like `b` represents a sub-space of a term like `a:b`, so if you're including both terms in your model you presumably want the variance represented by `b` to be partitioned out separately from the overall `a:b` term, and for that to happen, `b` should come first in the final model. After sorting the terms, we determine appropriate coding schemes for categorical factors, as described in the next section. And that's it -- we now know exactly how to produce this design matrix, and :func:`design_matrix_builders` packages this knowledge up into a :class:`DesignMatrixBuilder` and returns it. To get the design matrix itself, we then use :func:`build_design_matrices`. .. _redundancy: Redundancy and categorical factors ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here's the basic idea about how Patsy codes categorical factors: each term that's included means that we want our outcome variable to be able to vary in a certain way -- for example, the `a:b` in ``y ~ a:b`` means that we want our model to be flexible enough to assign `y` a different value for every possible combination of `a` and `b` values. So what Patsy does is build up a design matrix incrementally by working from left to right in the sorted term list, and for each term it adds just the right columns needed to make sure that the model will be flexible enough to include the kind of variation this term represents, while keeping the overall design matrix full rank. The result is that the columns associated with each term always represent the *additional* flexibility that the models gains by adding that term, on top of the terms to its left. Numerical factors are assumed not to be redundant with each other, and are always included "as is"; categorical factors and interactions might be redundant, so Patsy chooses either full-rank or reduced-rank contrast coding for each one to keep the overall design matrix at full rank. .. 
note:: We're only worried here about "structural redundancies", those which occur inevitably no matter what the particular values occur in your data set. If you enter two different factors `x1` and `x2`, but set them to be numerically equal, then Patsy will indeed produce a design matrix that isn't full rank. Avoiding that is your problem. Okay, now for the more detailed explanation. Each term represents a certain space of linear combinations of column vectors: * A numerical factor represents the vector space spanned by its columns. * A categorical factor represents the vector space spanned by the columns you get if you apply "dummy coding". * An interaction between two factors represents the vector space spanned by the element-wise products between vectors in the first factor's space with vectors in the second factor's space. For example, if :math:`c_{1a}` and :math:`c_{1b}` are two columns that form a basis for the vector space represented by factor :math:`f_1`, and likewise :math:`c_{2a}` and :math:`c_{2b}` are a basis for the vector space represented by :math:`f_2`, then :math:`c_{1a} * c_{2a}`, :math:`c_{1b} * c_{2a}`, :math:`c_{1a} * c_{2b}`, :math:`c_{1b}*c_{2b}` is a basis for the vector space represented by :math:`f_1:f_2`. Here the :math:`*` operator represents elementwise multiplication, like numpy ``*``. (*Exercise:* show that the choice of basis does not matter.) * The empty interaction represents the space spanned by the identity element for elementwise multiplication, i.e., the all-ones "intercept" term. So suppose that `a` is a categorical factor with two levels `a1` and `a2`, and `b` is a categorical factor with two levels `b1` and `b1`. Then: * `a` represents the space spanned by two vectors: one that has a 1 everywhere that ``a == "a1"``, and a zero everywhere else, and another that's similar but for ``a == "a2"``. 
(dummy coding) * `b` works similarly * and `a:b` represents the space spanned by *four* vectors: one that has a 1 everywhere that has ``a == "a1"`` and ``b == "b1"``, another that has a 1 everywhere that has ``a1 == "a2"`` and ``b == "b1"``, etc. So if you are familiar with ANOVA terminology, then these are *not* the kinds of interactions you are expecting! They represent a more fundamental idea, that when we write: y ~ a:b we mean that the value of `y` can vary depending on every possible *combination* of `a` and `b`. .. figure:: figures/term-containment.png :align: right Notice that this means that the space spanned by the intercept term is always a vector subspace of the spaces spanned by `a` and `b`, and these subspaces in turn are always subspaces of the space spanned by `a:b`. (Another way to say this is that `a` and `b` are "marginal to" `a:b`.) The diagram on the right shows these relationships graphically. This reflects the intuition that allowing `y` to depend on every combination of `a` and `b` gives you a more flexible model than allowing it to vary based on just `a` or just `b`. So what this means is that once you have `a:b` in your model, adding `a` or `b` or the intercept term won't actually give you any additional flexibility; the most they can do is to create redundancies that your linear algebra package will have to somehow detect and remove later. These two models are identical in terms of how flexible they are:: y ~ 0 + a:b y ~ 1 + a + b + a:b And, indeed, we can check that the matrices that Patsy generates for these two formulas have identical column spans: .. ipython:: python data = demo_data("a", "b", "y") mat1 = dmatrices("y ~ 0 + a:b", data)[1] mat2 = dmatrices("y ~ 1 + a + b + a:b", data)[1] np.linalg.matrix_rank(mat1) np.linalg.matrix_rank(mat2) np.linalg.matrix_rank(np.column_stack((mat1, mat2))) But, of course, their actual contents are different: .. 
ipython:: python mat1 mat2 This happens because Patsy is finding ways to avoid creating redundancy while coding each term. To understand how this works, it's useful to draw some pictures. Patsy has two general strategies for coding a categorical factor with :math:`n` levels. The first is to use a full-rank encoding with :math:`n` columns. Here are some pictures of this style of coding: .. container:: align-center |1| |a| |b| |a:b| .. |1| image:: figures/redundancy-1.png .. |a| image:: figures/redundancy-a.png .. |b| image:: figures/redundancy-b.png .. |a:b| image:: figures/redundancy-ab.png Obviously if we lay these images on top of each other, they'll overlap, which corresponds to their overlap when considered as vector spaces. If we try just putting them all into the same model, we get mud: .. figure:: figures/redundancy-1-a-b-ab.png :align: center Naive `1 + a + b + a:b` Patsy avoids this by using its second strategy: coding an :math:`n` level factor in :math:`n - 1` columns which, critically, do not span the intercept. We'll call this style of coding *reduced-rank*, and use notation like `a-` to refer to factors coded this way. .. note:: Each of the categorical coding schemes included in :mod:`patsy` come in both full-rank and reduced-rank flavours. If you ask for, say, :class:`Poly` coding, then this is the mechanism used to decide whether you get full- or reduced-rank :class:`Poly` coding. For coding `a` there are two options: .. container:: align-center |a| |a-| .. |a-| image:: figures/redundancy-ar.png And likewise for `b`: .. container:: align-center |b| |b-| .. |b-| image:: figures/redundancy-br.png When it comes to `a:b`, things get more interesting: it can choose whether to use a full- or reduced-rank encoding separately for each factor, leading to four choices overall: .. container:: align-center |a:b| |a-:b| |a:b-| |a-:b-| .. |a-:b| image:: figures/redundancy-arb.png .. |a:b-| image:: figures/redundancy-abr.png .. 
|a-:b-| image:: figures/redundancy-arbr.png So when interpreting a formula like ``1 + a + b + a:b``, Patsy's job is to pick and choose from the above pieces and then assemble them together like a jigsaw puzzle. Let's walk through the formula ``1 + a + b + a:b`` to see how this works. First it encodes the intercept: .. container:: .. image:: figures/redundancy-1.png :align: left .. ipython:: python dmatrices("y ~ 1", data)[1] Then it adds the `a` term. It has two choices, either the full-rank coding or the reduced rank `a-` coding. Using the full-rank coding would overlap with the already-existing intercept term, though, so it chooses the reduced rank coding: .. container:: .. image:: figures/redundancy-1-ar.png :align: left .. ipython:: python dmatrices("y ~ 1 + a", data)[1] The `b` term is treated similarly: .. container:: .. image:: figures/redundancy-1-ar-br.png :align: left .. ipython:: python dmatrices("y ~ 1 + a + b", data)[1] And finally, there are four options for the `a:b` term, but only one of them will fit without creating overlap: .. container:: .. image:: figures/redundancy-1-ar-br-arbr.png :align: left .. ipython:: python dmatrices("y ~ 1 + a + b + a:b", data)[1] Patsy tries to use the fewest pieces possible to cover the space. For instance, in this formula, the `a:b` term is able to fill the remaining space by using a single piece: .. container:: .. image:: figures/redundancy-1-br-arb.png :align: left .. ipython:: python dmatrices("y ~ 1 + b + a:b", data)[1] However, this is not always possible. In such cases, Patsy will assemble multiple pieces to code a single term [#R-brag]_, e.g.: .. container:: .. image:: figures/redundancy-1-br-arb-combined.png :align: left .. ipython:: python dmatrices("y ~ 1 + a:b", data)[1] Notice that the matrix entries and column names here are identical to those produced by the previous example, but the association between terms and columns shown at the bottom is different. 
In all of these cases, the final model spans the same space; `a:b` is included in the formula, and therefore the final matrix must fill in the full `a:b` square. By including different combinations of lower-order interactions, we can control how this overall variance is partitioned into distinct terms. *Exercise:* create the similar diagram for a formula that includes a three-way interaction, like ``1 + a + a:b + a:b:c`` or ``1 + a:b:c``. Hint: it's a cube. Then, send us your diagram for inclusion in this documentation [#shameless]_. Finally, we've so far only discussed purely categorical interactions. Bringing numerical interactions into the mix doesn't make things much more complicated. Each combination of numerical factors is considered to be distinct from all other combinations, so we divide all of our terms into groups based on which numerical factors they contain (just like we do when sorting terms, as described above), and then within each group we separately apply the algorithm described here to the categorical parts of each term. Technical details ----------------- The actual algorithm Patsy uses to produce the above coding is very simple. Within the group of terms associated with each combination of numerical factors, it works from left to right. For each term it encounters, it breaks the categorical part of the interaction down into minimal pieces, e.g. `a:b` is replaced by `1 + (a-) + (b-) + (a-):(b-)`: .. container:: align-center |a:b| |arrow| |1 a- b- a-:b-| .. |arrow| image:: figures/redundancy-arrow.png .. |1 a- b- a-:b-| image:: figures/redundancy-1-ar-br-arbr.png (Formally speaking, these "minimal pieces" consist of the set of all subsets of the original interaction.) Then, any of the minimal pieces which were used by a previous term within this group are deleted, since they are redundant: .. container:: align-center |1 a- b- a-:b-| |arrow| |a- a-:b-| .. 
|a- a-:b-| image:: figures/redundancy-ar-arbr.png and then we greedily recombine the pieces that are left by repeatedly merging adjacent pieces according to the rule `ANYTHING + ANYTHING : FACTOR- = ANYTHING : FACTOR`: .. container:: align-center |a- a-:b-| |arrow| |a-:b| .. *Exercise:* Prove formally that the space spanned by `ANYTHING + ANYTHING : FACTOR-` is identical to the space spanned by `ANYTHING : FACTOR`. *Exercise:* Either show that the greedy algorithm here produces optimal encodings in some sense (e.g., smallest number of pieces used), or else find a better algorithm. (Extra credit: implement your algorithm and submit a pull request [#still-shameless]_.) Is this algorithm correct? A full formal proof would be too tedious for this reference manual, but here's a sketch of the analysis. Recall that our goal is to maintain two invariants: the design matrix column space should include the space associated with each term, and should avoid "structural redundancy", i.e. it should be full rank on at least some data sets. It's easy to see the above algorithm will never "lose" columns, since the only time it eliminates a subspace is when it has previously processed that exact subspace within the same design. But will it always detect all the redundancies that are present? That is guaranteed by the following theorem: *Theorem:* Let two sets of factors, :math:`F = {f_1, \dots, f_n}` and :math:`G = {g_1, \dots, g_m}` be given, and let :math:`F = F_{\text{num}} \cup F_{\text{categ}}` be the numerical and categorical factors, respectively (and similarly for :math:`G = G_{\text{num}} \cup G_{\text{categ}}`. 
Then the space represented by the interaction :math:`f_1 : \cdots : f_n` has a non-trivial intersection with the space represented by the interaction :math:`g_1 : \cdots : g_m` whenever: * :math:`F_{\text{num}} = G_{\text{num}}`, and * :math:`F_{\text{categ}} \cap G_{\text{categ}} \neq \emptyset` And, furthermore, whenever this condition does not hold, then there exists some assignment of values to the factors for which the associated vector spaces have only a trivial intersection. *Exercise:* Prove it. *Exercise:* Show that given a sufficient number of rows, the set of factor assignments on which :math:`f_1 : \cdots : f_n` represents a subspace of :math:`g_1 : \cdots : g_n` without the above conditions being satisfied is actually a zero set. Corollary: Patsy's strategy of dividing into groups by numerical factors, and then comparing all subsets of the remaining categorical factors, allows it to precisely identify and avoid structural redundancies. Footnotes --------- .. [#R-brag] This is one of the places where Patsy improves on R, which produces incorrect output in this case (see :ref:`R-comparison`). .. [#shameless] Yes, I'm lazy. And shameless. .. [#still-shameless] Yes, still shameless. patsy-0.5.2/doc/index.rst000066400000000000000000000013401412400214200152500ustar00rootroot00000000000000.. patsy documentation master file, created by sphinx-quickstart on Sat Dec 12 22:10:41 2009. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. patsy - Describing statistical models in Python =================================================== Contents: .. 
toctree:: :maxdepth: 2 overview.rst quickstart.rst formulas.rst categorical-coding.rst stateful-transforms.rst spline-regression.rst expert-model-specification.rst library-developers.rst R-comparison.rst py2-versus-py3.rst API-reference.rst builtins-reference.rst changes.rst Indices and tables ================== * :ref:`genindex` * :ref:`search` patsy-0.5.2/doc/library-developers.rst000066400000000000000000000133351412400214200177620ustar00rootroot00000000000000.. _library-developers: Using Patsy in your library ============================== .. currentmodule:: patsy Our goal is to make Patsy the de facto standard for describing models in Python, regardless of the underlying package in use -- just as formulas are the standard interface to all R packages. Therefore we've tried to make it as easy as possible for you to build Patsy support into your libraries. Patsy is a good houseguest: * Pure Python, no compilation necessary. * Exhaustive tests (>98% statement coverage at time of writing) and documentation (you're looking at it). * No dependencies besides numpy. * Tested and supported on every version of Python since 2.5. (And 2.4 probably still works too if you really want it, it's just become too hard to keep a working 2.4 environment on the test server.) So you can be pretty confident that adding a dependency on Patsy won't create much hassle for your users. And, of course, the fundamental design is very conservative -- the formula mini-language in S was first described in Chambers and Hastie (1992), more than two decades ago. It's still in heavy use today in R, which is one of the most popular environments for statistical programming. Many of your users may already be familiar with it. So we can be pretty certain that it will hold up to real-world usage. Using the high-level interface ------------------------------ If you have a function whose signature currently looks like this:: def mymodel2(X, y, ...): ... or this:: def mymodel1(X, ...): ... 
then adding Patsy support is extremely easy (though of course like any other API change, you may have to deprecate the old interface, or provide two interfaces in parallel, depending on your situation). Just write something like:: def mymodel2_patsy(formula_like, data={}, ...): y, X = patsy.dmatrices(formula_like, data, 1) ... or:: def mymodel1_patsy(formula_like, data={}, ...): X = patsy.dmatrix(formula_like, data, 1) ... (See :func:`dmatrices` and :func:`dmatrix` for details.) This won't force your users to switch to formulas immediately; they can replace code that looks like this:: X, y = build_matrices_laboriously() result = mymodel2(X, y, ...) other_result = mymodel1(X, ...) with code like this:: X, y = build_matrices_laboriously() result = mymodel2((y, X), data=None, ...) other_result = mymodel1(X, data=None, ...) Of course in the long run they might want to throw away that :func:`build_matrices_laboriously` function and start using formulas, but they aren't forced to just to start using your new interface. Working with metadata ^^^^^^^^^^^^^^^^^^^^^ Once you've started using Patsy to handle formulas, you'll probably want to take advantage of the metadata that Patsy provides, so that you can display regression coefficients by name and so forth. Design matrices processed by Patsy always have a ``.design_info`` attribute which contains lots of information about the design: see :class:`DesignInfo` for details. Predictions ^^^^^^^^^^^ Another nice feature is making predictions on new data. But this requires that we can take in new data, and transform it to create a new `X` matrix. Or if we want to compute the likelihood of our model on new data, we need both new `X` and `y` matrices. This is also easily done with Patsy -- first fetch the relevant :class:`DesignInfo` objects by doing ``input_data.design_info``, and then pass them to :func:`build_design_matrices` along with the new data. 
Example ^^^^^^^ Here's a simplified class for doing ordinary least-squares regression, demonstrating the above techniques: .. warning:: This code has not been validated for numerical correctness. .. literalinclude:: _examples/example_lm.py And here's how it can be used: .. ipython:: python :suppress: with open("_examples/example_lm.py") as f: exec(f.read()) .. ipython:: python :okwarning: from patsy import demo_data data = demo_data("x", "y", "a") # Old and boring approach (but it still works): X = np.column_stack(([1] * len(data["y"]), data["x"])) LM((data["y"], X)) # Fancy new way: m = LM("y ~ x", data) m m.predict({"x": [10, 20, 30]}) m.loglik(data) m.loglik({"x": [10, 20, 30], "y": [-1, -2, -3]}) # Your users get support for categorical predictors for free: LM("y ~ a", data) # And variable transformations too: LM("y ~ np.log(x ** 2)", data) Other cool tricks ^^^^^^^^^^^^^^^^^ If you want to compute ANOVAs, then check out :attr:`DesignInfo.term_name_slices`, :meth:`DesignInfo.slice`. If you support linear hypothesis tests or otherwise allow your users to specify linear constraints on model parameters, consider taking advantage of :meth:`DesignInfo.linear_constraint`. Extending the formula syntax ---------------------------- The above documentation assumes that you have a relatively simple model that can be described by one or two matrices (plus whatever other arguments you take). This covers many of the most popular models, but it's definitely not sufficient for every model out there. Internally, Patsy is designed to be very flexible -- for example, it's quite straightforward to add custom operators to the formula parser, or otherwise extend the formula evaluation machinery. (Heck, it only took an hour or two to repurpose it for a totally different purpose, parsing linear constraints.) 
But extending Patsy in a more fundamental way then this will require just a wee bit more complicated API than just calling :func:`dmatrices`, and for this initial release, we've been busy enough getting the basics working that we haven't yet taken the time to pin down a public extension API we can support. So, if you want something fancier -- please give us a nudge, it's entirely likely we can work something out. patsy-0.5.2/doc/make.bat000066400000000000000000000056351412400214200150270ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation set SPHINXBUILD=sphinx-build set ALLSPHINXOPTS=-d _build/doctrees %SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. changes to make an overview over all changed/added/deprecated items echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (_build\*) do rmdir /q /s %%i del /q /s _build\* goto end ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% _build/html echo. echo.Build finished. The HTML pages are in _build/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% _build/dirhtml echo. echo.Build finished. The HTML pages are in _build/dirhtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% _build/pickle echo. echo.Build finished; now you can process the pickle files. 
goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% _build/json echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% _build/htmlhelp echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in _build/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% _build/qthelp echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in _build/qthelp, like this: echo.^> qcollectiongenerator _build\qthelp\scikitssparse.qhcp echo.To view the help file: echo.^> assistant -collectionFile _build\qthelp\scikitssparse.ghc goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% _build/latex echo. echo.Build finished; the LaTeX files are in _build/latex. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% _build/changes echo. echo.The overview file is in _build/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% _build/linkcheck echo. echo.Link check complete; look for any errors in the above output ^ or in _build/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% _build/doctest echo. echo.Testing of doctests in the sources finished, look at the ^ results in _build/doctest/output.txt. goto end ) :end patsy-0.5.2/doc/overview.rst000066400000000000000000000121141412400214200160100ustar00rootroot00000000000000Overview ======== |epigraph|_ .. |epigraph| replace:: *"It's only a model."* .. _epigraph: https://en.wikipedia.org/wiki/Patsy_%28Monty_Python%29 :mod:`patsy` is a Python package for describing statistical models (especially linear models, or models that have a linear component) and building design matrices. It is closely inspired by and compatible with the `formula `_ mini-language used in `R `_ and `S `_. 
For instance, if we have some variable `y`, and we want to regress it against some other variables `x`, `a`, `b`, and the `interaction `_ of `a` and `b`, then we simply write:: patsy.dmatrices("y ~ x + a + b + a:b", data) and Patsy takes care of building appropriate matrices. Furthermore, it: * Allows data transformations to be specified using arbitrary Python code: instead of ``x``, we could have written ``log(x)``, ``(x > 0)``, or even ``log(x) if x > 1e-5 else log(1e-5)``, * Provides a range of convenient options for coding `categorical `_ variables, including automatic detection and removal of redundancies, * Knows how to apply 'the same' transformation used on original data to new data, even for tricky transformations like centering or standardization (critical if you want to use your model to make predictions), * Has an incremental mode to handle data sets which are too large to fit into memory at one time, * Provides a language for symbolic, human-readable specification of linear constraint matrices, * Has a thorough test suite (>97% statement coverage) and solid underlying theory, allowing it to correctly handle corner cases that even R gets wrong, and * Features a simple API for integration into statistical packages. What Patsy *won't* do is, well, statistics --- it just lets you describe models in general terms. It doesn't know or care whether you ultimately want to do linear regression, time-series analysis, or fit a forest of `decision trees `_, and it certainly won't do any of those things for you --- it just gives a high-level language for describing which factors you want your underlying model to take into account. It's not suitable for implementing arbitrary non-linear models from scratch; for that, you'll be better off with something like `Theano `_, `SymPy `_, or just plain Python. 
But if you're using a statistical package that requires you to provide a raw model matrix, then you can use Patsy to painlessly construct that model matrix; and if you're the author of a statistics package, then I hope you'll consider integrating Patsy as part of your front-end. Patsy's goal is to become the standard high-level interface to describing statistical models in Python, regardless of what particular model or library is being used underneath. Download -------- The current release may be downloaded from the Python Package index at http://pypi.python.org/pypi/patsy/ Or the latest *development version* may be found in our `Git repository `_:: git clone git://github.com/pydata/patsy.git Requirements ------------ Installing :mod:`patsy` requires: * `Python `_ (version 2.6, 2.7, or 3.3+) * `Six `_ * `NumPy `_ Installation ------------ If you have ``pip`` installed, then a simple :: pip install --upgrade patsy should get you the latest version. Otherwise, download and unpack the source distribution, and then run :: python setup.py install Contact ------- Post your suggestions and questions directly to the `pydata mailing list `_ (pydata@googlegroups.com, `gmane archive `_), or to our `bug tracker `_. You could also contact `Nathaniel J. Smith `_ directly, but really the mailing list is almost always a better bet, because more people will see your query and others will be able to benefit from any answers you get. License ------- 2-clause BSD. See the file `LICENSE.txt `_ for details. Users ----- We currently know of the following projects using Patsy to provide a high-level interface to their statistical code: * `Statsmodels `_ * `PyMC3 `_ (`tutorial `_) * `HDDM `_ * `rERPy `_ * `UrbanSim `_ If you'd like your project to appear here, see our documentation for :ref:`library developers `! patsy-0.5.2/doc/py2-versus-py3.rst000066400000000000000000000032321412400214200167130ustar00rootroot00000000000000.. 
_py2-versus-py3: Python 2 versus Python 3 ======================== .. currentmodule:: patsy The biggest difference between Python 2 and Python 3 is in their string handling, and this is particularly relevant to Patsy since it parses user input. We follow a simple rule: input to Patsy should always be of type ``str``. That means that on Python 2, you should pass byte-strings (not unicode), and on Python 3, you should pass unicode strings (not byte-strings). Similarly, when Patsy passes text back (e.g. :attr:`DesignInfo.column_names`), it's always in the form of a ``str``. In addition to this being the most convenient for users (you never need to use any b"weird" u"prefixes" when writing a formula string), it's actually a necessary consequence of a deeper change in the Python language: in Python 2, Python code itself is represented as byte-strings, and that's the only form of input accepted by the :mod:`tokenize` module. On the other hand, Python 3's tokenizer and parser use unicode, and since Patsy processes Python code, it has to follow suit. There is one exception to this rule: on Python 2, as a convenience for those using ``from __future__ import unicode_literals``, the high-level API functions :func:`dmatrix`, :func:`dmatrices`, :func:`incr_dbuilders`, and :func:`incr_dbuilder` do accept ``unicode`` strings -- BUT these unicode string objects are still required to contain only ASCII characters; if they contain any non-ASCII characters then an error will be raised. If you really need non-ASCII in your formulas, then you should consider upgrading to Python 3. Low-level APIs like :meth:`ModelDesc.from_formula` continue to insist on ``str`` objects only. patsy-0.5.2/doc/quickstart.rst000066400000000000000000000175031412400214200163430ustar00rootroot00000000000000Quickstart ========== .. currentmodule:: patsy If you prefer to learn by diving in and getting your feet wet, then here are some cut-and-pasteable examples to play with. 
First, let's import stuff and get some data to work with: .. ipython:: python import numpy as np from patsy import dmatrices, dmatrix, demo_data data = demo_data("a", "b", "x1", "x2", "y", "z column") :func:`demo_data` gives us a mix of categorical and numerical variables: .. ipython:: python data Of course Patsy doesn't much care what sort of object you store your data in, so long as it can be indexed like a Python dictionary, ``data[varname]``. You may prefer to store your data in a `pandas `_ DataFrame, or a numpy record array... whatever makes you happy. Now, let's generate design matrices suitable for regressing ``y`` onto ``x1`` and ``x2``. .. ipython:: python dmatrices("y ~ x1 + x2", data) The return value is a Python tuple containing two DesignMatrix objects, the first representing the left-hand side of our formula, and the second representing the right-hand side. Notice that an intercept term was automatically added to the right-hand side. These are just ordinary numpy arrays with some extra metadata and a fancy __repr__ method attached, so we can pass them directly to a regression function like :func:`np.linalg.lstsq`: .. ipython:: python :okwarning: outcome, predictors = dmatrices("y ~ x1 + x2", data) betas = np.linalg.lstsq(predictors, outcome)[0].ravel() for name, beta in zip(predictors.design_info.column_names, betas): print("%s: %s" % (name, beta)) Of course the resulting numbers aren't very interesting, since this is just random data. If you just want the design matrix alone, without the ``y`` values, use :func:`dmatrix` and leave off the ``y ~`` part at the beginning: .. ipython:: python dmatrix("x1 + x2", data) We'll use dmatrix for the rest of the examples, since seeing the outcome matrix over and over would get boring. This matrix's metadata is stored in an extra attribute called ``.design_info``, which is a :class:`DesignInfo` object you can explore at your leisure: .. 
ipython:: In [0]: d = dmatrix("x1 + x2", data) @verbatim In [0]: d.design_info. d.design_info.builder d.design_info.slice d.design_info.column_name_indexes d.design_info.term_name_slices d.design_info.column_names d.design_info.term_names d.design_info.describe d.design_info.term_slices d.design_info.linear_constraint d.design_info.terms Usually the intercept is useful, but if we don't want it we can get rid of it: .. ipython:: python dmatrix("x1 + x2 - 1", data) We can transform variables using arbitrary Python code: .. ipython:: python dmatrix("x1 + np.log(x2 + 10)", data) Notice that ``np.log`` is being pulled out of the environment where :func:`dmatrix` was called -- ``np.log`` is accessible because we did ``import numpy as np`` up above. Any functions or variables that you could reference when calling :func:`dmatrix` can also be used inside the formula passed to :func:`dmatrix`. For example: .. ipython:: python new_x2 = data["x2"] * 100 dmatrix("new_x2") Patsy has some transformation functions "built in", that are automatically accessible to your code: .. ipython:: python dmatrix("center(x1) + standardize(x2)", data) See :mod:`patsy.builtins` for a complete list of functions made available to formulas. You can also define your own transformation functions in the ordinary Python way: .. ipython:: python def double(x): return 2 * x dmatrix("x1 + double(x1)", data) .. currentmodule:: patsy.builtins This flexibility does create problems in one case, though -- because we interpret whatever you write in-between the ``+`` signs as Python code, you do in fact have to write valid Python code. And this can be tricky if your variable names have funny characters in them, like whitespace or punctuation. Fortunately, patsy has a builtin "transformation" called :func:`Q` that lets you "quote" such variables: .. ipython:: In [1]: weird_data = demo_data("weird column!", "x1") # This is an error... @verbatim In [2]: dmatrix("weird column! + x1", weird_data) [...] 
PatsyError: error tokenizing input (maybe an unclosed string?) weird column! + x1 ^ # ...but this works: In [3]: dmatrix("Q('weird column!') + x1", weird_data) :func:`Q` even plays well with other transformations: .. ipython:: python dmatrix("double(Q('weird column!')) + x1", weird_data) Arithmetic transformations are also possible, but you'll need to "protect" them by wrapping them in :func:`I()`, so that Patsy knows that you really do want ``+`` to mean addition: .. ipython:: python dmatrix("I(x1 + x2)", data) # compare to "x1 + x2" .. currentmodule:: patsy Note that while Patsy goes to considerable efforts to take in data represented using different Python data types and convert them into a standard representation, all this work happens *after* any transformations you perform as part of your formula. So, for example, if your data is in the form of numpy arrays, "+" will perform element-wise addition, but if it is in standard Python lists, it will perform concatenation: .. ipython:: python dmatrix("I(x1 + x2)", {"x1": np.array([1, 2, 3]), "x2": np.array([4, 5, 6])}) dmatrix("I(x1 + x2)", {"x1": [1, 2, 3], "x2": [4, 5, 6]}) Patsy becomes particularly useful when you have categorical data. If you use a predictor that has a categorical type (e.g. strings or bools), it will be automatically coded. Patsy automatically chooses an appropriate way to code categorical data to avoid producing a redundant, overdetermined model. If there is just one categorical variable alone, the default is to dummy code it: .. ipython:: python dmatrix("0 + a", data) But if you did that and put the intercept back in, you'd get a redundant model. So if the intercept is present, Patsy uses a reduced-rank contrast code (treatment coding by default): .. ipython:: python dmatrix("a", data) The ``T.`` notation is there to remind you that these columns are treatment coded. Interactions are also easy -- they represent the cartesian product of all the factors involved. 
Here's a dummy coding of each *combination* of values taken by ``a`` and ``b``: .. ipython:: python dmatrix("0 + a:b", data) But interactions also know how to use contrast coding to avoid redundancy. If you have both main effects and interactions in a model, then Patsy goes from lower-order effects to higher-order effects, adding in just enough columns to produce a well-defined model. The result is that each set of columns measures the *additional* contribution of this effect -- just what you want for a traditional ANOVA: .. ipython:: python dmatrix("a + b + a:b", data) Since this is so common, there's a convenient short-hand: .. ipython:: python dmatrix("a*b", data) Of course you can use :ref:`other coding schemes ` too (or even :ref:`define your own `). Here's :class:`orthogonal polynomial coding `: .. ipython:: python dmatrix("C(c, Poly)", {"c": ["c1", "c1", "c2", "c2", "c3", "c3"]}) You can even write interactions between categorical and numerical variables. Here we fit two different slope coefficients for ``x1``; one for the ``a1`` group, and one for the ``a2`` group: .. ipython:: python dmatrix("a:x1", data) The same redundancy avoidance code works here, so if you'd rather have treatment-coded slopes (one slope for the ``a1`` group, and a second for the difference between the ``a1`` and ``a2`` group slopes), then you can request it like this: .. ipython:: python # compare to the difference between "0 + a" and "1 + a" dmatrix("x1 + a:x1", data) And more complex expressions work too: .. ipython:: python dmatrix("C(a, Poly):center(x1)", data) patsy-0.5.2/doc/sphinxext/000077500000000000000000000000001412400214200154435ustar00rootroot00000000000000patsy-0.5.2/doc/sphinxext/requirements.txt000066400000000000000000000000571412400214200207310ustar00rootroot00000000000000numpy scipy pandas mistune jsonschema ipython patsy-0.5.2/doc/spline-regression.rst000066400000000000000000000216021412400214200176140ustar00rootroot00000000000000.. 
_spline-regression: Spline regression ================= .. currentmodule:: patsy .. ipython:: python :suppress: import numpy as np from patsy import dmatrix, build_design_matrices Patsy offers a set of specific stateful transforms (for more details about stateful transforms see :ref:`stateful-transforms`) that you can use in formulas to generate splines bases and express non-linear fits. General B-splines ----------------- B-spline bases can be generated with the :func:`bs` stateful transform. The spline bases returned by :func:`bs` are designed to be compatible with those produced by the R ``bs`` function. The following code illustrates a typical basis and the resulting spline: .. ipython:: python import matplotlib.pyplot as plt plt.title("B-spline basis example (degree=3)"); x = np.linspace(0., 1., 100) y = dmatrix("bs(x, df=6, degree=3, include_intercept=True) - 1", {"x": x}) # Define some coefficients b = np.array([1.3, 0.6, 0.9, 0.4, 1.6, 0.7]) # Plot B-spline basis functions (colored curves) each multiplied by its coeff plt.plot(x, y*b); @savefig basis-bspline.png align=center # Plot the spline itself (sum of the basis functions, thick black curve) plt.plot(x, np.dot(y, b), color='k', linewidth=3); In the following example we first set up our B-spline basis using some data and then make predictions on a new set of data: .. ipython:: python data = {"x": np.linspace(0., 1., 100)} design_matrix = dmatrix("bs(x, df=4)", data) new_data = {"x": [0.1, 0.25, 0.9]} build_design_matrices([design_matrix.design_info], new_data)[0] :func:`bs` can produce B-spline bases of arbitrary degrees -- e.g., ``degree=0`` will give produce piecewise-constant functions, ``degree=1`` will produce piecewise-linear functions, and the default ``degree=3`` produces cubic splines. The next section describes more specialized functions for producing different types of cubic splines. 
Natural and cyclic cubic regression splines ------------------------------------------- Natural and cyclic cubic regression splines are provided through the stateful transforms :func:`cr` and :func:`cc` respectively. Here the spline is parameterized directly using its values at the knots. These splines were designed to be compatible with those found in the R package `mgcv `_ (these are called *cr*, *cs* and *cc* in the context of *mgcv*), but can be used with any model. .. warning:: Note that the compatibility with *mgcv* applies only to the **generation of spline bases**: we do not implement any kind of *mgcv*-compatible penalized fitting process. Thus these spline bases can be used to precisely reproduce predictions from a model previously fitted with *mgcv*, or to serve as building blocks for other regression models (like OLS). Here are some illustrations of typical natural and cyclic spline bases: .. ipython:: python plt.title("Natural cubic regression spline basis example"); y = dmatrix("cr(x, df=6) - 1", {"x": x}) # Plot natural cubic regression spline basis functions (colored curves) each multiplied by its coeff plt.plot(x, y*b); @savefig basis-crspline.png align=center # Plot the spline itself (sum of the basis functions, thick black curve) plt.plot(x, np.dot(y, b), color='k', linewidth=3); .. ipython:: python plt.title("Cyclic cubic regression spline basis example"); y = dmatrix("cc(x, df=6) - 1", {"x": x}) # Plot cyclic cubic regression spline basis functions (colored curves) each multiplied by its coeff plt.plot(x, y*b); @savefig basis-ccspline.png align=center # Plot the spline itself (sum of the basis functions, thick black curve) plt.plot(x, np.dot(y, b), color='k', linewidth=3); In the following example we first set up our spline basis using same data as for the B-spline example above and then make predictions on a new set of data: .. 
ipython:: python design_matrix = dmatrix("cr(x, df=4, constraints='center')", data) new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0] new_design_matrix np.asarray(new_design_matrix) Note that in the above example 5 knots are actually used to achieve 4 degrees of freedom since a centering constraint is requested. Note that the API is different from *mgcv*: * In patsy one can specify the number of degrees of freedom directly (actual number of columns of the resulting design matrix) whereas in *mgcv* one has to specify the number of knots to use. For instance, in the case of cyclic regression splines (with no additional constraints) the actual degrees of freedom is the number of knots minus one. * In patsy one can specify inner knots as well as lower and upper exterior knots which can be useful for cyclic spline for instance. * In *mgcv* a centering/identifiability constraint is automatically computed and absorbed in the resulting design matrix. The purpose of this is to ensure that if ``b`` is the array of *initial* parameters (corresponding to the *initial* unconstrained design matrix ``dm``), our model is centered, ie ``np.mean(np.dot(dm, b))`` is zero. We can rewrite this as ``np.dot(c, b)`` being zero with ``c`` a 1-row constraint matrix containing the mean of each column of ``dm``. Absorbing this constraint in the *final* design matrix means that we rewrite the model in terms of *unconstrained* parameters (this is done through a QR-decomposition of the constraint matrix). Those unconstrained parameters have the property that when projected back into the initial parameters space (let's call ``b_back`` the result of this projection), the constraint ``np.dot(c, b_back)`` being zero is automatically verified. In patsy one can choose between no constraint, a centering constraint like *mgcv* (``'center'``) or a user provided constraint matrix. 
Tensor product smooths ---------------------- Smooths of several covariates can be generated through a tensor product of the bases of marginal univariate smooths. For these marginal smooths one can use the above defined splines as well as user defined smooths provided they actually transform input univariate data into some kind of smooth functions basis producing a 2-d array output with the ``(i, j)`` element corresponding to the value of the ``j`` th basis function at the ``i`` th data point. The tensor product stateful transform is called :func:`te`. .. note:: The implementation of this tensor product is compatible with *mgcv* when considering only cubic regression spline marginal smooths, which means that generated bases will match those produced by *mgcv*. Recall that we do not implement any kind of *mgcv*-compatible penalized fitting process. In the following code we show an example of tensor product basis functions used to represent a smooth of two variables ``x1`` and ``x2``. Note how marginal spline bases patterns can be observed on the x and y contour projections: .. 
ipython:: In [10]: from matplotlib import cm In [20]: from mpl_toolkits.mplot3d.axes3d import Axes3D In [30]: x1 = np.linspace(0., 1., 100) In [40]: x2 = np.linspace(0., 1., 100) In [50]: x1, x2 = np.meshgrid(x1, x2) In [60]: df = 3 In [70]: y = dmatrix("te(cr(x1, df), cc(x2, df)) - 1", ....: {"x1": x1.ravel(), "x2": x2.ravel(), "df": df}) ....: In [80]: print y.shape In [90]: fig = plt.figure() In [100]: fig.suptitle("Tensor product basis example (2 covariates)"); In [110]: for i in range(df * df): .....: ax = fig.add_subplot(df, df, i + 1, projection='3d') .....: yi = y[:, i].reshape(x1.shape) .....: ax.plot_surface(x1, x2, yi, rstride=4, cstride=4, alpha=0.15) .....: ax.contour(x1, x2, yi, zdir='z', cmap=cm.coolwarm, offset=-0.5) .....: ax.contour(x1, x2, yi, zdir='y', cmap=cm.coolwarm, offset=1.2) .....: ax.contour(x1, x2, yi, zdir='x', cmap=cm.coolwarm, offset=-0.2) .....: ax.set_xlim3d(-0.2, 1.0) .....: ax.set_ylim3d(0, 1.2) .....: ax.set_zlim3d(-0.5, 1) .....: ax.set_xticks([0, 1]) .....: ax.set_yticks([0, 1]) .....: ax.set_zticks([-0.5, 0, 1]) .....: @savefig basis-tesmooth.png align=center In [120]: fig.tight_layout() Following what we did for univariate splines in the preceding sections, we will now set up a 3-d smooth basis using some data and then make predictions on a new set of data: .. ipython:: python data = {"x1": np.linspace(0., 1., 100), "x2": np.linspace(0., 1., 100), "x3": np.linspace(0., 1., 100)} design_matrix = dmatrix("te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3), constraints='center')", data) new_data = {"x1": [0.1, 0.2], "x2": [0.2, 0.3], "x3": [0.3, 0.4]} new_design_matrix = build_design_matrices([design_matrix.design_info], new_data)[0] new_design_matrix np.asarray(new_design_matrix) patsy-0.5.2/doc/stateful-transforms.rst000066400000000000000000000223241412400214200201710ustar00rootroot00000000000000.. _stateful-transforms: Stateful transforms =================== .. 
currentmodule:: patsy There's a subtle problem that sometimes bites people when working with formulas. Suppose that I have some numerical data called ``x``, and I would like to center it before fitting. The obvious way would be to write:: y ~ I(x - np.mean(x)) # BROKEN! Don't do this! or, even better we could package it up into a function: .. ipython:: python def naive_center(x): # BROKEN! don't use! x = np.asarray(x) return x - np.mean(x) and then write our formula like:: y ~ naive_center(x) Why is this a bad idea? Let's set up an example. .. ipython:: python import numpy as np from patsy import dmatrix, build_design_matrices, incr_dbuilder data = {"x": [1, 2, 3, 4]} Now we can build a design matrix and see what we get: .. ipython:: python mat = dmatrix("naive_center(x)", data) mat Those numbers look correct, and in fact they are correct. If all we're going to do with this model is call :func:`dmatrix` once, then everything is fine -- which is what makes this problem so insidious. Often we want to do more with a model than this. For instance, we might find some new data, and want to feed it into our model to make predictions. To do this, though, we first need to reapply the same transformation, like so: .. ipython:: python new_data = {"x": [5, 6, 7, 8]} # Broken! build_design_matrices([mat.design_info], new_data)[0] So it's clear what's happened here -- Patsy has centered the new data, just like it centered the old data. But if you think about what this means statistically, it makes no sense. According to this, the new data point where x is 5 will behave exactly like the old data point where x is 1, because they both produce the same input to the actual model. The problem is what it means to apply "the same transformation". Here, what we really want to do is to subtract the mean *of the original data* from the new data. Patsy's solution is called a *stateful transform*. 
These look like ordinary functions, but they perform a bit of magic to remember the state of the original data, and use it in transforming new data. Several useful stateful transforms are included out of the box, including one called :func:`center`. Using :func:`center` instead of :func:`naive_center` produces the same correct result for our original matrix. It's used in exactly the same way: .. ipython:: python fixed_mat = dmatrix("center(x)", data) fixed_mat But if we then feed in our new data, we also get out the correct result: .. ipython:: python # Correct! build_design_matrices([fixed_mat.design_info], new_data)[0] Another situation where we need some stateful transform magic is when we are working with data that is too large to fit into memory at once. To handle such cases, Patsy allows you to set up a design matrix while working our way incrementally through the data. But if we use :func:`naive_center` when building a matrix incrementally, then it centers each *chunk* of data, not the data as a whole. (Of course, depending on how your data is distributed, this might end up being just similar enough for you to miss the problem until it's too late.) .. ipython:: python data_chunked = [{"x": data["x"][:2]}, {"x": data["x"][2:]}] dinfo = incr_dbuilder("naive_center(x)", lambda: iter(data_chunked)) # Broken! np.row_stack([build_design_matrices([dinfo], chunk)[0] for chunk in data_chunked]) But if we use the proper stateful transform, this just works: .. ipython:: python dinfo = incr_dbuilder("center(x)", lambda: iter(data_chunked)) # Correct! np.row_stack([build_design_matrices([dinfo], chunk)[0] for chunk in data_chunked]) .. note:: Under the hood, the way this works is that :func:`incr_dbuilder` iterates through the data once to calculate the mean, and then we use :func:`build_design_matrices` to iterate through it a second time creating our design matrix. 
While taking two passes through a large data set may be slow, there's really no other way to accomplish what the user asked for. The good news is that Patsy is smart enough to make only the minimum number of passes necessary. For example, in our example with :func:`naive_center` above, :func:`incr_dbuilder` would not have done a full pass through the data at all. And if you have multiple stateful transforms in the same formula, then Patsy will process them in parallel in a single pass. And, of course, we can use the resulting :class:`DesignInfo` object for prediction as well: .. ipython:: python # Correct! build_design_matrices([dinfo], new_data)[0] In fact, Patsy's stateful transform handling is clever enough that it can support arbitrary mixing of stateful transforms with other Python code. E.g., if :func:`center` and :func:`spline` were both stateful transforms, then even a silly a formula like this will be handled 100% correctly:: y ~ I(spline(center(x1)) + center(x2)) However, it isn't perfect -- there are two things you have to be careful of. Let's put them in red: .. warning:: If you are unwise enough to ignore this section, write a function like ``naive_center`` above, and use it in a formula, then Patsy will not notice. If you use that formula with :func:`incr_dbuilders` or for predictions, then you will just silently get the wrong results. We have a plan to detect such cases, but it isn't implemented yet (and in any case can never be 100% reliable). So be careful! .. warning:: Even if you do use a "real" stateful transform like :func:`center` or :func:`standardize`, still have to make sure that Patsy can "see" that you are using such a transform. Currently the rule is that you must access the stateful transform function using a simple, bare variable reference, without any dots or other lookups:: dmatrix("y ~ center(x)", data) # okay asdf = patsy.center dmatrix("y ~ asdf(x)", data) # okay dmatrix("y ~ patsy.center(x)", data) # BROKEN! DON'T DO THIS! 
funcs = {"center": patsy.center} dmatrix("y ~ funcs['center'](x)", data) # BROKEN! DON'T DO THIS! Builtin stateful transforms --------------------------- There are a number of builtin stateful transforms beyond :func:`center`; see :ref:`stateful transforms ` in the API reference for a complete list. .. _stateful-transform-protocol: Defining a stateful transform ----------------------------- You can also easily define your own stateful transforms. The first step is to define a class which fulfills the stateful transform protocol. The lifecycle of a stateful transform object is as follows: #. An instance of your type will be constructed. #. :meth:`memorize_chunk` will be called one or more times. #. :meth:`memorize_finish` will be called once. #. :meth:`transform` will be called one or more times, on either the same or different data to what was initially passed to :meth:`memorize_chunk`. You can trust that any non-data arguments will be identical between calls to :meth:`memorize_chunk` and :meth:`transform`. And here are the methods and call signatures you need to define: .. class:: stateful_transform_protocol .. method:: __init__() :noindex: It must be possible to create an instance of the class by calling the constructor with no arguments. .. method:: memorize_chunk(*args, **kwargs) Update any internal state, based on the data passed into `memorize_chunk`. .. method:: memorize_finish() Do any housekeeping you want to do between the last call to :meth:`memorize_chunk` and the first call to :meth:`transform`. For example, if you are computing some summary statistic that cannot be done incrementally, then your :meth:`memorize_chunk` method might just store the data that's passed in, and then :meth:`memorize_finish` could compute the summary statistic and delete the stored data to free up the associated memory. .. method:: transform(*args, **kwargs) This method should transform the input data passed to it. 
It should be deterministic, and it should be "point-wise", in the sense that when passed an array it performs an independent transformation on each data point that is not affected by any other data points passed to :meth:`transform`. Then once you have created your class, pass it to :func:`stateful_transform` to create a callable stateful transform object suitable for use inside or outside formulas. Here's a simple example of how you might implement a working version of :func:`center` (though it's less robust and featureful than the real builtin):: class MyExampleCenter(object): def __init__(self): self._total = 0 self._count = 0 self._mean = None def memorize_chunk(self, x): self._total += np.sum(x) self._count += len(x) def memorize_finish(self): self._mean = self.total * 1. / self._count def transform(self, x): return x - self._mean my_example_center = patsy.stateful_transform(MyExampleCenter) print(my_example_center(np.array([1, 2, 3]))) But of course, if you come up with any useful ones, please let us know so we can incorporate them into patsy itself! patsy-0.5.2/patsy/000077500000000000000000000000001412400214200140045ustar00rootroot00000000000000patsy-0.5.2/patsy/__init__.py000066400000000000000000000066631412400214200161300ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2013 Nathaniel Smith # See file LICENSE.txt for license information. """patsy is a Python package for describing statistical models and building design matrices. It is closely inspired by the 'formula' mini-language used in R and S.""" import sys from patsy.version import __version__ # Do this first, to make it easy to check for warnings while testing: import os if os.environ.get("PATSY_FORCE_NO_WARNINGS"): import warnings warnings.filterwarnings("error", module="^patsy") del warnings del os import patsy.origin class PatsyError(Exception): """This is the main error type raised by Patsy functions. 
In addition to the usual Python exception features, you can pass a second argument to this function specifying the origin of the error; this is included in any error message, and used to help the user locate errors arising from malformed formulas. This second argument should be an :class:`Origin` object, or else an arbitrary object with a ``.origin`` attribute. (If it is neither of these things, then it will simply be ignored.) For ordinary display to the user with default formatting, use ``str(exc)``. If you want to do something cleverer, you can use the ``.message`` and ``.origin`` attributes directly. (The latter may be None.) """ def __init__(self, message, origin=None): Exception.__init__(self, message) self.message = message self.origin = None self.set_origin(origin) def __str__(self): if self.origin is None: return self.message else: return ("%s\n%s" % (self.message, self.origin.caretize(indent=4))) def set_origin(self, origin): # This is useful to modify an exception to add origin information as # it "passes by", without losing traceback information. (In Python 3 # we can use the built-in exception wrapping stuff, but it will be # some time before we can count on that...) if self.origin is None: if hasattr(origin, "origin"): origin = origin.origin if not isinstance(origin, patsy.origin.Origin): origin = None self.origin = origin __all__ = ["PatsyError"] # We make a rich API available for explicit use. To see what exactly is # exported, check each module's __all__, or import this module and look at its # __all__. def _reexport(mod): __all__.extend(mod.__all__) for var in mod.__all__: globals()[var] = getattr(mod, var) # This used to have less copy-paste, but explicit import statements make # packaging tools like py2exe and py2app happier. Sigh. 
import patsy.highlevel _reexport(patsy.highlevel) import patsy.build _reexport(patsy.build) import patsy.constraint _reexport(patsy.constraint) import patsy.contrasts _reexport(patsy.contrasts) import patsy.desc _reexport(patsy.desc) import patsy.design_info _reexport(patsy.design_info) import patsy.eval _reexport(patsy.eval) import patsy.origin _reexport(patsy.origin) import patsy.state _reexport(patsy.state) import patsy.user_util _reexport(patsy.user_util) import patsy.missing _reexport(patsy.missing) import patsy.splines _reexport(patsy.splines) import patsy.mgcv_cubic_splines _reexport(patsy.mgcv_cubic_splines) # XX FIXME: we aren't exporting any of the explicit parsing interface # yet. Need to figure out how to do that. patsy-0.5.2/patsy/build.py000066400000000000000000001234061412400214200154630ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2015 Nathaniel Smith # See file LICENSE.txt for license information. # This file defines the core design matrix building functions. 
# These are made available in the patsy.* namespace __all__ = ["design_matrix_builders", "build_design_matrices"] import itertools import six import numpy as np from patsy import PatsyError from patsy.categorical import (guess_categorical, CategoricalSniffer, categorical_to_int) from patsy.util import (atleast_2d_column_default, have_pandas, asarray_or_pandas, safe_issubdtype) from patsy.design_info import (DesignMatrix, DesignInfo, FactorInfo, SubtermInfo) from patsy.redundancy import pick_contrasts_for_term from patsy.eval import EvalEnvironment from patsy.contrasts import code_contrast_matrix, Treatment from patsy.compat import OrderedDict from patsy.missing import NAAction if have_pandas: import pandas class _MockFactor(object): def __init__(self, name="MOCKMOCK"): self._name = name def eval(self, state, env): return env["mock"] def name(self): return self._name def _max_allowed_dim(dim, arr, factor): if arr.ndim > dim: msg = ("factor '%s' evaluates to an %s-dimensional array; I only " "handle arrays with dimension <= %s" % (factor.name(), arr.ndim, dim)) raise PatsyError(msg, factor) def test__max_allowed_dim(): import pytest f = _MockFactor() _max_allowed_dim(1, np.array(1), f) _max_allowed_dim(1, np.array([1]), f) pytest.raises(PatsyError, _max_allowed_dim, 1, np.array([[1]]), f) pytest.raises(PatsyError, _max_allowed_dim, 1, np.array([[[1]]]), f) _max_allowed_dim(2, np.array(1), f) _max_allowed_dim(2, np.array([1]), f) _max_allowed_dim(2, np.array([[1]]), f) pytest.raises(PatsyError, _max_allowed_dim, 2, np.array([[[1]]]), f) def _eval_factor(factor_info, data, NA_action): factor = factor_info.factor result = factor.eval(factor_info.state, data) # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask if factor_info.type == "numerical": result = atleast_2d_column_default(result, preserve_pandas=True) _max_allowed_dim(2, result, factor) if result.shape[1] != factor_info.num_columns: raise PatsyError("when evaluating factor %s, I got %s columns " 
"instead of the %s I was expecting" % (factor.name(), factor_info.num_columns, result.shape[1]), factor) if not safe_issubdtype(np.asarray(result).dtype, np.number): raise PatsyError("when evaluating numeric factor %s, " "I got non-numeric data of type '%s'" % (factor.name(), result.dtype), factor) return result, NA_action.is_numerical_NA(result) # returns either a 1d ndarray or a pandas.Series, plus is_NA mask else: assert factor_info.type == "categorical" result = categorical_to_int(result, factor_info.categories, NA_action, origin=factor_info.factor) assert result.ndim == 1 return result, np.asarray(result == -1) def test__eval_factor_numerical(): import pytest naa = NAAction() f = _MockFactor() fi1 = FactorInfo(f, "numerical", {}, num_columns=1, categories=None) assert fi1.factor is f eval123, is_NA = _eval_factor(fi1, {"mock": [1, 2, 3]}, naa) assert eval123.shape == (3, 1) assert np.all(eval123 == [[1], [2], [3]]) assert is_NA.shape == (3,) assert np.all(~is_NA) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [[[1]]]}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [[1, 2]]}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": ["a", "b"]}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [True, False]}, naa) fi2 = FactorInfo(_MockFactor(), "numerical", {}, num_columns=2, categories=None) eval123321, is_NA = _eval_factor(fi2, {"mock": [[1, 3], [2, 2], [3, 1]]}, naa) assert eval123321.shape == (3, 2) assert np.all(eval123321 == [[1, 3], [2, 2], [3, 1]]) assert is_NA.shape == (3,) assert np.all(~is_NA) pytest.raises(PatsyError, _eval_factor, fi2, {"mock": [1, 2, 3]}, naa) pytest.raises(PatsyError, _eval_factor, fi2, {"mock": [[1, 2, 3]]}, naa) ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]}, NAAction(NA_types=["NaN"])) assert np.array_equal(is_NA, [False, False, True]) ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]}, NAAction(NA_types=[])) assert np.array_equal(is_NA, [False, False, False]) if have_pandas: 
eval_ser, _ = _eval_factor(fi1, {"mock": pandas.Series([1, 2, 3], index=[10, 20, 30])}, naa) assert isinstance(eval_ser, pandas.DataFrame) assert np.array_equal(eval_ser, [[1], [2], [3]]) assert np.array_equal(eval_ser.index, [10, 20, 30]) eval_df1, _ = _eval_factor(fi1, {"mock": pandas.DataFrame([[2], [1], [3]], index=[20, 10, 30])}, naa) assert isinstance(eval_df1, pandas.DataFrame) assert np.array_equal(eval_df1, [[2], [1], [3]]) assert np.array_equal(eval_df1.index, [20, 10, 30]) eval_df2, _ = _eval_factor(fi2, {"mock": pandas.DataFrame([[2, 3], [1, 4], [3, -1]], index=[20, 30, 10])}, naa) assert isinstance(eval_df2, pandas.DataFrame) assert np.array_equal(eval_df2, [[2, 3], [1, 4], [3, -1]]) assert np.array_equal(eval_df2.index, [20, 30, 10]) pytest.raises(PatsyError, _eval_factor, fi2, {"mock": pandas.Series([1, 2, 3], index=[10, 20, 30])}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": pandas.DataFrame([[2, 3], [1, 4], [3, -1]], index=[20, 30, 10])}, naa) def test__eval_factor_categorical(): import pytest from patsy.categorical import C naa = NAAction() f = _MockFactor() fi1 = FactorInfo(f, "categorical", {}, num_columns=None, categories=("a", "b")) assert fi1.factor is f cat1, _ = _eval_factor(fi1, {"mock": ["b", "a", "b"]}, naa) assert cat1.shape == (3,) assert np.all(cat1 == [1, 0, 1]) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": ["c"]}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": C(["a", "c"])}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": C(["a", "b"], levels=["b", "a"])}, naa) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": [1, 0, 1]}, naa) bad_cat = np.asarray(["b", "a", "a", "b"]) bad_cat.resize((2, 2)) pytest.raises(PatsyError, _eval_factor, fi1, {"mock": bad_cat}, naa) cat1_NA, is_NA = _eval_factor(fi1, {"mock": ["a", None, "b"]}, NAAction(NA_types=["None"])) assert np.array_equal(is_NA, [False, True, False]) assert np.array_equal(cat1_NA, [0, -1, 1]) pytest.raises(PatsyError, _eval_factor, 
fi1, {"mock": ["a", None, "b"]}, NAAction(NA_types=[])) fi2 = FactorInfo(_MockFactor(), "categorical", {}, num_columns=None, categories=[False, True]) cat2, _ = _eval_factor(fi2, {"mock": [True, False, False, True]}, naa) assert cat2.shape == (4,) assert np.all(cat2 == [1, 0, 0, 1]) if have_pandas: s = pandas.Series(["b", "a"], index=[10, 20]) cat_s, _ = _eval_factor(fi1, {"mock": s}, naa) assert isinstance(cat_s, pandas.Series) assert np.array_equal(cat_s, [1, 0]) assert np.array_equal(cat_s.index, [10, 20]) sbool = pandas.Series([True, False], index=[11, 21]) cat_sbool, _ = _eval_factor(fi2, {"mock": sbool}, naa) assert isinstance(cat_sbool, pandas.Series) assert np.array_equal(cat_sbool, [1, 0]) assert np.array_equal(cat_sbool.index, [11, 21]) def _column_combinations(columns_per_factor): # For consistency with R, the left-most item iterates fastest: iterators = [range(n) for n in reversed(columns_per_factor)] for reversed_combo in itertools.product(*iterators): yield reversed_combo[::-1] def test__column_combinations(): assert list(_column_combinations([2, 3])) == [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)] assert list(_column_combinations([3])) == [(0,), (1,), (2,)] assert list(_column_combinations([])) == [()] def _subterm_column_combinations(factor_infos, subterm): columns_per_factor = [] for factor in subterm.factors: if factor in subterm.contrast_matrices: columns = subterm.contrast_matrices[factor].matrix.shape[1] else: columns = factor_infos[factor].num_columns columns_per_factor.append(columns) return _column_combinations(columns_per_factor) def _subterm_column_names_iter(factor_infos, subterm): total = 0 for i, column_idxs in enumerate( _subterm_column_combinations(factor_infos, subterm)): name_pieces = [] for factor, column_idx in zip(subterm.factors, column_idxs): fi = factor_infos[factor] if fi.type == "numerical": if fi.num_columns > 1: name_pieces.append("%s[%s]" % (factor.name(), column_idx)) else: assert column_idx == 0 
name_pieces.append(factor.name()) else: assert fi.type == "categorical" contrast = subterm.contrast_matrices[factor] suffix = contrast.column_suffixes[column_idx] name_pieces.append("%s%s" % (factor.name(), suffix)) if not name_pieces: yield "Intercept" else: yield ":".join(name_pieces) total += 1 assert total == subterm.num_columns def _build_subterm(subterm, factor_infos, factor_values, out): assert subterm.num_columns == out.shape[1] out[...] = 1 for i, column_idxs in enumerate( _subterm_column_combinations(factor_infos, subterm)): for factor, column_idx in zip(subterm.factors, column_idxs): if factor_infos[factor].type == "categorical": contrast = subterm.contrast_matrices[factor] if np.any(factor_values[factor] < 0): raise PatsyError("can't build a design matrix " "containing missing values", factor) out[:, i] *= contrast.matrix[factor_values[factor], column_idx] else: assert factor_infos[factor].type == "numerical" assert (factor_values[factor].shape[1] == factor_infos[factor].num_columns) out[:, i] *= factor_values[factor][:, column_idx] def test__subterm_column_names_iter_and__build_subterm(): import pytest from patsy.contrasts import ContrastMatrix from patsy.categorical import C f1 = _MockFactor("f1") f2 = _MockFactor("f2") f3 = _MockFactor("f3") contrast = ContrastMatrix(np.array([[0, 0.5], [3, 0]]), ["[c1]", "[c2]"]) factor_infos1 = {f1: FactorInfo(f1, "numerical", {}, num_columns=1, categories=None), f2: FactorInfo(f2, "categorical", {}, num_columns=None, categories=["a", "b"]), f3: FactorInfo(f3, "numerical", {}, num_columns=1, categories=None), } contrast_matrices = {f2: contrast} subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2) assert (list(_subterm_column_names_iter(factor_infos1, subterm1)) == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]) mat = np.empty((3, 2)) _build_subterm(subterm1, factor_infos1, {f1: atleast_2d_column_default([1, 2, 3]), f2: np.asarray([0, 0, 1]), f3: atleast_2d_column_default([7.5, 2, -12])}, mat) assert np.allclose(mat, [[0, 
0.5 * 1 * 7.5], [0, 0.5 * 2 * 2], [3 * 3 * -12, 0]]) # Check that missing categorical values blow up pytest.raises(PatsyError, _build_subterm, subterm1, factor_infos1, {f1: atleast_2d_column_default([1, 2, 3]), f2: np.asarray([0, -1, 1]), f3: atleast_2d_column_default([7.5, 2, -12])}, mat) factor_infos2 = dict(factor_infos1) factor_infos2[f1] = FactorInfo(f1, "numerical", {}, num_columns=2, categories=None) subterm2 = SubtermInfo([f1, f2, f3], contrast_matrices, 4) assert (list(_subterm_column_names_iter(factor_infos2, subterm2)) == ["f1[0]:f2[c1]:f3", "f1[1]:f2[c1]:f3", "f1[0]:f2[c2]:f3", "f1[1]:f2[c2]:f3"]) mat2 = np.empty((3, 4)) _build_subterm(subterm2, factor_infos2, {f1: atleast_2d_column_default([[1, 2], [3, 4], [5, 6]]), f2: np.asarray([0, 0, 1]), f3: atleast_2d_column_default([7.5, 2, -12])}, mat2) assert np.allclose(mat2, [[0, 0, 0.5 * 1 * 7.5, 0.5 * 2 * 7.5], [0, 0, 0.5 * 3 * 2, 0.5 * 4 * 2], [3 * 5 * -12, 3 * 6 * -12, 0, 0]]) subterm_int = SubtermInfo([], {}, 1) assert list(_subterm_column_names_iter({}, subterm_int)) == ["Intercept"] mat3 = np.empty((3, 1)) _build_subterm(subterm_int, {}, {f1: [1, 2, 3], f2: [1, 2, 3], f3: [1, 2, 3]}, mat3) assert np.allclose(mat3, 1) def _factors_memorize(factors, data_iter_maker, eval_env): # First, start off the memorization process by setting up each factor's # state and finding out how many passes it will need: factor_states = {} passes_needed = {} for factor in factors: state = {} which_pass = factor.memorize_passes_needed(state, eval_env) factor_states[factor] = state passes_needed[factor] = which_pass # Now, cycle through the data until all the factors have finished # memorizing everything: memorize_needed = set() for factor, passes in six.iteritems(passes_needed): if passes > 0: memorize_needed.add(factor) which_pass = 0 while memorize_needed: for data in data_iter_maker(): for factor in memorize_needed: state = factor_states[factor] factor.memorize_chunk(state, which_pass, data) for factor in 
list(memorize_needed): factor.memorize_finish(factor_states[factor], which_pass) if which_pass == passes_needed[factor] - 1: memorize_needed.remove(factor) which_pass += 1 return factor_states def test__factors_memorize(): class MockFactor(object): def __init__(self, requested_passes, token): self._requested_passes = requested_passes self._token = token self._chunk_in_pass = 0 self._seen_passes = 0 def memorize_passes_needed(self, state, eval_env): state["calls"] = [] state["token"] = self._token return self._requested_passes def memorize_chunk(self, state, which_pass, data): state["calls"].append(("memorize_chunk", which_pass)) assert data["chunk"] == self._chunk_in_pass self._chunk_in_pass += 1 def memorize_finish(self, state, which_pass): state["calls"].append(("memorize_finish", which_pass)) self._chunk_in_pass = 0 class Data(object): CHUNKS = 3 def __init__(self): self.calls = 0 self.data = [{"chunk": i} for i in range(self.CHUNKS)] def __call__(self): self.calls += 1 return iter(self.data) data = Data() f0 = MockFactor(0, "f0") f1 = MockFactor(1, "f1") f2a = MockFactor(2, "f2a") f2b = MockFactor(2, "f2b") factor_states = _factors_memorize(set([f0, f1, f2a, f2b]), data, {}) assert data.calls == 2 mem_chunks0 = [("memorize_chunk", 0)] * data.CHUNKS mem_chunks1 = [("memorize_chunk", 1)] * data.CHUNKS expected = { f0: { "calls": [], "token": "f0", }, f1: { "calls": mem_chunks0 + [("memorize_finish", 0)], "token": "f1", }, f2a: { "calls": mem_chunks0 + [("memorize_finish", 0)] + mem_chunks1 + [("memorize_finish", 1)], "token": "f2a", }, f2b: { "calls": mem_chunks0 + [("memorize_finish", 0)] + mem_chunks1 + [("memorize_finish", 1)], "token": "f2b", }, } assert factor_states == expected def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action): num_column_counts = {} cat_sniffers = {} examine_needed = set(factors) for data in data_iter_maker(): for factor in list(examine_needed): value = factor.eval(factor_states[factor], data) if factor in 
cat_sniffers or guess_categorical(value): if factor not in cat_sniffers: cat_sniffers[factor] = CategoricalSniffer(NA_action, factor.origin) done = cat_sniffers[factor].sniff(value) if done: examine_needed.remove(factor) else: # Numeric value = atleast_2d_column_default(value) _max_allowed_dim(2, value, factor) column_count = value.shape[1] num_column_counts[factor] = column_count examine_needed.remove(factor) if not examine_needed: break # Pull out the levels cat_levels_contrasts = {} for factor, sniffer in six.iteritems(cat_sniffers): cat_levels_contrasts[factor] = sniffer.levels_contrast() return (num_column_counts, cat_levels_contrasts) def test__examine_factor_types(): from patsy.categorical import C class MockFactor(object): def __init__(self): # You should check this using 'is', not '==' from patsy.origin import Origin self.origin = Origin("MOCK", 1, 2) def eval(self, state, data): return state[data] def name(self): return "MOCK MOCK" # This hacky class can only be iterated over once, but it keeps track of # how far it got. 
class DataIterMaker(object): def __init__(self): self.i = -1 def __call__(self): return self def __iter__(self): return self def next(self): self.i += 1 if self.i > 1: raise StopIteration return self.i __next__ = next num_1dim = MockFactor() num_1col = MockFactor() num_4col = MockFactor() categ_1col = MockFactor() bool_1col = MockFactor() string_1col = MockFactor() object_1col = MockFactor() object_levels = (object(), object(), object()) factor_states = { num_1dim: ([1, 2, 3], [4, 5, 6]), num_1col: ([[1], [2], [3]], [[4], [5], [6]]), num_4col: (np.zeros((3, 4)), np.ones((3, 4))), categ_1col: (C(["a", "b", "c"], levels=("a", "b", "c"), contrast="MOCK CONTRAST"), C(["c", "b", "a"], levels=("a", "b", "c"), contrast="MOCK CONTRAST")), bool_1col: ([True, True, False], [False, True, True]), # It has to read through all the data to see all the possible levels: string_1col: (["a", "a", "a"], ["c", "b", "a"]), object_1col: ([object_levels[0]] * 3, object_levels), } it = DataIterMaker() (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(factor_states.keys(), factor_states, it, NAAction()) assert it.i == 2 iterations = 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} assert cat_levels_contrasts == { categ_1col: (("a", "b", "c"), "MOCK CONTRAST"), bool_1col: ((False, True), None), string_1col: (("a", "b", "c"), None), object_1col: (tuple(sorted(object_levels, key=id)), None), } # Check that it doesn't read through all the data if that's not necessary: it = DataIterMaker() no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col] (num_column_counts, cat_levels_contrasts, ) = _examine_factor_types(no_read_necessary, factor_states, it, NAAction()) assert it.i == 0 assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4} assert cat_levels_contrasts == { categ_1col: (("a", "b", "c"), "MOCK CONTRAST"), bool_1col: ((False, True), None), } # Illegal inputs: bool_3col = MockFactor() num_3dim = MockFactor() # no such thing 
as a multi-dimensional Categorical # categ_3dim = MockFactor() string_3col = MockFactor() object_3col = MockFactor() illegal_factor_states = { num_3dim: (np.zeros((3, 3, 3)), np.ones((3, 3, 3))), string_3col: ([["a", "b", "c"]], [["b", "c", "a"]]), object_3col: ([[[object()]]], [[[object()]]]), } import pytest for illegal_factor in illegal_factor_states: it = DataIterMaker() try: _examine_factor_types([illegal_factor], illegal_factor_states, it, NAAction()) except PatsyError as e: assert e.origin is illegal_factor.origin else: assert False def _make_subterm_infos(terms, num_column_counts, cat_levels_contrasts): # Sort each term into a bucket based on the set of numeric factors it # contains: term_buckets = OrderedDict() bucket_ordering = [] for term in terms: num_factors = [] for factor in term.factors: if factor in num_column_counts: num_factors.append(factor) bucket = frozenset(num_factors) if bucket not in term_buckets: bucket_ordering.append(bucket) term_buckets.setdefault(bucket, []).append(term) # Special rule: if there is a no-numerics bucket, then it always comes # first: if frozenset() in term_buckets: bucket_ordering.remove(frozenset()) bucket_ordering.insert(0, frozenset()) term_to_subterm_infos = OrderedDict() new_term_order = [] # Then within each bucket, work out which sort of contrasts we want to use # for each term to avoid redundancy for bucket in bucket_ordering: bucket_terms = term_buckets[bucket] # Sort by degree of interaction bucket_terms.sort(key=lambda t: len(t.factors)) new_term_order += bucket_terms used_subterms = set() for term in bucket_terms: subterm_infos = [] factor_codings = pick_contrasts_for_term(term, num_column_counts, used_subterms) # Construct one SubtermInfo for each subterm for factor_coding in factor_codings: subterm_factors = [] contrast_matrices = {} subterm_columns = 1 # In order to preserve factor ordering information, the # coding_for_term just returns dicts, and we refer to # the original factors to figure out which 
are included in # each subterm, and in what order for factor in term.factors: # Numeric factors are included in every subterm if factor in num_column_counts: subterm_factors.append(factor) subterm_columns *= num_column_counts[factor] elif factor in factor_coding: subterm_factors.append(factor) levels, contrast = cat_levels_contrasts[factor] # This is where the default coding is set to # Treatment: coded = code_contrast_matrix(factor_coding[factor], levels, contrast, default=Treatment) contrast_matrices[factor] = coded subterm_columns *= coded.matrix.shape[1] subterm_infos.append(SubtermInfo(subterm_factors, contrast_matrices, subterm_columns)) term_to_subterm_infos[term] = subterm_infos assert new_term_order == list(term_to_subterm_infos) return term_to_subterm_infos def design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action="drop"): """Construct several :class:`DesignInfo` objects from termlists. This is one of Patsy's fundamental functions. This function and :func:`build_design_matrices` together form the API to the core formula interpretation machinery. :arg termlists: A list of termlists, where each termlist is a list of :class:`Term` objects which together specify a design matrix. :arg data_iter_maker: A zero-argument callable which returns an iterator over dict-like data objects. This must be a callable rather than a simple iterator because sufficiently complex formulas may require multiple passes over the data (e.g. if there are nested stateful transforms). :arg eval_env: Either a :class:`EvalEnvironment` which will be used to look up any variables referenced in `termlists` that cannot be found in `data_iter_maker`, or else a depth represented as an integer which will be passed to :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the context of the function calling :func:`design_matrix_builders` for lookups. 
If calling this function from a library, you probably want ``eval_env=1``, which means that variables should be resolved in *your* caller's namespace. :arg NA_action: An :class:`NAAction` object or string, used to determine what values count as 'missing' for purposes of determining the levels of categorical factors. :returns: A list of :class:`DesignInfo` objects, one for each termlist passed in. This function performs zero or more iterations over the data in order to sniff out any necessary information about factor types, set up stateful transforms, pick column names, etc. See :ref:`formulas` for details. .. versionadded:: 0.2.0 The ``NA_action`` argument. .. versionadded:: 0.4.0 The ``eval_env`` argument. """ # People upgrading from versions prior to 0.4.0 could potentially have # passed NA_action as the 3rd positional argument. Fortunately # EvalEnvironment.capture only accepts int and EvalEnvironment objects, # and we improved its error messages to make this clear. eval_env = EvalEnvironment.capture(eval_env, reference=1) if isinstance(NA_action, str): NA_action = NAAction(NA_action) all_factors = set() for termlist in termlists: for term in termlist: all_factors.update(term.factors) factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env) # Now all the factors have working eval methods, so we can evaluate them # on some data to find out what type of data they return. 
(num_column_counts, cat_levels_contrasts) = _examine_factor_types(all_factors, factor_states, data_iter_maker, NA_action) # Now we need the factor infos, which encapsulate the knowledge of # how to turn any given factor into a chunk of data: factor_infos = {} for factor in all_factors: if factor in num_column_counts: fi = FactorInfo(factor, "numerical", factor_states[factor], num_columns=num_column_counts[factor], categories=None) else: assert factor in cat_levels_contrasts categories = cat_levels_contrasts[factor][0] fi = FactorInfo(factor, "categorical", factor_states[factor], num_columns=None, categories=categories) factor_infos[factor] = fi # And now we can construct the DesignInfo for each termlist: design_infos = [] for termlist in termlists: term_to_subterm_infos = _make_subterm_infos(termlist, num_column_counts, cat_levels_contrasts) assert isinstance(term_to_subterm_infos, OrderedDict) assert frozenset(term_to_subterm_infos) == frozenset(termlist) this_design_factor_infos = {} for term in termlist: for factor in term.factors: this_design_factor_infos[factor] = factor_infos[factor] column_names = [] for subterms in six.itervalues(term_to_subterm_infos): for subterm in subterms: for column_name in _subterm_column_names_iter( factor_infos, subterm): column_names.append(column_name) design_infos.append(DesignInfo(column_names, factor_infos=this_design_factor_infos, term_codings=term_to_subterm_infos)) return design_infos def _build_design_matrix(design_info, factor_info_to_values, dtype): factor_to_values = {} need_reshape = False num_rows = None for factor_info, value in six.iteritems(factor_info_to_values): # It's possible that the same factor appears in multiple different # FactorInfo objects (e.g. if someone is simultaneously building two # DesignInfo objects that started out as part of different # formulas). Skip any factor_info that is not our expected # factor_info. 
if design_info.factor_infos.get(factor_info.factor) is not factor_info: continue factor_to_values[factor_info.factor] = value if num_rows is not None: assert num_rows == value.shape[0] else: num_rows = value.shape[0] if num_rows is None: # We have no dependence on the data -- e.g. an empty termlist, or # only an intercept term. num_rows = 1 need_reshape = True shape = (num_rows, len(design_info.column_names)) m = DesignMatrix(np.empty(shape, dtype=dtype), design_info) start_column = 0 for term, subterms in six.iteritems(design_info.term_codings): for subterm in subterms: end_column = start_column + subterm.num_columns m_slice = m[:, start_column:end_column] _build_subterm(subterm, design_info.factor_infos, factor_to_values, m_slice) start_column = end_column assert start_column == m.shape[1] return need_reshape, m class _CheckMatch(object): def __init__(self, name, eq_fn): self._name = name self._eq_fn = eq_fn self.value = None self._value_desc = None self._value_origin = None def check(self, seen_value, desc, origin): if self.value is None: self.value = seen_value self._value_desc = desc self._value_origin = origin else: if not self._eq_fn(self.value, seen_value): msg = ("%s mismatch between %s and %s" % (self._name, self._value_desc, desc)) if isinstance(self.value, int): msg += " (%r versus %r)" % (self.value, seen_value) # XX FIXME: this is a case where having discontiguous Origins # would be useful... raise PatsyError(msg, origin) def build_design_matrices(design_infos, data, NA_action="drop", return_type="matrix", dtype=np.dtype(float)): """Construct several design matrices from :class:`DesignMatrixBuilder` objects. This is one of Patsy's fundamental functions. This function and :func:`design_matrix_builders` together form the API to the core formula interpretation machinery. :arg design_infos: A list of :class:`DesignInfo` objects describing the design matrices to be built. :arg data: A dict-like object which will be used to look up data. 
:arg NA_action: What to do with rows that contain missing values. You can ``"drop"`` them, ``"raise"`` an error, or for customization, pass an :class:`NAAction` object. See :class:`NAAction` for details on what values count as 'missing' (and how to alter this). :arg return_type: Either ``"matrix"`` or ``"dataframe"``. See below. :arg dtype: The dtype of the returned matrix. Useful if you want to use single-precision or extended-precision. This function returns either a list of :class:`DesignMatrix` objects (for ``return_type="matrix"``) or a list of :class:`pandas.DataFrame` objects (for ``return_type="dataframe"``). In both cases, all returned design matrices will have ``.design_info`` attributes containing the appropriate :class:`DesignInfo` objects. Note that unlike :func:`design_matrix_builders`, this function takes only a simple data argument, not any kind of iterator. That's because this function doesn't need a global view of the data -- everything that depends on the whole data set is already encapsulated in the ``design_infos``. If you are incrementally processing a large data set, simply call this function for each chunk. Index handling: This function always checks for indexes in the following places: * If ``data`` is a :class:`pandas.DataFrame`, its ``.index`` attribute. * If any factors evaluate to a :class:`pandas.Series` or :class:`pandas.DataFrame`, then their ``.index`` attributes. If multiple indexes are found, they must be identical (same values in the same order). If no indexes are found, then a default index is generated using ``np.arange(num_rows)``. One way or another, we end up with a single index for all the data. If ``return_type="dataframe"``, then this index is used as the index of the returned DataFrame objects. Examining this index makes it possible to determine which rows were removed due to NAs. 
Determining the number of rows in design matrices: This is not as obvious as it might seem, because it's possible to have a formula like "~ 1" that doesn't depend on the data (it has no factors). For this formula, it's obvious what every row in the design matrix should look like (just the value ``1``); but, how many rows like this should there be? To determine the number of rows in a design matrix, this function always checks in the following places: * If ``data`` is a :class:`pandas.DataFrame`, then its number of rows. * The number of entries in any factors present in any of the design * matrices being built. All these values much match. In particular, if this function is called to generate multiple design matrices at once, then they must all have the same number of rows. .. versionadded:: 0.2.0 The ``NA_action`` argument. """ if isinstance(NA_action, str): NA_action = NAAction(NA_action) if return_type == "dataframe" and not have_pandas: raise PatsyError("pandas.DataFrame was requested, but pandas " "is not installed") if return_type not in ("matrix", "dataframe"): raise PatsyError("unrecognized output type %r, should be " "'matrix' or 'dataframe'" % (return_type,)) # Evaluate factors factor_info_to_values = {} factor_info_to_isNAs = {} rows_checker = _CheckMatch("Number of rows", lambda a, b: a == b) index_checker = _CheckMatch("Index", lambda a, b: a.equals(b)) if have_pandas and isinstance(data, pandas.DataFrame): index_checker.check(data.index, "data.index", None) rows_checker.check(data.shape[0], "data argument", None) for design_info in design_infos: # We look at evaluators rather than factors here, because it might # happen that we have the same factor twice, but with different # memorized state. 
for factor_info in six.itervalues(design_info.factor_infos): if factor_info not in factor_info_to_values: value, is_NA = _eval_factor(factor_info, data, NA_action) factor_info_to_isNAs[factor_info] = is_NA # value may now be a Series, DataFrame, or ndarray name = factor_info.factor.name() origin = factor_info.factor.origin rows_checker.check(value.shape[0], name, origin) if (have_pandas and isinstance(value, (pandas.Series, pandas.DataFrame))): index_checker.check(value.index, name, origin) # Strategy: we work with raw ndarrays for doing the actual # combining; DesignMatrixBuilder objects never sees pandas # objects. Then at the end, if a DataFrame was requested, we # convert. So every entry in this dict is either a 2-d array # of floats, or a 1-d array of integers (representing # categories). value = np.asarray(value) factor_info_to_values[factor_info] = value # Handle NAs values = list(factor_info_to_values.values()) is_NAs = list(factor_info_to_isNAs.values()) origins = [factor_info.factor.origin for factor_info in factor_info_to_values] pandas_index = index_checker.value num_rows = rows_checker.value # num_rows is None iff evaluator_to_values (and associated sets like # 'values') are empty, i.e., we have no actual evaluators involved # (formulas like "~ 1"). if return_type == "dataframe" and num_rows is not None: if pandas_index is None: pandas_index = np.arange(num_rows) values.append(pandas_index) is_NAs.append(np.zeros(len(pandas_index), dtype=bool)) origins.append(None) new_values = NA_action.handle_NA(values, is_NAs, origins) # NA_action may have changed the number of rows. 
if new_values: num_rows = new_values[0].shape[0] if return_type == "dataframe" and num_rows is not None: pandas_index = new_values.pop() factor_info_to_values = dict(zip(factor_info_to_values, new_values)) # Build factor values into matrices results = [] for design_info in design_infos: results.append(_build_design_matrix(design_info, factor_info_to_values, dtype)) matrices = [] for need_reshape, matrix in results: if need_reshape: # There is no data-dependence, at all -- a formula like "1 ~ 1". # In this case the builder just returns a single-row matrix, and # we have to broadcast it vertically to the appropriate size. If # we can figure out what that is... assert matrix.shape[0] == 1 if num_rows is not None: matrix = DesignMatrix(np.repeat(matrix, num_rows, axis=0), matrix.design_info) else: raise PatsyError( "No design matrix has any non-trivial factors, " "the data object is not a DataFrame. " "I can't tell how many rows the design matrix should " "have!" ) matrices.append(matrix) if return_type == "dataframe": assert have_pandas for i, matrix in enumerate(matrices): di = matrix.design_info matrices[i] = pandas.DataFrame(matrix, columns=di.column_names, index=pandas_index) matrices[i].design_info = di return matrices # It should be possible to do just the factors -> factor_infos stuff # alone, since that, well, makes logical sense to do. patsy-0.5.2/patsy/builtins.py000066400000000000000000000060731412400214200162150ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2013 Nathaniel Smith # See file LICENSE.txt for license information. # This module sets up the namespace of stuff that is available to formulas by # default. All formulas are interpreted in an environment that acts as if # from patsy.builtins import * # has been executed. (Of course, you can also execute this yourself if you # want to use these in your regular code for some reason.) 
__all__ = ["I", "Q"] from patsy.contrasts import ContrastMatrix, Treatment, Poly, Sum, Helmert, Diff __all__ += ["ContrastMatrix", "Treatment", "Poly", "Sum", "Helmert", "Diff"] from patsy.categorical import C __all__ += ["C"] from patsy.state import center, standardize, scale __all__ += ["center", "standardize", "scale"] from patsy.splines import bs __all__ += ["bs"] from patsy.mgcv_cubic_splines import cr, cc, te __all__ += ["cr", "cc", "te"] def I(x): """The identity function. Simply returns its input unchanged. Since Patsy's formula parser ignores anything inside a function call syntax, this is useful to 'hide' arithmetic operations from it. For instance:: y ~ x1 + x2 has ``x1`` and ``x2`` as two separate predictors. But in:: y ~ I(x1 + x2) we instead have a single predictor, defined to be the sum of ``x1`` and ``x2``.""" return x def test_I(): assert I(1) == 1 assert I(None) is None def Q(name): """A way to 'quote' variable names, especially ones that do not otherwise meet Python's variable name rules. If ``x`` is a variable, ``Q("x")`` returns the value of ``x``. (Note that ``Q`` takes the *string* ``"x"``, not the value of ``x`` itself.) This works even if instead of ``x``, we have a variable name that would not otherwise be legal in Python. For example, if you have a column of data named ``weight.in.kg``, then you can't write:: y ~ weight.in.kg because Python will try to find a variable named ``weight``, that has an attribute named ``in``, that has an attribute named ``kg``. (And worse yet, ``in`` is a reserved word, which makes this example doubly broken.) Instead, write:: y ~ Q("weight.in.kg") and all will be well. Note, though, that this requires embedding a Python string inside your formula, which may require some care with your quote marks. Some standard options include:: my_fit_function("y ~ Q('weight.in.kg')", ...) my_fit_function('y ~ Q("weight.in.kg")', ...) my_fit_function("y ~ Q(\\"weight.in.kg\\")", ...) 
Note also that ``Q`` is an ordinary Python function, which means that you can use it in more complex expressions. For example, this is a legal formula:: y ~ np.sqrt(Q("weight.in.kg")) """ from patsy.eval import EvalEnvironment env = EvalEnvironment.capture(1) try: return env.namespace[name] except KeyError: raise NameError("no data named %r found" % (name,)) def test_Q(): a = 1 assert Q("a") == 1 assert Q("Q") is Q import pytest pytest.raises(NameError, Q, "asdfsadfdsad") patsy-0.5.2/patsy/categorical.py000066400000000000000000000451221412400214200166370ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2013 Nathaniel Smith # See file LICENSE.txt for license information. __all__ = ["C", "guess_categorical", "CategoricalSniffer", "categorical_to_int"] # How we handle categorical data: the big picture # ----------------------------------------------- # # There is no Python/NumPy standard for how to represent categorical data. # There is no Python/NumPy standard for how to represent missing data. # # Together, these facts mean that when we receive some data object, we must be # able to heuristically infer what levels it has -- and this process must be # sensitive to the current missing data handling, because maybe 'None' is a # level and maybe it is missing data. # # We don't know how missing data is represented until we get into the actual # builder code, so anything which runs before this -- e.g., the 'C()' builtin # -- cannot actually do *anything* meaningful with the data. # # Therefore, C() simply takes some data and arguments, and boxes them all up # together into an object called (appropriately enough) _CategoricalBox. All # the actual work of handling the various different sorts of categorical data # (lists, string arrays, bool arrays, pandas.Categorical, etc.) happens inside # the builder code, and we just extend this so that it also accepts # _CategoricalBox objects as yet another categorical type. 
# # Originally this file contained a container type (called 'Categorical'), and # the various sniffing, conversion, etc., functions were written as methods on # that type. But we had to get rid of that type, so now this file just # provides a set of plain old functions which are used by patsy.build to # handle the different stages of categorical data munging. import numpy as np import six from patsy import PatsyError from patsy.util import (SortAnythingKey, safe_scalar_isnan, iterable, have_pandas, have_pandas_categorical, have_pandas_categorical_dtype, safe_is_pandas_categorical, pandas_Categorical_from_codes, pandas_Categorical_categories, pandas_Categorical_codes, safe_issubdtype, no_pickling, assert_no_pickling) if have_pandas: import pandas # Objects of this type will always be treated as categorical, with the # specified levels and contrast (if given). class _CategoricalBox(object): def __init__(self, data, contrast, levels): self.data = data self.contrast = contrast self.levels = levels __getstate__ = no_pickling def C(data, contrast=None, levels=None): """ Marks some `data` as being categorical, and specifies how to interpret it. This is used for three reasons: * To explicitly mark some data as categorical. For instance, integer data is by default treated as numerical. If you have data that is stored using an integer type, but where you want patsy to treat each different value as a different level of a categorical factor, you can wrap it in a call to `C` to accomplish this. E.g., compare:: dmatrix("a", {"a": [1, 2, 3]}) dmatrix("C(a)", {"a": [1, 2, 3]}) * To explicitly set the levels or override the default level ordering for categorical data, e.g.:: dmatrix("C(a, levels=["a2", "a1"])", balanced(a=2)) * To override the default coding scheme for categorical data. 
The `contrast` argument can be any of: * A :class:`ContrastMatrix` object * A simple 2d ndarray (which is treated the same as a ContrastMatrix object except that you can't specify column names) * An object with methods called `code_with_intercept` and `code_without_intercept`, like the built-in contrasts (:class:`Treatment`, :class:`Diff`, :class:`Poly`, etc.). See :ref:`categorical-coding` for more details. * A callable that returns one of the above. """ if isinstance(data, _CategoricalBox): if contrast is None: contrast = data.contrast if levels is None: levels = data.levels data = data.data return _CategoricalBox(data, contrast, levels) def test_C(): c1 = C("asdf") assert isinstance(c1, _CategoricalBox) assert c1.data == "asdf" assert c1.levels is None assert c1.contrast is None c2 = C("DATA", "CONTRAST", "LEVELS") assert c2.data == "DATA" assert c2.contrast == "CONTRAST" assert c2.levels == "LEVELS" c3 = C(c2, levels="NEW LEVELS") assert c3.data == "DATA" assert c3.contrast == "CONTRAST" assert c3.levels == "NEW LEVELS" c4 = C(c2, "NEW CONTRAST") assert c4.data == "DATA" assert c4.contrast == "NEW CONTRAST" assert c4.levels == "LEVELS" assert_no_pickling(c4) def guess_categorical(data): if safe_is_pandas_categorical(data): return True if isinstance(data, _CategoricalBox): return True data = np.asarray(data) if safe_issubdtype(data.dtype, np.number): return False return True def test_guess_categorical(): if have_pandas_categorical: c = pandas.Categorical([1, 2, 3]) assert guess_categorical(c) if have_pandas_categorical_dtype: assert guess_categorical(pandas.Series(c)) assert guess_categorical(C([1, 2, 3])) assert guess_categorical([True, False]) assert guess_categorical(["a", "b"]) assert guess_categorical(["a", "b", np.nan]) assert guess_categorical(["a", "b", None]) assert not guess_categorical([1, 2, 3]) assert not guess_categorical([1, 2, 3, np.nan]) assert not guess_categorical([1.0, 2.0, 3.0]) assert not guess_categorical([1.0, 2.0, 3.0, np.nan]) def 
_categorical_shape_fix(data): # helper function # data should not be a _CategoricalBox or pandas Categorical or anything # -- it should be an actual iterable of data, but which might have the # wrong shape. if hasattr(data, "ndim") and data.ndim > 1: raise PatsyError("categorical data cannot be >1-dimensional") # coerce scalars into 1d, which is consistent with what we do for numeric # factors. (See statsmodels/statsmodels#1881) if (not iterable(data) or isinstance(data, (six.text_type, six.binary_type))): data = [data] return data class CategoricalSniffer(object): def __init__(self, NA_action, origin=None): self._NA_action = NA_action self._origin = origin self._contrast = None self._levels = None self._level_set = set() def levels_contrast(self): if self._levels is None: levels = list(self._level_set) levels.sort(key=SortAnythingKey) self._levels = levels return tuple(self._levels), self._contrast def sniff(self, data): if hasattr(data, "contrast"): self._contrast = data.contrast # returns a bool: are we confident that we found all the levels? if isinstance(data, _CategoricalBox): if data.levels is not None: self._levels = tuple(data.levels) return True else: # unbox and fall through data = data.data if safe_is_pandas_categorical(data): # pandas.Categorical has its own NA detection, so don't try to # second-guess it. 
self._levels = tuple(pandas_Categorical_categories(data)) return True # fastpath to avoid doing an item-by-item iteration over boolean # arrays, as requested by #44 if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_): self._level_set = set([True, False]) return True data = _categorical_shape_fix(data) for value in data: if self._NA_action.is_categorical_NA(value): continue if value is True or value is False: self._level_set.update([True, False]) else: try: self._level_set.add(value) except TypeError: raise PatsyError("Error interpreting categorical data: " "all items must be hashable", self._origin) # If everything we've seen is boolean, assume that everything else # would be too. Otherwise we need to keep looking. return self._level_set == set([True, False]) __getstate__ = no_pickling def test_CategoricalSniffer(): from patsy.missing import NAAction def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None): sniffer = CategoricalSniffer(NAAction(NA_types=NA_types)) for data in datas: done = sniffer.sniff(data) if done: assert exp_finish_fast break else: assert not exp_finish_fast assert sniffer.levels_contrast() == (exp_levels, exp_contrast) if have_pandas_categorical: # We make sure to test with both boxed and unboxed pandas objects, # because we used to have a bug where boxed pandas objects would be # treated as categorical, but their levels would be lost... 
preps = [lambda x: x, C] if have_pandas_categorical_dtype: preps += [pandas.Series, lambda x: C(pandas.Series(x))] for prep in preps: t([], [prep(pandas.Categorical([1, 2, None]))], True, (1, 2)) # check order preservation t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))], True, ("a", "b")) t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))], True, ("b", "a")) # check that if someone sticks a .contrast field onto our object obj = prep(pandas.Categorical(["a", "b"])) obj.contrast = "CONTRAST" t([], [obj], True, ("a", "b"), "CONTRAST") t([], [C([1, 2]), C([3, 2])], False, (1, 2, 3)) # check order preservation t([], [C([1, 2], levels=[1, 2, 3]), C([4, 2])], True, (1, 2, 3)) t([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])], True, (3, 2, 1)) # do some actual sniffing with NAs in t(["None", "NaN"], [C([1, np.nan]), C([10, None])], False, (1, 10)) # But 'None' can be a type if we don't make it represent NA: sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"])) sniffer.sniff(C([1, np.nan, None])) # The level order here is different on py2 and py3 :-( Because there's no # consistent way to sort mixed-type values on both py2 and py3. Honestly # people probably shouldn't use this, but I don't know how to give a # sensible error. 
levels, _ = sniffer.levels_contrast() assert set(levels) == set([None, 1]) # bool special cases t(["None", "NaN"], [C([True, np.nan, None])], True, (False, True)) t([], [C([10, 20]), C([False]), C([30, 40])], False, (False, True, 10, 20, 30, 40)) # exercise the fast-path t([], [np.asarray([True, False]), ["foo"]], True, (False, True)) # check tuples too t(["None", "NaN"], [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])], False, (("a", 1), ("b", 2), ("c", None))) # contrasts t([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO") # no box t([], [[10, 30], [20]], False, (10, 20, 30)) t([], [["b", "a"], ["a"]], False, ("a", "b")) # 0d t([], ["b"], False, ("b",)) import pytest # unhashable level error: sniffer = CategoricalSniffer(NAAction()) pytest.raises(PatsyError, sniffer.sniff, [{}]) # >1d is illegal pytest.raises(PatsyError, sniffer.sniff, np.asarray([["b"]])) # returns either a 1d ndarray or a pandas.Series def categorical_to_int(data, levels, NA_action, origin=None): assert isinstance(levels, tuple) # In this function, missing values are always mapped to -1 if safe_is_pandas_categorical(data): data_levels_tuple = tuple(pandas_Categorical_categories(data)) if not data_levels_tuple == levels: raise PatsyError("mismatching levels: expected %r, got %r" % (levels, data_levels_tuple), origin) # pandas.Categorical also uses -1 to indicate NA, and we don't try to # second-guess its NA detection, so we can just pass it back. 
return pandas_Categorical_codes(data) if isinstance(data, _CategoricalBox): if data.levels is not None and tuple(data.levels) != levels: raise PatsyError("mismatching levels: expected %r, got %r" % (levels, tuple(data.levels)), origin) data = data.data data = _categorical_shape_fix(data) try: level_to_int = dict(zip(levels, range(len(levels)))) except TypeError: raise PatsyError("Error interpreting categorical data: " "all items must be hashable", origin) # fastpath to avoid doing an item-by-item iteration over boolean arrays, # as requested by #44 if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_): if level_to_int[False] == 0 and level_to_int[True] == 1: return data.astype(np.int_) out = np.empty(len(data), dtype=int) for i, value in enumerate(data): if NA_action.is_categorical_NA(value): out[i] = -1 else: try: out[i] = level_to_int[value] except KeyError: SHOW_LEVELS = 4 level_strs = [] if len(levels) <= SHOW_LEVELS: level_strs += [repr(level) for level in levels] else: level_strs += [repr(level) for level in levels[:SHOW_LEVELS//2]] level_strs.append("...") level_strs += [repr(level) for level in levels[-SHOW_LEVELS//2:]] level_str = "[%s]" % (", ".join(level_strs)) raise PatsyError("Error converting data to categorical: " "observation with value %r does not match " "any of the expected levels (expected: %s)" % (value, level_str), origin) except TypeError: raise PatsyError("Error converting data to categorical: " "encountered unhashable value %r" % (value,), origin) if have_pandas and isinstance(data, pandas.Series): out = pandas.Series(out, index=data.index) return out def test_categorical_to_int(): import pytest from patsy.missing import NAAction if have_pandas: s = pandas.Series(["a", "b", "c"], index=[10, 20, 30]) c_pandas = categorical_to_int(s, ("a", "b", "c"), NAAction()) assert np.all(c_pandas == [0, 1, 2]) assert np.all(c_pandas.index == [10, 20, 30]) # Input must be 1-dimensional pytest.raises(PatsyError, categorical_to_int, 
pandas.DataFrame({10: s}), ("a", "b", "c"), NAAction()) if have_pandas_categorical: constructors = [pandas_Categorical_from_codes] if have_pandas_categorical_dtype: def Series_from_codes(codes, categories): c = pandas_Categorical_from_codes(codes, categories) return pandas.Series(c) constructors.append(Series_from_codes) for con in constructors: cat = con([1, 0, -1], ("a", "b")) conv = categorical_to_int(cat, ("a", "b"), NAAction()) assert np.all(conv == [1, 0, -1]) # Trust pandas NA marking cat2 = con([1, 0, -1], ("a", "None")) conv2 = categorical_to_int(cat, ("a", "b"), NAAction(NA_types=["None"])) assert np.all(conv2 == [1, 0, -1]) # But levels must match pytest.raises(PatsyError, categorical_to_int, con([1, 0], ("a", "b")), ("a", "c"), NAAction()) pytest.raises(PatsyError, categorical_to_int, con([1, 0], ("a", "b")), ("b", "a"), NAAction()) def t(data, levels, expected, NA_action=NAAction()): got = categorical_to_int(data, levels, NA_action) assert np.array_equal(got, expected) t(["a", "b", "a"], ("a", "b"), [0, 1, 0]) t(np.asarray(["a", "b", "a"]), ("a", "b"), [0, 1, 0]) t(np.asarray(["a", "b", "a"], dtype=object), ("a", "b"), [0, 1, 0]) t([0, 1, 2], (1, 2, 0), [2, 0, 1]) t(np.asarray([0, 1, 2]), (1, 2, 0), [2, 0, 1]) t(np.asarray([0, 1, 2], dtype=float), (1, 2, 0), [2, 0, 1]) t(np.asarray([0, 1, 2], dtype=object), (1, 2, 0), [2, 0, 1]) t(["a", "b", "a"], ("a", "d", "z", "b"), [0, 3, 0]) t([("a", 1), ("b", 0), ("a", 1)], (("a", 1), ("b", 0)), [0, 1, 0]) pytest.raises(PatsyError, categorical_to_int, ["a", "b", "a"], ("a", "c"), NAAction()) t(C(["a", "b", "a"]), ("a", "b"), [0, 1, 0]) t(C(["a", "b", "a"]), ("b", "a"), [1, 0, 1]) t(C(["a", "b", "a"], levels=["b", "a"]), ("b", "a"), [1, 0, 1]) # Mismatch between C() levels and expected levels pytest.raises(PatsyError, categorical_to_int, C(["a", "b", "a"], levels=["a", "b"]), ("b", "a"), NAAction()) # ndim == 0 is okay t("a", ("a", "b"), [0]) t("b", ("a", "b"), [1]) t(True, (False, True), [1]) # ndim == 2 is 
disallowed pytest.raises(PatsyError, categorical_to_int, np.asarray([["a", "b"], ["b", "a"]]), ("a", "b"), NAAction()) # levels must be hashable pytest.raises(PatsyError, categorical_to_int, ["a", "b"], ("a", "b", {}), NAAction()) pytest.raises(PatsyError, categorical_to_int, ["a", "b", {}], ("a", "b"), NAAction()) t(["b", None, np.nan, "a"], ("a", "b"), [1, -1, -1, 0], NAAction(NA_types=["None", "NaN"])) t(["b", None, np.nan, "a"], ("a", "b", None), [1, -1, -1, 0], NAAction(NA_types=["None", "NaN"])) t(["b", None, np.nan, "a"], ("a", "b", None), [1, 2, -1, 0], NAAction(NA_types=["NaN"])) # Smoke test for the branch that formats the ellipsized list of levels in # the error message: pytest.raises(PatsyError, categorical_to_int, ["a", "b", "q"], ("a", "b", "c", "d", "e", "f", "g", "h"), NAAction()) patsy-0.5.2/patsy/compat.py000066400000000000000000000037051412400214200156460ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2012 Nathaniel Smith # See file LICENSE.txt for license information. # This file contains compatibility code for supporting old versions of Python # and numpy. (If we can concentrate it here, hopefully it'll make it easier to # get rid of weird hacks once we drop support for old versions). ##### Numpy import os # To force use of the compat code, set this env var to a non-empty value: optional_dep_ok = not os.environ.get("PATSY_AVOID_OPTIONAL_DEPENDENCIES") ##### Python standard library # The Python license requires that all derivative works contain a "brief # summary of the changes made to Python". Both for license compliance, and for # our own sanity, therefore, please add a note at the top of any snippets you # add here explaining their provenance, any changes made, and what versions of # Python require them: # OrderedDict is only available in Python 2.7+. compat_ordereddict.py has # comments at the top. 
import collections if optional_dep_ok and hasattr(collections, "OrderedDict"): from collections import OrderedDict else: from patsy.compat_ordereddict import OrderedDict # 'raise from' available in Python 3+ import sys from patsy import PatsyError def call_and_wrap_exc(msg, origin, f, *args, **kwargs): try: return f(*args, **kwargs) except Exception as e: if sys.version_info[0] >= 3: new_exc = PatsyError("%s: %s: %s" % (msg, e.__class__.__name__, e), origin) # Use 'exec' to hide this syntax from the Python 2 parser: exec("raise new_exc from e") else: # In python 2, we just let the original exception escape -- better # than destroying the traceback. But if it's a PatsyError, we can # at least set the origin properly. if isinstance(e, PatsyError): e.set_origin(origin) raise patsy-0.5.2/patsy/compat_ordereddict.py000066400000000000000000000217221412400214200202150ustar00rootroot00000000000000# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. # Passes Python2.7's test suite and incorporates all the latest updates. #Author: Raymond Hettinger #License: MIT License #http://code.activestate.com/recipes/576693/ revision 9, downloaded 2012-03-28 try: from thread import get_ident as _get_ident except ImportError: # Hacked by njs -- I don't have dummy_thread and py3 doesn't have thread, # so the import fails when nosetests3 tries to load this file. #from dummy_thread import get_ident as _get_ident def _get_ident(): return "" try: from _abcoll import KeysView, ValuesView, ItemsView except ImportError: pass class OrderedDict(dict): # pragma: no cover 'Dictionary that remembers insertion order' # An inherited dict maps keys to values. # The inherited dict provides __getitem__, __len__, __contains__, and get. # The remaining methods are order-aware. # Big-O running times for all methods are the same as for regular dictionaries. # The internal self.__map dictionary maps keys to links in a doubly linked list. 
# The circular doubly linked list starts and ends with a sentinel element. # The sentinel element never gets deleted (this simplifies the algorithm). # Each link is stored as a list of length three: [PREV, NEXT, KEY]. def __init__(self, *args, **kwds): '''Initialize an ordered dictionary. Signature is the same as for regular dictionaries, but keyword arguments are not recommended because their insertion order is arbitrary. ''' if len(args) > 1: raise TypeError('expected at most 1 arguments, got %d' % len(args)) try: self.__root except AttributeError: self.__root = root = [] # sentinel node root[:] = [root, root, None] self.__map = {} self.__update(*args, **kwds) def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 'od.__setitem__(i, y) <==> od[i]=y' # Setting a new item creates a new link which goes at the end of the linked # list, and the inherited dictionary is updated with the new key/value pair. if key not in self: root = self.__root last = root[0] last[1] = root[0] = self.__map[key] = [last, root, key] dict_setitem(self, key, value) def __delitem__(self, key, dict_delitem=dict.__delitem__): 'od.__delitem__(y) <==> del od[y]' # Deleting an existing item uses self.__map to find the link which is # then removed by updating the links in the predecessor and successor nodes. dict_delitem(self, key) link_prev, link_next, key = self.__map.pop(key) link_prev[1] = link_next link_next[0] = link_prev def __iter__(self): 'od.__iter__() <==> iter(od)' root = self.__root curr = root[1] while curr is not root: yield curr[2] curr = curr[1] def __reversed__(self): 'od.__reversed__() <==> reversed(od)' root = self.__root curr = root[0] while curr is not root: yield curr[2] curr = curr[0] def clear(self): 'od.clear() -> None. Remove all items from od.' 
try: for node in self.__map.itervalues(): del node[:] root = self.__root root[:] = [root, root, None] self.__map.clear() except AttributeError: pass dict.clear(self) def popitem(self, last=True): '''od.popitem() -> (k, v), return and remove a (key, value) pair. Pairs are returned in LIFO order if last is true or FIFO order if false. ''' if not self: raise KeyError('dictionary is empty') root = self.__root if last: link = root[0] link_prev = link[0] link_prev[1] = root root[0] = link_prev else: link = root[1] link_next = link[1] root[1] = link_next link_next[0] = root key = link[2] del self.__map[key] value = dict.pop(self, key) return key, value # -- the following methods do not depend on the internal structure -- def keys(self): 'od.keys() -> list of keys in od' return list(self) def values(self): 'od.values() -> list of values in od' return [self[key] for key in self] def items(self): 'od.items() -> list of (key, value) pairs in od' return [(key, self[key]) for key in self] def iterkeys(self): 'od.iterkeys() -> an iterator over the keys in od' return iter(self) def itervalues(self): 'od.itervalues -> an iterator over the values in od' for k in self: yield self[k] def iteritems(self): 'od.iteritems -> an iterator over the (key, value) items in od' for k in self: yield (k, self[k]) def update(*args, **kwds): '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
If E is a dict instance, does: for k in E: od[k] = E[k] If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] Or if E is an iterable of items, does: for k, v in E: od[k] = v In either case, this is followed by: for k, v in F.items(): od[k] = v ''' if len(args) > 2: raise TypeError('update() takes at most 2 positional ' 'arguments (%d given)' % (len(args),)) elif not args: raise TypeError('update() takes at least 1 argument (0 given)') self = args[0] # Make progressively weaker assumptions about "other" other = () if len(args) == 2: other = args[1] if isinstance(other, dict): for key in other: self[key] = other[key] elif hasattr(other, 'keys'): for key in other.keys(): self[key] = other[key] else: for key, value in other: self[key] = value for key, value in kwds.items(): self[key] = value __update = update # let subclasses override update without breaking __init__ __marker = object() def pop(self, key, default=__marker): '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. If key is not found, d is returned if given, otherwise KeyError is raised. ''' if key in self: result = self[key] del self[key] return result if default is self.__marker: raise KeyError(key) return default def setdefault(self, key, default=None): 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' if key in self: return self[key] self[key] = default return default def __repr__(self, _repr_running={}): 'od.__repr__() <==> repr(od)' call_key = id(self), _get_ident() if call_key in _repr_running: return '...' 
_repr_running[call_key] = 1 try: if not self: return '%s()' % (self.__class__.__name__,) return '%s(%r)' % (self.__class__.__name__, self.items()) finally: del _repr_running[call_key] def __reduce__(self): 'Return state information for pickling' items = [[k, self[k]] for k in self] inst_dict = vars(self).copy() for k in vars(OrderedDict()): inst_dict.pop(k, None) if inst_dict: return (self.__class__, (items,), inst_dict) return self.__class__, (items,) def copy(self): 'od.copy() -> a shallow copy of od' return self.__class__(self) @classmethod def fromkeys(cls, iterable, value=None): '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and values equal to v (which defaults to None). ''' d = cls() for key in iterable: d[key] = value return d def __eq__(self, other): '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive while comparison to a regular mapping is order-insensitive. ''' if isinstance(other, OrderedDict): return len(self)==len(other) and self.items() == other.items() return dict.__eq__(self, other) def __ne__(self, other): return not self == other # -- the following methods are only used in Python 2.7 -- def viewkeys(self): "od.viewkeys() -> a set-like object providing a view on od's keys" return KeysView(self) def viewvalues(self): "od.viewvalues() -> an object providing a view on od's values" return ValuesView(self) def viewitems(self): "od.viewitems() -> a set-like object providing a view on od's items" return ItemsView(self) patsy-0.5.2/patsy/constraint.py000066400000000000000000000474701412400214200165560ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2012 Nathaniel Smith # See file LICENSE.txt for license information. 
# Interpreting linear constraints like "2*x1 + x2 = 0" from __future__ import print_function # These are made available in the patsy.* namespace __all__ = ["LinearConstraint"] import re try: from collections.abc import Mapping except ImportError: from collections import Mapping import six import numpy as np from patsy import PatsyError from patsy.origin import Origin from patsy.util import (atleast_2d_column_default, repr_pretty_delegate, repr_pretty_impl, no_pickling, assert_no_pickling) from patsy.infix_parser import Token, Operator, infix_parse from patsy.parse_formula import _parsing_error_test class LinearConstraint(object): """A linear constraint in matrix form. This object represents a linear constraint of the form `Ax = b`. Usually you won't be constructing these by hand, but instead get them as the return value from :meth:`DesignInfo.linear_constraint`. .. attribute:: coefs A 2-dimensional ndarray with float dtype, representing `A`. .. attribute:: constants A 2-dimensional single-column ndarray with float dtype, representing `b`. .. attribute:: variable_names A list of strings giving the names of the variables being constrained. (Used only for consistency checking.) 
""" def __init__(self, variable_names, coefs, constants=None): self.variable_names = list(variable_names) self.coefs = np.atleast_2d(np.asarray(coefs, dtype=float)) if constants is None: constants = np.zeros(self.coefs.shape[0], dtype=float) constants = np.asarray(constants, dtype=float) self.constants = atleast_2d_column_default(constants) if self.constants.ndim != 2 or self.constants.shape[1] != 1: raise ValueError("constants is not (convertible to) a column matrix") if self.coefs.ndim != 2 or self.coefs.shape[1] != len(variable_names): raise ValueError("wrong shape for coefs") if self.coefs.shape[0] == 0: raise ValueError("must have at least one row in constraint matrix") if self.coefs.shape[0] != self.constants.shape[0]: raise ValueError("shape mismatch between coefs and constants") __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle return repr_pretty_impl(p, self, [self.variable_names, self.coefs, self.constants]) __getstate__ = no_pickling @classmethod def combine(cls, constraints): """Create a new LinearConstraint by ANDing together several existing LinearConstraints. :arg constraints: An iterable of LinearConstraint objects. Their :attr:`variable_names` attributes must all match. :returns: A new LinearConstraint object. 
""" if not constraints: raise ValueError("no constraints specified") variable_names = constraints[0].variable_names for constraint in constraints: if constraint.variable_names != variable_names: raise ValueError("variable names don't match") coefs = np.row_stack([c.coefs for c in constraints]) constants = np.row_stack([c.constants for c in constraints]) return cls(variable_names, coefs, constants) def test_LinearConstraint(): try: from numpy.testing import assert_equal except ImportError: from numpy.testing.utils import assert_equal lc = LinearConstraint(["foo", "bar"], [1, 1]) assert lc.variable_names == ["foo", "bar"] assert_equal(lc.coefs, [[1, 1]]) assert_equal(lc.constants, [[0]]) lc = LinearConstraint(["foo", "bar"], [[1, 1], [2, 3]], [10, 20]) assert_equal(lc.coefs, [[1, 1], [2, 3]]) assert_equal(lc.constants, [[10], [20]]) assert lc.coefs.dtype == np.dtype(float) assert lc.constants.dtype == np.dtype(float) # statsmodels wants to be able to create degenerate constraints like this, # see: # https://github.com/pydata/patsy/issues/89 # We used to forbid it, but I guess it's harmless, so why not. 
lc = LinearConstraint(["a"], [[0]]) assert_equal(lc.coefs, [[0]]) import pytest pytest.raises(ValueError, LinearConstraint, ["a"], [[1, 2]]) pytest.raises(ValueError, LinearConstraint, ["a"], [[[1]]]) pytest.raises(ValueError, LinearConstraint, ["a"], [[1, 2]], [3, 4]) pytest.raises(ValueError, LinearConstraint, ["a", "b"], [[1, 2]], [3, 4]) pytest.raises(ValueError, LinearConstraint, ["a"], [[1]], [[]]) pytest.raises(ValueError, LinearConstraint, ["a", "b"], []) pytest.raises(ValueError, LinearConstraint, ["a", "b"], np.zeros((0, 2))) assert_no_pickling(lc) def test_LinearConstraint_combine(): comb = LinearConstraint.combine([LinearConstraint(["a", "b"], [1, 0]), LinearConstraint(["a", "b"], [0, 1], [1])]) assert comb.variable_names == ["a", "b"] try: from numpy.testing import assert_equal except ImportError: from numpy.testing.utils import assert_equal assert_equal(comb.coefs, [[1, 0], [0, 1]]) assert_equal(comb.constants, [[0], [1]]) import pytest pytest.raises(ValueError, LinearConstraint.combine, []) pytest.raises(ValueError, LinearConstraint.combine, [LinearConstraint(["a"], [1]), LinearConstraint(["b"], [1])]) _ops = [ Operator(",", 2, -100), Operator("=", 2, 0), Operator("+", 1, 100), Operator("-", 1, 100), Operator("+", 2, 100), Operator("-", 2, 100), Operator("*", 2, 200), Operator("/", 2, 200), ] _atomic = ["NUMBER", "VARIABLE"] def _token_maker(type, string): def make_token(scanner, token_string): if type == "__OP__": actual_type = token_string else: actual_type = type return Token(actual_type, Origin(string, *scanner.match.span()), token_string) return make_token def _tokenize_constraint(string, variable_names): lparen_re = r"\(" rparen_re = r"\)" op_re = "|".join([re.escape(op.token_type) for op in _ops]) num_re = r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?" 
whitespace_re = r"\s+" # Prefer long matches: variable_names = sorted(variable_names, key=len, reverse=True) variable_re = "|".join([re.escape(n) for n in variable_names]) lexicon = [ (lparen_re, _token_maker(Token.LPAREN, string)), (rparen_re, _token_maker(Token.RPAREN, string)), (op_re, _token_maker("__OP__", string)), (variable_re, _token_maker("VARIABLE", string)), (num_re, _token_maker("NUMBER", string)), (whitespace_re, None), ] scanner = re.Scanner(lexicon) tokens, leftover = scanner.scan(string) if leftover: offset = len(string) - len(leftover) raise PatsyError("unrecognized token in constraint", Origin(string, offset, offset + 1)) return tokens def test__tokenize_constraint(): code = "2 * (a + b) = q" tokens = _tokenize_constraint(code, ["a", "b", "q"]) expecteds = [("NUMBER", 0, 1, "2"), ("*", 2, 3, "*"), (Token.LPAREN, 4, 5, "("), ("VARIABLE", 5, 6, "a"), ("+", 7, 8, "+"), ("VARIABLE", 9, 10, "b"), (Token.RPAREN, 10, 11, ")"), ("=", 12, 13, "="), ("VARIABLE", 14, 15, "q")] for got, expected in zip(tokens, expecteds): assert isinstance(got, Token) assert got.type == expected[0] assert got.origin == Origin(code, expected[1], expected[2]) assert got.extra == expected[3] import pytest pytest.raises(PatsyError, _tokenize_constraint, "1 + @b", ["b"]) # Shouldn't raise an error: _tokenize_constraint("1 + @b", ["@b"]) # Check we aren't confused by names which are proper prefixes of other # names: for names in (["a", "aa"], ["aa", "a"]): tokens = _tokenize_constraint("a aa a", names) assert len(tokens) == 3 assert [t.extra for t in tokens] == ["a", "aa", "a"] # Check that embedding ops and numbers inside a variable name works tokens = _tokenize_constraint("2 * a[1,1],", ["a[1,1]"]) assert len(tokens) == 4 assert [t.type for t in tokens] == ["NUMBER", "*", "VARIABLE", ","] assert [t.extra for t in tokens] == ["2", "*", "a[1,1]", ","] def parse_constraint(string, variable_names): return infix_parse(_tokenize_constraint(string, variable_names), _ops, _atomic) class 
_EvalConstraint(object): def __init__(self, variable_names): self._variable_names = variable_names self._N = len(variable_names) self._dispatch = { ("VARIABLE", 0): self._eval_variable, ("NUMBER", 0): self._eval_number, ("+", 1): self._eval_unary_plus, ("-", 1): self._eval_unary_minus, ("+", 2): self._eval_binary_plus, ("-", 2): self._eval_binary_minus, ("*", 2): self._eval_binary_multiply, ("/", 2): self._eval_binary_div, ("=", 2): self._eval_binary_eq, (",", 2): self._eval_binary_comma, } # General scheme: there are 2 types we deal with: # - linear combinations ("lincomb"s) of variables and constants, # represented as ndarrays with size N+1 # The last entry is the constant, so [10, 20, 30] means 10x + 20y + # 30. # - LinearConstraint objects def is_constant(self, coefs): return np.all(coefs[:self._N] == 0) def _eval_variable(self, tree): var = tree.token.extra coefs = np.zeros((self._N + 1,), dtype=float) coefs[self._variable_names.index(var)] = 1 return coefs def _eval_number(self, tree): coefs = np.zeros((self._N + 1,), dtype=float) coefs[-1] = float(tree.token.extra) return coefs def _eval_unary_plus(self, tree): return self.eval(tree.args[0]) def _eval_unary_minus(self, tree): return -1 * self.eval(tree.args[0]) def _eval_binary_plus(self, tree): return self.eval(tree.args[0]) + self.eval(tree.args[1]) def _eval_binary_minus(self, tree): return self.eval(tree.args[0]) - self.eval(tree.args[1]) def _eval_binary_div(self, tree): left = self.eval(tree.args[0]) right = self.eval(tree.args[1]) if not self.is_constant(right): raise PatsyError("Can't divide by a variable in a linear " "constraint", tree.args[1]) return left / right[-1] def _eval_binary_multiply(self, tree): left = self.eval(tree.args[0]) right = self.eval(tree.args[1]) if self.is_constant(left): return left[-1] * right elif self.is_constant(right): return left * right[-1] else: raise PatsyError("Can't multiply one variable by another " "in a linear constraint", tree) def _eval_binary_eq(self, tree): 
# Handle "a1 = a2 = a3", which is parsed as "(a1 = a2) = a3" args = list(tree.args) constraints = [] for i, arg in enumerate(args): if arg.type == "=": constraints.append(self.eval(arg, constraint=True)) # make our left argument be their right argument, or # vice-versa args[i] = arg.args[1 - i] left = self.eval(args[0]) right = self.eval(args[1]) coefs = left[:self._N] - right[:self._N] if np.all(coefs == 0): raise PatsyError("no variables appear in constraint", tree) constant = -left[-1] + right[-1] constraint = LinearConstraint(self._variable_names, coefs, constant) constraints.append(constraint) return LinearConstraint.combine(constraints) def _eval_binary_comma(self, tree): left = self.eval(tree.args[0], constraint=True) right = self.eval(tree.args[1], constraint=True) return LinearConstraint.combine([left, right]) def eval(self, tree, constraint=False): key = (tree.type, len(tree.args)) assert key in self._dispatch val = self._dispatch[key](tree) if constraint: # Force it to be a constraint if isinstance(val, LinearConstraint): return val else: assert val.size == self._N + 1 if np.all(val[:self._N] == 0): raise PatsyError("term is constant, with no variables", tree) return LinearConstraint(self._variable_names, val[:self._N], -val[-1]) else: # Force it to *not* be a constraint if isinstance(val, LinearConstraint): raise PatsyError("unexpected constraint object", tree) return val def linear_constraint(constraint_like, variable_names): """This is the internal interface implementing DesignInfo.linear_constraint, see there for docs.""" if isinstance(constraint_like, LinearConstraint): if constraint_like.variable_names != variable_names: raise ValueError("LinearConstraint has wrong variable_names " "(got %r, expected %r)" % (constraint_like.variable_names, variable_names)) return constraint_like if isinstance(constraint_like, Mapping): # Simple conjunction-of-equality constraints can be specified as # dicts. {"x": 1, "y": 2} -> tests x = 1 and y = 2. 
Keys can be # either variable names, or variable indices. coefs = np.zeros((len(constraint_like), len(variable_names)), dtype=float) constants = np.zeros(len(constraint_like)) used = set() for i, (name, value) in enumerate(six.iteritems(constraint_like)): if name in variable_names: idx = variable_names.index(name) elif isinstance(name, six.integer_types): idx = name else: raise ValueError("unrecognized variable name/index %r" % (name,)) if idx in used: raise ValueError("duplicated constraint on %r" % (variable_names[idx],)) used.add(idx) coefs[i, idx] = 1 constants[i] = value return LinearConstraint(variable_names, coefs, constants) if isinstance(constraint_like, str): constraint_like = [constraint_like] # fall-through if (isinstance(constraint_like, list) and constraint_like and isinstance(constraint_like[0], str)): constraints = [] for code in constraint_like: if not isinstance(code, str): raise ValueError("expected a string, not %r" % (code,)) tree = parse_constraint(code, variable_names) evaluator = _EvalConstraint(variable_names) constraints.append(evaluator.eval(tree, constraint=True)) return LinearConstraint.combine(constraints) if isinstance(constraint_like, tuple): if len(constraint_like) != 2: raise ValueError("constraint tuple must have length 2") coef, constants = constraint_like return LinearConstraint(variable_names, coef, constants) # assume a raw ndarray coefs = np.asarray(constraint_like, dtype=float) return LinearConstraint(variable_names, coefs) def _check_lincon(input, varnames, coefs, constants): try: from numpy.testing import assert_equal except ImportError: from numpy.testing.utils import assert_equal got = linear_constraint(input, varnames) print("got", got) expected = LinearConstraint(varnames, coefs, constants) print("expected", expected) assert_equal(got.variable_names, expected.variable_names) assert_equal(got.coefs, expected.coefs) assert_equal(got.constants, expected.constants) assert_equal(got.coefs.dtype, np.dtype(float)) 
assert_equal(got.constants.dtype, np.dtype(float)) def test_linear_constraint(): import pytest from patsy.compat import OrderedDict t = _check_lincon t(LinearConstraint(["a", "b"], [2, 3]), ["a", "b"], [[2, 3]], [[0]]) pytest.raises(ValueError, linear_constraint, LinearConstraint(["b", "a"], [2, 3]), ["a", "b"]) t({"a": 2}, ["a", "b"], [[1, 0]], [[2]]) t(OrderedDict([("a", 2), ("b", 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]) t(OrderedDict([("a", 2), ("b", 3)]), ["b", "a"], [[0, 1], [1, 0]], [[2], [3]]) t({0: 2}, ["a", "b"], [[1, 0]], [[2]]) t(OrderedDict([(0, 2), (1, 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]) t(OrderedDict([("a", 2), (1, 3)]), ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]) pytest.raises(ValueError, linear_constraint, {"q": 1}, ["a", "b"]) pytest.raises(ValueError, linear_constraint, {"a": 1, 0: 2}, ["a", "b"]) t(np.array([2, 3]), ["a", "b"], [[2, 3]], [[0]]) t(np.array([[2, 3], [4, 5]]), ["a", "b"], [[2, 3], [4, 5]], [[0], [0]]) t("a = 2", ["a", "b"], [[1, 0]], [[2]]) t("a - 2", ["a", "b"], [[1, 0]], [[2]]) t("a + 1 = 3", ["a", "b"], [[1, 0]], [[2]]) t("a + b = 3", ["a", "b"], [[1, 1]], [[3]]) t("a = 2, b = 3", ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]) t("b = 3, a = 2", ["a", "b"], [[0, 1], [1, 0]], [[3], [2]]) t(["a = 2", "b = 3"], ["a", "b"], [[1, 0], [0, 1]], [[2], [3]]) pytest.raises(ValueError, linear_constraint, ["a", {"b": 0}], ["a", "b"]) # Actual evaluator tests t("2 * (a + b/3) + b + 2*3/4 = 1 + 2*3", ["a", "b"], [[2, 2.0/3 + 1]], [[7 - 6.0/4]]) t("+2 * -a", ["a", "b"], [[-2, 0]], [[0]]) t("a - b, a + b = 2", ["a", "b"], [[1, -1], [1, 1]], [[0], [2]]) t("a = 1, a = 2, a = 3", ["a", "b"], [[1, 0], [1, 0], [1, 0]], [[1], [2], [3]]) t("a * 2", ["a", "b"], [[2, 0]], [[0]]) t("-a = 1", ["a", "b"], [[-1, 0]], [[1]]) t("(2 + a - a) * b", ["a", "b"], [[0, 2]], [[0]]) t("a = 1 = b", ["a", "b"], [[1, 0], [0, -1]], [[1], [-1]]) t("a = (1 = b)", ["a", "b"], [[0, -1], [1, 0]], [[-1], [1]]) t("a = 1, a = b = c", ["a", "b", "c"], [[1, 0, 0], [1, 
-1, 0], [0, 1, -1]], [[1], [0], [0]]) # One should never do this of course, but test that it works anyway... t("a + 1 = 2", ["a", "a + 1"], [[0, 1]], [[2]]) t(([10, 20], [30]), ["a", "b"], [[10, 20]], [[30]]) t(([[10, 20], [20, 40]], [[30], [35]]), ["a", "b"], [[10, 20], [20, 40]], [[30], [35]]) # wrong-length tuple pytest.raises(ValueError, linear_constraint, ([1, 0], [0], [0]), ["a", "b"]) pytest.raises(ValueError, linear_constraint, ([1, 0],), ["a", "b"]) t([10, 20], ["a", "b"], [[10, 20]], [[0]]) t([[10, 20], [20, 40]], ["a", "b"], [[10, 20], [20, 40]], [[0], [0]]) t(np.array([10, 20]), ["a", "b"], [[10, 20]], [[0]]) t(np.array([[10, 20], [20, 40]]), ["a", "b"], [[10, 20], [20, 40]], [[0], [0]]) # unknown object type pytest.raises(ValueError, linear_constraint, None, ["a", "b"]) _parse_eval_error_tests = [ # Bad token "a + oo", # No pure constant equalities "a = 1, <1 = 1>, b = 1", "a = 1, ", "a = 1, <1>, b = 2", "a = 1, <2 * b = b + b>, c", # No non-linearities "a + + c", "a + 2 / + c", # Constraints are not numbers "a = 1, 2 * <(a = b)>, c", "a = 1, a + <(a = b)>, c", "a = 1, <(a, b)> + 2, c", ] def test_eval_errors(): def doit(bad_code): return linear_constraint(bad_code, ["a", "b", "c"]) _parsing_error_test(doit, _parse_eval_error_tests) patsy-0.5.2/patsy/contrasts.py000066400000000000000000000571411412400214200164060ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2012 Nathaniel Smith # See file LICENSE.txt for license information. 
# http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm # http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm from __future__ import print_function # These are made available in the patsy.* namespace __all__ = ["ContrastMatrix", "Treatment", "Poly", "Sum", "Helmert", "Diff"] import sys import six import numpy as np from patsy import PatsyError from patsy.util import (repr_pretty_delegate, repr_pretty_impl, safe_issubdtype, no_pickling, assert_no_pickling) class ContrastMatrix(object): """A simple container for a matrix used for coding categorical factors. Attributes: .. attribute:: matrix A 2d ndarray, where each column corresponds to one column of the resulting design matrix, and each row contains the entries for a single categorical variable level. Usually n-by-n for a full rank coding or n-by-(n-1) for a reduced rank coding, though other options are possible. .. attribute:: column_suffixes A list of strings to be appended to the factor name, to produce the final column names. E.g. for treatment coding the entries will look like ``"[T.level1]"``. """ def __init__(self, matrix, column_suffixes): self.matrix = np.asarray(matrix) self.column_suffixes = column_suffixes if self.matrix.shape[1] != len(column_suffixes): raise PatsyError("matrix and column_suffixes don't conform") __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): repr_pretty_impl(p, self, [self.matrix, self.column_suffixes]) __getstate__ = no_pickling def test_ContrastMatrix(): cm = ContrastMatrix([[1, 0], [0, 1]], ["a", "b"]) assert np.array_equal(cm.matrix, np.eye(2)) assert cm.column_suffixes == ["a", "b"] # smoke test repr(cm) import pytest pytest.raises(PatsyError, ContrastMatrix, [[1], [0]], ["a", "b"]) assert_no_pickling(cm) # This always produces an object of the type that Python calls 'str' (whether # that be a Python 2 string-of-bytes or a Python 3 string-of-unicode). 
It does # *not* make any particular guarantees about being reversible or having other # such useful programmatic properties -- it just produces something that will # be nice for users to look at. def _obj_to_readable_str(obj): if isinstance(obj, str): return obj elif sys.version_info >= (3,) and isinstance(obj, bytes): try: return obj.decode("utf-8") except UnicodeDecodeError: return repr(obj) elif sys.version_info < (3,) and isinstance(obj, unicode): try: return obj.encode("ascii") except UnicodeEncodeError: return repr(obj) else: return repr(obj) def test__obj_to_readable_str(): def t(obj, expected): got = _obj_to_readable_str(obj) assert type(got) is str assert got == expected t(1, "1") t(1.0, "1.0") t("asdf", "asdf") t(six.u("asdf"), "asdf") if sys.version_info >= (3,): # we can use "foo".encode here b/c this is python 3! # a utf-8 encoded euro-sign comes out as a real euro sign. t("\u20ac".encode("utf-8"), six.u("\u20ac")) # but a iso-8859-15 euro sign can't be decoded, and we fall back on # repr() t("\u20ac".encode("iso-8859-15"), "b'\\xa4'") else: t(six.u("\u20ac"), "u'\\u20ac'") def _name_levels(prefix, levels): return ["[%s%s]" % (prefix, _obj_to_readable_str(level)) for level in levels] def test__name_levels(): assert _name_levels("a", ["b", "c"]) == ["[ab]", "[ac]"] def _dummy_code(levels): return ContrastMatrix(np.eye(len(levels)), _name_levels("", levels)) def _get_level(levels, level_ref): if level_ref in levels: return levels.index(level_ref) if isinstance(level_ref, six.integer_types): if level_ref < 0: level_ref += len(levels) if not (0 <= level_ref < len(levels)): raise PatsyError("specified level %r is out of range" % (level_ref,)) return level_ref raise PatsyError("specified level %r not found" % (level_ref,)) def test__get_level(): assert _get_level(["a", "b", "c"], 0) == 0 assert _get_level(["a", "b", "c"], -1) == 2 assert _get_level(["a", "b", "c"], "b") == 1 # For integer levels, we check identity before treating it as an index assert 
_get_level([2, 1, 0], 0) == 2 import pytest pytest.raises(PatsyError, _get_level, ["a", "b"], 2) pytest.raises(PatsyError, _get_level, ["a", "b"], -3) pytest.raises(PatsyError, _get_level, ["a", "b"], "c") if not six.PY3: assert _get_level(["a", "b", "c"], long(0)) == 0 assert _get_level(["a", "b", "c"], long(-1)) == 2 assert _get_level([2, 1, 0], long(0)) == 2 class Treatment(object): """Treatment coding (also known as dummy coding). This is the default coding. For reduced-rank coding, one level is chosen as the "reference", and its mean behaviour is represented by the intercept. Each column of the resulting matrix represents the difference between the mean of one level and this reference level. For full-rank coding, classic "dummy" coding is used, and each column of the resulting matrix represents the mean of the corresponding level. The reference level defaults to the first level, or can be specified explicitly. .. ipython:: python # reduced rank dmatrix("C(a, Treatment)", balanced(a=3)) # full rank dmatrix("0 + C(a, Treatment)", balanced(a=3)) # Setting a reference level dmatrix("C(a, Treatment(1))", balanced(a=3)) dmatrix("C(a, Treatment('a2'))", balanced(a=3)) Equivalent to R ``contr.treatment``. The R documentation suggests that using ``Treatment(reference=-1)`` will produce contrasts that are "equivalent to those produced by many (but not all) SAS procedures". 
""" def __init__(self, reference=None): self.reference = reference def code_with_intercept(self, levels): return _dummy_code(levels) def code_without_intercept(self, levels): if self.reference is None: reference = 0 else: reference = _get_level(levels, self.reference) eye = np.eye(len(levels) - 1) contrasts = np.vstack((eye[:reference, :], np.zeros((1, len(levels) - 1)), eye[reference:, :])) names = _name_levels("T.", levels[:reference] + levels[reference + 1:]) return ContrastMatrix(contrasts, names) __getstate__ = no_pickling def test_Treatment(): t1 = Treatment() matrix = t1.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[a]", "[b]", "[c]"] assert np.allclose(matrix.matrix, [[1, 0, 0], [0, 1, 0], [0, 0, 1]]) matrix = t1.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[T.b]", "[T.c]"] assert np.allclose(matrix.matrix, [[0, 0], [1, 0], [0, 1]]) matrix = Treatment(reference=1).code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[T.a]", "[T.c]"] assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]]) matrix = Treatment(reference=-2).code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[T.a]", "[T.c]"] assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]]) matrix = Treatment(reference="b").code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[T.a]", "[T.c]"] assert np.allclose(matrix.matrix, [[1, 0], [0, 0], [0, 1]]) # Make sure the default is always the first level, even if there is a # different level called 0. matrix = Treatment().code_without_intercept([2, 1, 0]) assert matrix.column_suffixes == ["[T.1]", "[T.0]"] assert np.allclose(matrix.matrix, [[0, 0], [1, 0], [0, 1]]) class Poly(object): """Orthogonal polynomial contrast coding. 
This coding scheme treats the levels as ordered samples from an underlying continuous scale, whose effect takes an unknown functional form which is `Taylor-decomposed`__ into the sum of a linear, quadratic, etc. components. .. __: https://en.wikipedia.org/wiki/Taylor_series For reduced-rank coding, you get a linear column, a quadratic column, etc., up to the number of levels provided. For full-rank coding, the same scheme is used, except that the zero-order constant polynomial is also included. I.e., you get an intercept column included as part of your categorical term. By default the levels are treated as equally spaced, but you can override this by providing a value for the `scores` argument. Examples: .. ipython:: python # Reduced rank dmatrix("C(a, Poly)", balanced(a=4)) # Full rank dmatrix("0 + C(a, Poly)", balanced(a=3)) # Explicit scores dmatrix("C(a, Poly([1, 2, 10]))", balanced(a=3)) This is equivalent to R's ``contr.poly``. (But note that in R, reduced rank encodings are always dummy-coded, regardless of what contrast you have set.) """ def __init__(self, scores=None): self.scores = scores def _code_either(self, intercept, levels): n = len(levels) scores = self.scores if scores is None: scores = np.arange(n) scores = np.asarray(scores, dtype=float) if len(scores) != n: raise PatsyError("number of levels (%s) does not match" " number of scores (%s)" % (n, len(scores))) # Strategy: just make a matrix whose columns are naive linear, # quadratic, etc., functions of the raw scores, and then use 'qr' to # orthogonalize each column against those to its left. scores -= scores.mean() raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1)) q, r = np.linalg.qr(raw_poly) q *= np.sign(np.diag(r)) q /= np.sqrt(np.sum(q ** 2, axis=1)) # The constant term is always all 1's -- we don't normalize it. 
q[:, 0] = 1 names = [".Constant", ".Linear", ".Quadratic", ".Cubic"] names += ["^%s" % (i,) for i in range(4, n)] names = names[:n] if intercept: return ContrastMatrix(q, names) else: # We always include the constant/intercept column as something to # orthogonalize against, but we don't always return it: return ContrastMatrix(q[:, 1:], names[1:]) def code_with_intercept(self, levels): return self._code_either(True, levels) def code_without_intercept(self, levels): return self._code_either(False, levels) __getstate__ = no_pickling def test_Poly(): t1 = Poly() matrix = t1.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"] # Values from R 'options(digits=15); contr.poly(3)' expected = [[1, -7.07106781186548e-01, 0.408248290463863], [1, 0, -0.816496580927726], [1, 7.07106781186547e-01, 0.408248290463863]] print(matrix.matrix) assert np.allclose(matrix.matrix, expected) matrix = t1.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == [".Linear", ".Quadratic"] # Values from R 'options(digits=15); contr.poly(3)' print(matrix.matrix) assert np.allclose(matrix.matrix, [[-7.07106781186548e-01, 0.408248290463863], [0, -0.816496580927726], [7.07106781186547e-01, 0.408248290463863]]) matrix = Poly(scores=[0, 10, 11]).code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"] # Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 11))' print(matrix.matrix) assert np.allclose(matrix.matrix, [[1, -0.813733471206735, 0.0671156055214024], [1, 0.348742916231458, -0.7382716607354268], [1, 0.464990554975277, 0.6711560552140243]]) # we had an integer/float handling bug for score vectors whose mean was # non-integer, so check one of those: matrix = Poly(scores=[0, 10, 12]).code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic"] # Values from R 'options(digits=15); contr.poly(3, scores=c(0, 10, 
12))' print(matrix.matrix) assert np.allclose(matrix.matrix, [[1, -0.806559132617443, 0.127000127000191], [1, 0.293294230042706, -0.762000762001143], [1, 0.513264902574736, 0.635000635000952]]) import pytest pytest.raises(PatsyError, Poly(scores=[0, 1]).code_with_intercept, ["a", "b", "c"]) matrix = t1.code_with_intercept(list(range(6))) assert matrix.column_suffixes == [".Constant", ".Linear", ".Quadratic", ".Cubic", "^4", "^5"] class Sum(object): """Deviation coding (also known as sum-to-zero coding). Compares the mean of each level to the mean-of-means. (In a balanced design, compares the mean of each level to the overall mean.) For full-rank coding, a standard intercept term is added. One level must be omitted to avoid redundancy; by default this is the last level, but this can be adjusted via the `omit` argument. .. warning:: There are multiple definitions of 'deviation coding' in use. Make sure this is the one you expect before trying to interpret your results! Examples: .. ipython:: python # Reduced rank dmatrix("C(a, Sum)", balanced(a=4)) # Full rank dmatrix("0 + C(a, Sum)", balanced(a=4)) # Omit a different level dmatrix("C(a, Sum(1))", balanced(a=3)) dmatrix("C(a, Sum('a1'))", balanced(a=3)) This is equivalent to R's `contr.sum`. 
""" def __init__(self, omit=None): self.omit = omit def _omit_i(self, levels): if self.omit is None: # We assume below that this is positive return len(levels) - 1 else: return _get_level(levels, self.omit) def _sum_contrast(self, levels): n = len(levels) omit_i = self._omit_i(levels) eye = np.eye(n - 1) out = np.empty((n, n - 1)) out[:omit_i, :] = eye[:omit_i, :] out[omit_i, :] = -1 out[omit_i + 1:, :] = eye[omit_i:, :] return out def code_with_intercept(self, levels): contrast = self.code_without_intercept(levels) matrix = np.column_stack((np.ones(len(levels)), contrast.matrix)) column_suffixes = ["[mean]"] + contrast.column_suffixes return ContrastMatrix(matrix, column_suffixes) def code_without_intercept(self, levels): matrix = self._sum_contrast(levels) omit_i = self._omit_i(levels) included_levels = levels[:omit_i] + levels[omit_i + 1:] return ContrastMatrix(matrix, _name_levels("S.", included_levels)) __getstate__ = no_pickling def test_Sum(): t1 = Sum() matrix = t1.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[mean]", "[S.a]", "[S.b]"] assert np.allclose(matrix.matrix, [[1, 1, 0], [1, 0, 1], [1, -1, -1]]) matrix = t1.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[S.a]", "[S.b]"] assert np.allclose(matrix.matrix, [[1, 0], [0, 1], [-1, -1]]) # Check that it's not thrown off by negative integer term names matrix = t1.code_without_intercept([-1, -2, -3]) assert matrix.column_suffixes == ["[S.-1]", "[S.-2]"] assert np.allclose(matrix.matrix, [[1, 0], [0, 1], [-1, -1]]) t2 = Sum(omit=1) matrix = t2.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[mean]", "[S.a]", "[S.c]"] assert np.allclose(matrix.matrix, [[1, 1, 0], [1, -1, -1], [1, 0, 1]]) matrix = t2.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[S.a]", "[S.c]"] assert np.allclose(matrix.matrix, [[1, 0], [-1, -1], [0, 1]]) matrix = t2.code_without_intercept([1, 0, 2]) assert matrix.column_suffixes == 
["[S.0]", "[S.2]"] assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]]) t3 = Sum(omit=-3) matrix = t3.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[mean]", "[S.b]", "[S.c]"] assert np.allclose(matrix.matrix, [[1, -1, -1], [1, 1, 0], [1, 0, 1]]) matrix = t3.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[S.b]", "[S.c]"] assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]]) t4 = Sum(omit="a") matrix = t3.code_with_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[mean]", "[S.b]", "[S.c]"] assert np.allclose(matrix.matrix, [[1, -1, -1], [1, 1, 0], [1, 0, 1]]) matrix = t3.code_without_intercept(["a", "b", "c"]) assert matrix.column_suffixes == ["[S.b]", "[S.c]"] assert np.allclose(matrix.matrix, [[-1, -1], [1, 0], [0, 1]]) class Helmert(object): """Helmert contrasts. Compares the second level with the first, the third with the average of the first two, and so on. For full-rank coding, a standard intercept term is added. .. warning:: There are multiple definitions of 'Helmert coding' in use. Make sure this is the one you expect before trying to interpret your results! Examples: .. ipython:: python # Reduced rank dmatrix("C(a, Helmert)", balanced(a=4)) # Full rank dmatrix("0 + C(a, Helmert)", balanced(a=4)) This is equivalent to R's `contr.helmert`. """ def _helmert_contrast(self, levels): n = len(levels) #http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter5/sasreg5.htm#HELMERT #contr = np.eye(n - 1) #int_range = np.arange(n - 1., 1, -1) #denom = np.repeat(int_range, np.arange(n - 2, 0, -1)) #contr[np.tril_indices(n - 1, -1)] = -1. / denom #http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#HELMERT #contr = np.zeros((n - 1., n - 1)) #int_range = np.arange(n, 1, -1) #denom = np.repeat(int_range[:-1], np.arange(n - 2, 0, -1)) #contr[np.diag_indices(n - 1)] = (int_range - 1.) / int_range #contr[np.tril_indices(n - 1, -1)] = -1. 
/ denom #contr = np.vstack((contr, -1./int_range)) #r-like contr = np.zeros((n, n - 1)) contr[1:][np.diag_indices(n - 1)] = np.arange(1, n) contr[np.triu_indices(n - 1)] = -1 return contr def code_with_intercept(self, levels): contrast = np.column_stack((np.ones(len(levels)), self._helmert_contrast(levels))) column_suffixes = _name_levels("H.", ["intercept"] + list(levels[1:])) return ContrastMatrix(contrast, column_suffixes) def code_without_intercept(self, levels): contrast = self._helmert_contrast(levels) return ContrastMatrix(contrast, _name_levels("H.", levels[1:])) __getstate__ = no_pickling def test_Helmert(): t1 = Helmert() for levels in (["a", "b", "c", "d"], ("a", "b", "c", "d")): matrix = t1.code_with_intercept(levels) assert matrix.column_suffixes == ["[H.intercept]", "[H.b]", "[H.c]", "[H.d]"] assert np.allclose(matrix.matrix, [[1, -1, -1, -1], [1, 1, -1, -1], [1, 0, 2, -1], [1, 0, 0, 3]]) matrix = t1.code_without_intercept(levels) assert matrix.column_suffixes == ["[H.b]", "[H.c]", "[H.d]"] assert np.allclose(matrix.matrix, [[-1, -1, -1], [1, -1, -1], [0, 2, -1], [0, 0, 3]]) class Diff(object): """Backward difference coding. This coding scheme is useful for ordered factors, and compares the mean of each level with the preceding level. So you get the second level minus the first, the third level minus the second, etc. For full-rank coding, a standard intercept term is added (which gives the mean value for the first level). Examples: .. 
ipython:: python # Reduced rank dmatrix("C(a, Diff)", balanced(a=3)) # Full rank dmatrix("0 + C(a, Diff)", balanced(a=3)) """ def _diff_contrast(self, levels): nlevels = len(levels) contr = np.zeros((nlevels, nlevels-1)) int_range = np.arange(1, nlevels) upper_int = np.repeat(int_range, int_range) row_i, col_i = np.triu_indices(nlevels-1) # we want to iterate down the columns not across the rows # it would be nice if the index functions had a row/col order arg col_order = np.argsort(col_i) contr[row_i[col_order], col_i[col_order]] = (upper_int-nlevels)/float(nlevels) lower_int = np.repeat(int_range, int_range[::-1]) row_i, col_i = np.tril_indices(nlevels-1) # we want to iterate down the columns not across the rows col_order = np.argsort(col_i) contr[row_i[col_order]+1, col_i[col_order]] = lower_int/float(nlevels) return contr def code_with_intercept(self, levels): contrast = np.column_stack((np.ones(len(levels)), self._diff_contrast(levels))) return ContrastMatrix(contrast, _name_levels("D.", levels)) def code_without_intercept(self, levels): contrast = self._diff_contrast(levels) return ContrastMatrix(contrast, _name_levels("D.", levels[:-1])) __getstate__ = no_pickling def test_diff(): t1 = Diff() matrix = t1.code_with_intercept(["a", "b", "c", "d"]) assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]", "[D.d]"] assert np.allclose(matrix.matrix, [[1, -3/4., -1/2., -1/4.], [1, 1/4., -1/2., -1/4.], [1, 1/4., 1./2, -1/4.], [1, 1/4., 1/2., 3/4.]]) matrix = t1.code_without_intercept(["a", "b", "c", "d"]) assert matrix.column_suffixes == ["[D.a]", "[D.b]", "[D.c]"] assert np.allclose(matrix.matrix, [[-3/4., -1/2., -1/4.], [1/4., -1/2., -1/4.], [1/4., 2./4, -1/4.], [1/4., 1/2., 3/4.]]) # contrast can be: # -- a ContrastMatrix # -- a simple np.ndarray # -- an object with code_with_intercept and code_without_intercept methods # -- a function returning one of the above # -- None, in which case the above rules are applied to 'default' # This function always returns a 
ContrastMatrix. def code_contrast_matrix(intercept, levels, contrast, default=None): if contrast is None: contrast = default if callable(contrast): contrast = contrast() if isinstance(contrast, ContrastMatrix): return contrast as_array = np.asarray(contrast) if safe_issubdtype(as_array.dtype, np.number): return ContrastMatrix(as_array, _name_levels("custom", range(as_array.shape[1]))) if intercept: return contrast.code_with_intercept(levels) else: return contrast.code_without_intercept(levels) patsy-0.5.2/patsy/desc.py000066400000000000000000000537131412400214200153050ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2012 Nathaniel Smith # See file LICENSE.txt for license information. # This file defines the ModelDesc class, which describes a model at a high # level, as a list of interactions of factors. It also has the code to convert # a formula parse tree (from patsy.parse_formula) into a ModelDesc. from __future__ import print_function import six from patsy import PatsyError from patsy.parse_formula import ParseNode, Token, parse_formula from patsy.eval import EvalEnvironment, EvalFactor from patsy.util import uniqueify_list from patsy.util import repr_pretty_delegate, repr_pretty_impl from patsy.util import no_pickling, assert_no_pickling # These are made available in the patsy.* namespace __all__ = ["Term", "ModelDesc", "INTERCEPT"] # One might think it would make more sense for 'factors' to be a set, rather # than a tuple-with-guaranteed-unique-entries-that-compares-like-a-set. The # reason we do it this way is that it preserves the order that the user typed # and is expecting, which then ends up producing nicer names in our final # output, nicer column ordering, etc. (A similar comment applies to the # ordering of terms in ModelDesc objects as a whole.) class Term(object): """The interaction between a collection of factor objects. 
This is one of the basic types used in representing formulas, and corresponds to an expression like ``"a:b:c"`` in a formula string. For details, see :ref:`formulas` and :ref:`expert-model-specification`. Terms are hashable and compare by value. Attributes: .. attribute:: factors A tuple of factor objects. """ def __init__(self, factors): self.factors = tuple(uniqueify_list(factors)) def __eq__(self, other): return (isinstance(other, Term) and frozenset(other.factors) == frozenset(self.factors)) def __ne__(self, other): return not self == other def __hash__(self): return hash((Term, frozenset(self.factors))) __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle repr_pretty_impl(p, self, [list(self.factors)]) def name(self): """Return a human-readable name for this term.""" if self.factors: return ":".join([f.name() for f in self.factors]) else: return "Intercept" __getstate__ = no_pickling INTERCEPT = Term([]) class _MockFactor(object): def __init__(self, name): self._name = name def name(self): return self._name def test_Term(): assert Term([1, 2, 1]).factors == (1, 2) assert Term([1, 2]) == Term([2, 1]) assert hash(Term([1, 2])) == hash(Term([2, 1])) f1 = _MockFactor("a") f2 = _MockFactor("b") assert Term([f1, f2]).name() == "a:b" assert Term([f2, f1]).name() == "b:a" assert Term([]).name() == "Intercept" assert_no_pickling(Term([])) class ModelDesc(object): """A simple container representing the termlists parsed from a formula. This is a simple container object which has exactly the same representational power as a formula string, but is a Python object instead. You can construct one by hand, and pass it to functions like :func:`dmatrix` or :func:`incr_dbuilder` that are expecting a formula string, but without having to do any messy string manipulation. For details see :ref:`expert-model-specification`. Attributes: .. 
attribute:: lhs_termlist rhs_termlist Two termlists representing the left- and right-hand sides of a formula, suitable for passing to :func:`design_matrix_builders`. """ def __init__(self, lhs_termlist, rhs_termlist): self.lhs_termlist = uniqueify_list(lhs_termlist) self.rhs_termlist = uniqueify_list(rhs_termlist) __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle return repr_pretty_impl(p, self, [], [("lhs_termlist", self.lhs_termlist), ("rhs_termlist", self.rhs_termlist)]) def describe(self): """Returns a human-readable representation of this :class:`ModelDesc` in pseudo-formula notation. .. warning:: There is no guarantee that the strings returned by this function can be parsed as formulas. They are best-effort descriptions intended for human users. However, if this ModelDesc was created by parsing a formula, then it should work in practice. If you *really* have to. """ def term_code(term): if term == INTERCEPT: return "1" else: return term.name() result = " + ".join([term_code(term) for term in self.lhs_termlist]) if result: result += " ~ " else: result += "~ " if self.rhs_termlist == [INTERCEPT]: result += term_code(INTERCEPT) else: term_names = [] if INTERCEPT not in self.rhs_termlist: term_names.append("0") term_names += [term_code(term) for term in self.rhs_termlist if term != INTERCEPT] result += " + ".join(term_names) return result @classmethod def from_formula(cls, tree_or_string): """Construct a :class:`ModelDesc` from a formula string. :arg tree_or_string: A formula string. (Or an unevaluated formula parse tree, but the API for generating those isn't public yet. Shh, it can be our secret.) :returns: A new :class:`ModelDesc`. 
""" if isinstance(tree_or_string, ParseNode): tree = tree_or_string else: tree = parse_formula(tree_or_string) value = Evaluator().eval(tree, require_evalexpr=False) assert isinstance(value, cls) return value __getstate__ = no_pickling def test_ModelDesc(): f1 = _MockFactor("a") f2 = _MockFactor("b") m = ModelDesc([INTERCEPT, Term([f1])], [Term([f1]), Term([f1, f2])]) assert m.lhs_termlist == [INTERCEPT, Term([f1])] assert m.rhs_termlist == [Term([f1]), Term([f1, f2])] print(m.describe()) assert m.describe() == "1 + a ~ 0 + a + a:b" assert_no_pickling(m) assert ModelDesc([], []).describe() == "~ 0" assert ModelDesc([INTERCEPT], []).describe() == "1 ~ 0" assert ModelDesc([INTERCEPT], [INTERCEPT]).describe() == "1 ~ 1" assert (ModelDesc([INTERCEPT], [INTERCEPT, Term([f2])]).describe() == "1 ~ b") def test_ModelDesc_from_formula(): for input in ("y ~ x", parse_formula("y ~ x")): md = ModelDesc.from_formula(input) assert md.lhs_termlist == [Term([EvalFactor("y")]),] assert md.rhs_termlist == [INTERCEPT, Term([EvalFactor("x")])] class IntermediateExpr(object): "This class holds an intermediate result while we're evaluating a tree." 
def __init__(self, intercept, intercept_origin, intercept_removed, terms): self.intercept = intercept self.intercept_origin = intercept_origin self.intercept_removed =intercept_removed self.terms = tuple(uniqueify_list(terms)) if self.intercept: assert self.intercept_origin assert not (self.intercept and self.intercept_removed) __repr__ = repr_pretty_delegate def _pretty_repr_(self, p, cycle): # pragma: no cover assert not cycle return repr_pretty_impl(p, self, [self.intercept, self.intercept_origin, self.intercept_removed, self.terms]) __getstate__ = no_pickling def _maybe_add_intercept(doit, terms): if doit: return (INTERCEPT,) + terms else: return terms def _eval_any_tilde(evaluator, tree): exprs = [evaluator.eval(arg) for arg in tree.args] if len(exprs) == 1: # Formula was like: "~ foo" # We pretend that instead it was like: "0 ~ foo" exprs.insert(0, IntermediateExpr(False, None, True, [])) assert len(exprs) == 2 # Note that only the RHS gets an implicit intercept: return ModelDesc(_maybe_add_intercept(exprs[0].intercept, exprs[0].terms), _maybe_add_intercept(not exprs[1].intercept_removed, exprs[1].terms)) def _eval_binary_plus(evaluator, tree): left_expr = evaluator.eval(tree.args[0]) if tree.args[1].type == "ZERO": return IntermediateExpr(False, None, True, left_expr.terms) else: right_expr = evaluator.eval(tree.args[1]) if right_expr.intercept: return IntermediateExpr(True, right_expr.intercept_origin, False, left_expr.terms + right_expr.terms) else: return IntermediateExpr(left_expr.intercept, left_expr.intercept_origin, left_expr.intercept_removed, left_expr.terms + right_expr.terms) def _eval_binary_minus(evaluator, tree): left_expr = evaluator.eval(tree.args[0]) if tree.args[1].type == "ZERO": return IntermediateExpr(True, tree.args[1], False, left_expr.terms) elif tree.args[1].type == "ONE": return IntermediateExpr(False, None, True, left_expr.terms) else: right_expr = evaluator.eval(tree.args[1]) terms = [term for term in left_expr.terms if term not 
in right_expr.terms] if right_expr.intercept: return IntermediateExpr(False, None, True, terms) else: return IntermediateExpr(left_expr.intercept, left_expr.intercept_origin, left_expr.intercept_removed, terms) def _check_interactable(expr): if expr.intercept: raise PatsyError("intercept term cannot interact with " "anything else", expr.intercept_origin) def _interaction(left_expr, right_expr): for expr in (left_expr, right_expr): _check_interactable(expr) terms = [] for l_term in left_expr.terms: for r_term in right_expr.terms: terms.append(Term(l_term.factors + r_term.factors)) return IntermediateExpr(False, None, False, terms) def _eval_binary_prod(evaluator, tree): exprs = [evaluator.eval(arg) for arg in tree.args] return IntermediateExpr(False, None, False, exprs[0].terms + exprs[1].terms + _interaction(*exprs).terms) # Division (nesting) is right-ward distributive: # a / (b + c) -> a/b + a/c -> a + a:b + a:c # But left-ward, in S/R it has a quirky behavior: # (a + b)/c -> a + b + a:b:c # This is because it's meaningless for a factor to be "nested" under two # different factors. (This is documented in Chambers and Hastie (page 30) as a # "Slightly more subtle..." rule, with no further elaboration. Hopefully we # will do better.) 
def _eval_binary_div(evaluator, tree):
    """Evaluate "a / b" (nesting): keep the left terms, then interact a
    single combined left term with everything on the right."""
    left_expr = evaluator.eval(tree.args[0])
    right_expr = evaluator.eval(tree.args[1])
    terms = list(left_expr.terms)
    _check_interactable(left_expr)
    # Build a single giant combined term for everything on the left:
    left_factors = []
    for term in left_expr.terms:
        left_factors += list(term.factors)
    left_combined_expr = IntermediateExpr(False, None, False,
                                          [Term(left_factors)])
    # Then interact it with everything on the right:
    terms += list(_interaction(left_combined_expr, right_expr).terms)
    return IntermediateExpr(False, None, False, terms)

def _eval_binary_interact(evaluator, tree):
    """Evaluate "a : b" -- the plain interaction operator."""
    exprs = [evaluator.eval(arg) for arg in tree.args]
    return _interaction(*exprs)

def _eval_binary_power(evaluator, tree):
    """Evaluate "a ** n": all interactions among a's terms up to order n.

    The exponent must be a positive integer literal; anything else raises
    PatsyError.
    """
    left_expr = evaluator.eval(tree.args[0])
    _check_interactable(left_expr)
    # -1 is a sentinel meaning "no valid exponent parsed yet".
    power = -1
    if tree.args[1].type in ("ONE", "NUMBER"):
        expr = tree.args[1].token.extra
        try:
            power = int(expr)
        except ValueError:
            # Non-integer literal (e.g. "1.5"); fall through to the error.
            pass
    if power < 1:
        raise PatsyError("'**' requires a positive integer", tree.args[1])
    all_terms = left_expr.terms
    big_expr = left_expr
    # Small optimization: (a + b)**100 is just the same as (a + b)**2.
    power = min(len(left_expr.terms), power)
    # Repeatedly interact with the base, accumulating each new order of
    # interaction terms.
    for i in range(1, power):
        big_expr = _interaction(left_expr, big_expr)
        all_terms = all_terms + big_expr.terms
    return IntermediateExpr(False, None, False, all_terms)

def _eval_unary_plus(evaluator, tree):
    # Unary "+" is a no-op.
    return evaluator.eval(tree.args[0])

def _eval_unary_minus(evaluator, tree):
    """Evaluate unary "-": only "-0" (adds intercept) and "-1" (removes
    intercept) are legal."""
    if tree.args[0].type == "ZERO":
        return IntermediateExpr(True, tree.origin, False, [])
    elif tree.args[0].type == "ONE":
        return IntermediateExpr(False, None, True, [])
    else:
        raise PatsyError("Unary minus can only be applied to 1 or 0", tree)

def _eval_zero(evaluator, tree):
    # A bare "0" means "remove the intercept".
    return IntermediateExpr(False, None, True, [])

def _eval_one(evaluator, tree):
    # A bare "1" means "include the intercept"; remember where it came
    # from for error reporting.
    return IntermediateExpr(True, tree.origin, False, [])

def _eval_number(evaluator, tree):
    # Numeric literals other than 0/1 are only meaningful as "**" exponents
    # (handled in _eval_binary_power before this is reached).
    raise PatsyError("numbers besides '0' and '1' are "
                     "only allowed with **", tree)

def _eval_python_expr(evaluator, tree):
    # An arbitrary Python expression becomes a single one-factor term.
    factor = EvalFactor(tree.token.extra, origin=tree.origin)
    return IntermediateExpr(False, None, False, [Term([factor])])

class Evaluator(object):
    """Walks a formula parse tree, dispatching each node to the operator
    function registered for its (type, arity) pair."""
    def __init__(self):
        # Maps (operator token type, arity) -> evaluation function.
        self._evaluators = {}
        self.add_op("~", 2, _eval_any_tilde)
        self.add_op("~", 1, _eval_any_tilde)

        self.add_op("+", 2, _eval_binary_plus)
        self.add_op("-", 2, _eval_binary_minus)
        self.add_op("*", 2, _eval_binary_prod)
        self.add_op("/", 2, _eval_binary_div)
        self.add_op(":", 2, _eval_binary_interact)
        self.add_op("**", 2, _eval_binary_power)

        self.add_op("+", 1, _eval_unary_plus)
        self.add_op("-", 1, _eval_unary_minus)

        self.add_op("ZERO", 0, _eval_zero)
        self.add_op("ONE", 0, _eval_one)
        self.add_op("NUMBER", 0, _eval_number)
        self.add_op("PYTHON_EXPR", 0, _eval_python_expr)

        # Not used by Patsy -- provided for the convenience of eventual
        # user-defined operators.
        self.stash = {}

    # This should not be considered a public API yet (to use for actually
    # adding new operator semantics) because I wrote in some of the relevant
    # code sort of speculatively, but it isn't actually tested.
def add_op(self, op, arity, evaluator): self._evaluators[op, arity] = evaluator def eval(self, tree, require_evalexpr=True): result = None assert isinstance(tree, ParseNode) key = (tree.type, len(tree.args)) if key not in self._evaluators: raise PatsyError("I don't know how to evaluate this " "'%s' operator" % (tree.type,), tree.token) result = self._evaluators[key](self, tree) if require_evalexpr and not isinstance(result, IntermediateExpr): if isinstance(result, ModelDesc): raise PatsyError("~ can only be used once, and " "only at the top level", tree) else: raise PatsyError("custom operator returned an " "object that I don't know how to " "handle", tree) return result ############# _eval_tests = { "": (True, []), " ": (True, []), " \n ": (True, []), "a": (True, ["a"]), "1": (True, []), "0": (False, []), "- 1": (False, []), "- 0": (True, []), "+ 1": (True, []), "+ 0": (False, []), "0 + 1": (True, []), "1 + 0": (False, []), "1 - 0": (True, []), "0 - 1": (False, []), "1 + a": (True, ["a"]), "0 + a": (False, ["a"]), "a - 1": (False, ["a"]), "a - 0": (True, ["a"]), "1 - a": (True, []), "a + b": (True, ["a", "b"]), "(a + b)": (True, ["a", "b"]), "a + ((((b))))": (True, ["a", "b"]), "a + ((((+b))))": (True, ["a", "b"]), "a + ((((b - a))))": (True, ["a", "b"]), "a + a + a": (True, ["a"]), "a + (b - a)": (True, ["a", "b"]), "a + np.log(a, base=10)": (True, ["a", "np.log(a, base=10)"]), # Note different spacing: "a + np.log(a, base=10) - np . 
log(a , base = 10)": (True, ["a"]), "a + (I(b) + c)": (True, ["a", "I(b)", "c"]), "a + I(b + c)": (True, ["a", "I(b + c)"]), "a:b": (True, [("a", "b")]), "a:b:a": (True, [("a", "b")]), "a:(b + c)": (True, [("a", "b"), ("a", "c")]), "(a + b):c": (True, [("a", "c"), ("b", "c")]), "a:(b - c)": (True, [("a", "b")]), "c + a:c + a:(b - c)": (True, ["c", ("a", "c"), ("a", "b")]), "(a - b):c": (True, [("a", "c")]), "b + b:c + (a - b):c": (True, ["b", ("b", "c"), ("a", "c")]), "a:b - a:b": (True, []), "a:b - b:a": (True, []), "1 - (a + b)": (True, []), "a + b - (a + b)": (True, []), "a * b": (True, ["a", "b", ("a", "b")]), "a * b * a": (True, ["a", "b", ("a", "b")]), "a * (b + c)": (True, ["a", "b", "c", ("a", "b"), ("a", "c")]), "(a + b) * c": (True, ["a", "b", "c", ("a", "c"), ("b", "c")]), "a * (b - c)": (True, ["a", "b", ("a", "b")]), "c + a:c + a * (b - c)": (True, ["c", ("a", "c"), "a", "b", ("a", "b")]), "(a - b) * c": (True, ["a", "c", ("a", "c")]), "b + b:c + (a - b) * c": (True, ["b", ("b", "c"), "a", "c", ("a", "c")]), "a/b": (True, ["a", ("a", "b")]), "(a + b)/c": (True, ["a", "b", ("a", "b", "c")]), "b + b:c + (a - b)/c": (True, ["b", ("b", "c"), "a", ("a", "c")]), "a/(b + c)": (True, ["a", ("a", "b"), ("a", "c")]), "a ** 2": (True, ["a"]), "(a + b + c + d) ** 2": (True, ["a", "b", "c", "d", ("a", "b"), ("a", "c"), ("a", "d"), ("b", "c"), ("b", "d"), ("c", "d")]), "(a + b + c + d) ** 3": (True, ["a", "b", "c", "d", ("a", "b"), ("a", "c"), ("a", "d"), ("b", "c"), ("b", "d"), ("c", "d"), ("a", "b", "c"), ("a", "b", "d"), ("a", "c", "d"), ("b", "c", "d")]), "a + +a": (True, ["a"]), "~ a + b": (True, ["a", "b"]), "~ a*b": (True, ["a", "b", ("a", "b")]), "~ a*b + 0": (False, ["a", "b", ("a", "b")]), "~ -1": (False, []), "0 ~ a + b": (True, ["a", "b"]), "1 ~ a + b": (True, [], True, ["a", "b"]), "y ~ a + b": (False, ["y"], True, ["a", "b"]), "0 + y ~ a + b": (False, ["y"], True, ["a", "b"]), "0 + y * z ~ a + b": (False, ["y", "z", ("y", "z")], True, ["a", "b"]), "-1 
~ 1": (False, [], True, []), "1 + y ~ a + b": (True, ["y"], True, ["a", "b"]), # Check precedence: "a + b * c": (True, ["a", "b", "c", ("b", "c")]), "a * b + c": (True, ["a", "b", ("a", "b"), "c"]), "a * b - a": (True, ["b", ("a", "b")]), "a + b / c": (True, ["a", "b", ("b", "c")]), "a / b + c": (True, ["a", ("a", "b"), "c"]), "a*b:c": (True, ["a", ("b", "c"), ("a", "b", "c")]), "a:b*c": (True, [("a", "b"), "c", ("a", "b", "c")]), # Intercept handling: "~ 1 + 1 + 0 + 1": (True, []), "~ 0 + 1 + 0": (False, []), "~ 0 - 1 - 1 + 0 + 1": (True, []), "~ 1 - 1": (False, []), "~ 0 + a + 1": (True, ["a"]), "~ 1 + (a + 0)": (True, ["a"]), # This is correct, but perhaps surprising! "~ 0 + (a + 1)": (True, ["a"]), # Also correct! "~ 1 - (a + 1)": (False, []), } # <> mark off where the error should be reported: _eval_error_tests = [ "a <+>", "a + <(>", "b + <(-a)>", "a:<1>", "(a + <1>)*b", "a + <2>", "a + <1.0>", # eh, catching this is a hassle, we'll just leave the user some rope if # they really want it: #"a + <0x1>", "a ** ", "a ** <(1 + 1)>", "a ** <1.5>", "a + b <# asdf>", "<)>", "a + <)>", "<*> a", "a + <*>", "a + ", "a + ", "a + ", "a + <[bar>", "a + <{bar>", "a + <{bar[]>", "a + foo<]>bar", "a + foo[]<]>bar", "a + foo{}<}>bar", "a + foo<)>bar", "a + b<)>", "(a) <.>", "<(>a + b", " ~ b", "y ~ <(a ~ b)>", "<~ a> ~ b", "~ <(a ~ b)>", "1 + <-(a + b)>", "<- a>", "a + <-a**2>", ] def _assert_terms_match(terms, expected_intercept, expecteds): # pragma: no cover if expected_intercept: expecteds = [()] + expecteds assert len(terms) == len(expecteds) for term, expected in zip(terms, expecteds): if isinstance(term, Term): if isinstance(expected, str): expected = (expected,) assert term.factors == tuple([EvalFactor(s) for s in expected]) else: assert term == expected def _do_eval_formula_tests(tests): # pragma: no cover for code, result in six.iteritems(tests): if len(result) == 2: result = (False, []) + result model_desc = ModelDesc.from_formula(code) print(repr(code)) 
print(result) print(model_desc) lhs_intercept, lhs_termlist, rhs_intercept, rhs_termlist = result _assert_terms_match(model_desc.lhs_termlist, lhs_intercept, lhs_termlist) _assert_terms_match(model_desc.rhs_termlist, rhs_intercept, rhs_termlist) def test_eval_formula(): _do_eval_formula_tests(_eval_tests) def test_eval_formula_error_reporting(): from patsy.parse_formula import _parsing_error_test parse_fn = lambda formula: ModelDesc.from_formula(formula) _parsing_error_test(parse_fn, _eval_error_tests) def test_formula_factor_origin(): from patsy.origin import Origin desc = ModelDesc.from_formula("a + b") assert (desc.rhs_termlist[1].factors[0].origin == Origin("a + b", 0, 1)) assert (desc.rhs_termlist[2].factors[0].origin == Origin("a + b", 4, 5)) patsy-0.5.2/patsy/design_info.py000066400000000000000000001430041412400214200166440ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2015 Nathaniel Smith # See file LICENSE.txt for license information. # This file defines the main class for storing metadata about a model # design. It also defines a 'value-added' design matrix type -- a subclass of # ndarray that represents a design matrix and holds metadata about its # columns. The intent is that these are useful and usable data structures # even if you're not using *any* of the rest of patsy to actually build # your matrices. # XX TMP TODO: # # - update design_matrix_builders and build_design_matrices docs # - add tests and docs for new design info stuff # - consider renaming design_matrix_builders (and I guess # build_design_matrices too). Ditto for highlevel dbuilder functions. 
from __future__ import print_function # These are made available in the patsy.* namespace __all__ = ["DesignInfo", "FactorInfo", "SubtermInfo", "DesignMatrix"] import warnings import numbers import six import numpy as np from patsy import PatsyError from patsy.util import atleast_2d_column_default from patsy.compat import OrderedDict from patsy.util import (repr_pretty_delegate, repr_pretty_impl, safe_issubdtype, no_pickling, assert_no_pickling) from patsy.constraint import linear_constraint from patsy.contrasts import ContrastMatrix from patsy.desc import ModelDesc, Term class FactorInfo(object): """A FactorInfo object is a simple class that provides some metadata about the role of a factor within a model. :attr:`DesignInfo.factor_infos` is a dictionary which maps factor objects to FactorInfo objects for each factor in the model. .. versionadded:: 0.4.0 Attributes: .. attribute:: factor The factor object being described. .. attribute:: type The type of the factor -- either the string ``"numerical"`` or the string ``"categorical"``. .. attribute:: state An opaque object which holds the state needed to evaluate this factor on new data (e.g., for prediction). See :meth:`factor_protocol.eval`. .. attribute:: num_columns For numerical factors, the number of columns this factor produces. For categorical factors, this attribute will always be ``None``. .. attribute:: categories For categorical factors, a tuple of the possible categories this factor takes on, in order. For numerical factors, this attribute will always be ``None``. 
""" def __init__(self, factor, type, state, num_columns=None, categories=None): self.factor = factor self.type = type if self.type not in ["numerical", "categorical"]: raise ValueError("FactorInfo.type must be " "'numerical' or 'categorical', not %r" % (self.type,)) self.state = state if self.type == "numerical": if not isinstance(num_columns, six.integer_types): raise ValueError("For numerical factors, num_columns " "must be an integer") if categories is not None: raise ValueError("For numerical factors, categories " "must be None") else: assert self.type == "categorical" if num_columns is not None: raise ValueError("For categorical factors, num_columns " "must be None") categories = tuple(categories) self.num_columns = num_columns self.categories = categories __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle class FactorState(object): def __repr__(self): return "" kwlist = [("factor", self.factor), ("type", self.type), # Don't put the state in people's faces, it will # just encourage them to pay attention to the # contents :-). Plus it's a bunch of gobbledygook # they don't care about. They can always look at # self.state if they want to know... 
("state", FactorState()), ] if self.type == "numerical": kwlist.append(("num_columns", self.num_columns)) else: kwlist.append(("categories", self.categories)) repr_pretty_impl(p, self, [], kwlist) __getstate__ = no_pickling def test_FactorInfo(): fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10) assert fi1.factor == "asdf" assert fi1.state == {"a": 1} assert fi1.type == "numerical" assert fi1.num_columns == 10 assert fi1.categories is None # smoke test repr(fi1) fi2 = FactorInfo("asdf", "categorical", {"a": 2}, categories=["z", "j"]) assert fi2.factor == "asdf" assert fi2.state == {"a": 2} assert fi2.type == "categorical" assert fi2.num_columns is None assert fi2.categories == ("z", "j") # smoke test repr(fi2) import pytest pytest.raises(ValueError, FactorInfo, "asdf", "non-numerical", {}) pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {}) pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {}, num_columns="asdf") pytest.raises(ValueError, FactorInfo, "asdf", "numerical", {}, num_columns=1, categories=1) pytest.raises(TypeError, FactorInfo, "asdf", "categorical", {}) pytest.raises(ValueError, FactorInfo, "asdf", "categorical", {}, num_columns=1) pytest.raises(TypeError, FactorInfo, "asdf", "categorical", {}, categories=1) # Make sure longs are legal for num_columns # (Important on python2+win64, where array shapes are tuples-of-longs) if not six.PY3: fi_long = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=long(10)) assert fi_long.num_columns == 10 class SubtermInfo(object): """A SubtermInfo object is a simple metadata container describing a single primitive interaction and how it is coded in our design matrix. Our final design matrix is produced by coding each primitive interaction in order from left to right, and then stacking the resulting columns. For each :class:`Term`, we have one or more of these objects which describe how that term is encoded. 
:attr:`DesignInfo.term_codings` is a dictionary which maps term objects to lists of SubtermInfo objects. To code a primitive interaction, the following steps are performed: * Evaluate each factor on the provided data. * Encode each factor into one or more proto-columns. For numerical factors, these proto-columns are identical to whatever the factor evaluates to; for categorical factors, they are encoded using a specified contrast matrix. * Form all pairwise, elementwise products between proto-columns generated by different factors. (For example, if factor 1 generated proto-columns A and B, and factor 2 generated proto-columns C and D, then our final columns are ``A * C``, ``B * C``, ``A * D``, ``B * D``.) * The resulting columns are stored directly into the final design matrix. Sometimes multiple primitive interactions are needed to encode a single term; this occurs, for example, in the formula ``"1 + a:b"`` when ``a`` and ``b`` are categorical. See :ref:`formulas-building` for full details. .. versionadded:: 0.4.0 Attributes: .. attribute:: factors The factors which appear in this subterm's interaction. .. attribute:: contrast_matrices A dict mapping factor objects to :class:`ContrastMatrix` objects, describing how each categorical factor in this interaction is coded. .. attribute:: num_columns The number of design matrix columns which this interaction generates. 
""" def __init__(self, factors, contrast_matrices, num_columns): self.factors = tuple(factors) factor_set = frozenset(factors) if not isinstance(contrast_matrices, dict): raise ValueError("contrast_matrices must be dict") for factor, contrast_matrix in six.iteritems(contrast_matrices): if factor not in factor_set: raise ValueError("Unexpected factor in contrast_matrices dict") if not isinstance(contrast_matrix, ContrastMatrix): raise ValueError("Expected a ContrastMatrix, not %r" % (contrast_matrix,)) self.contrast_matrices = contrast_matrices if not isinstance(num_columns, six.integer_types): raise ValueError("num_columns must be an integer") self.num_columns = num_columns __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle repr_pretty_impl(p, self, [], [("factors", self.factors), ("contrast_matrices", self.contrast_matrices), ("num_columns", self.num_columns)]) __getstate__ = no_pickling def test_SubtermInfo(): cm = ContrastMatrix(np.ones((2, 2)), ["[1]", "[2]"]) s = SubtermInfo(["a", "x"], {"a": cm}, 4) assert s.factors == ("a", "x") assert s.contrast_matrices == {"a": cm} assert s.num_columns == 4 # Make sure longs are accepted for num_columns if not six.PY3: s = SubtermInfo(["a", "x"], {"a": cm}, long(4)) assert s.num_columns == 4 # smoke test repr(s) import pytest pytest.raises(TypeError, SubtermInfo, 1, {}, 1) pytest.raises(ValueError, SubtermInfo, ["a", "x"], 1, 1) pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"z": cm}, 1) pytest.raises(ValueError, SubtermInfo, ["a", "x"], {"a": 1}, 1) pytest.raises(ValueError, SubtermInfo, ["a", "x"], {}, 1.5) class DesignInfo(object): """A DesignInfo object holds metadata about a design matrix. This is the main object that Patsy uses to pass metadata about a design matrix to statistical libraries, in order to allow further downstream processing like intelligent tests, prediction on new data, etc. Usually encountered as the `.design_info` attribute on design matrices. 
""" def __init__(self, column_names, factor_infos=None, term_codings=None): self.column_name_indexes = OrderedDict(zip(column_names, range(len(column_names)))) if (factor_infos is None) != (term_codings is None): raise ValueError("Must specify either both or neither of " "factor_infos= and term_codings=") self.factor_infos = factor_infos self.term_codings = term_codings # factor_infos is a dict containing one entry for every factor # mentioned in our terms # and mapping each to FactorInfo object if self.factor_infos is not None: if not isinstance(self.factor_infos, dict): raise ValueError("factor_infos should be a dict") if not isinstance(self.term_codings, OrderedDict): raise ValueError("term_codings must be an OrderedDict") for term, subterms in six.iteritems(self.term_codings): if not isinstance(term, Term): raise ValueError("expected a Term, not %r" % (term,)) if not isinstance(subterms, list): raise ValueError("term_codings must contain lists") term_factors = set(term.factors) for subterm in subterms: if not isinstance(subterm, SubtermInfo): raise ValueError("expected SubtermInfo, " "not %r" % (subterm,)) if not term_factors.issuperset(subterm.factors): raise ValueError("unexpected factors in subterm") all_factors = set() for term in self.term_codings: all_factors.update(term.factors) if all_factors != set(self.factor_infos): raise ValueError("Provided Term objects and factor_infos " "do not match") for factor, factor_info in six.iteritems(self.factor_infos): if not isinstance(factor_info, FactorInfo): raise ValueError("expected FactorInfo object, not %r" % (factor_info,)) if factor != factor_info.factor: raise ValueError("mismatched factor_info.factor") for term, subterms in six.iteritems(self.term_codings): for subterm in subterms: exp_cols = 1 cat_factors = set() for factor in subterm.factors: fi = self.factor_infos[factor] if fi.type == "numerical": exp_cols *= fi.num_columns else: assert fi.type == "categorical" cm = 
subterm.contrast_matrices[factor].matrix if cm.shape[0] != len(fi.categories): raise ValueError("Mismatched contrast matrix " "for factor %r" % (factor,)) cat_factors.add(factor) exp_cols *= cm.shape[1] if cat_factors != set(subterm.contrast_matrices): raise ValueError("Mismatch between contrast_matrices " "and categorical factors") if exp_cols != subterm.num_columns: raise ValueError("Unexpected num_columns") if term_codings is None: # Need to invent term information self.term_slices = None # We invent one term per column, with the same name as the column term_names = column_names slices = [slice(i, i + 1) for i in range(len(column_names))] self.term_name_slices = OrderedDict(zip(term_names, slices)) else: # Need to derive term information from term_codings self.term_slices = OrderedDict() idx = 0 for term, subterm_infos in six.iteritems(self.term_codings): term_columns = 0 for subterm_info in subterm_infos: term_columns += subterm_info.num_columns self.term_slices[term] = slice(idx, idx + term_columns) idx += term_columns if idx != len(self.column_names): raise ValueError("mismatch between column_names and columns " "coded by given terms") self.term_name_slices = OrderedDict( [(term.name(), slice_) for (term, slice_) in six.iteritems(self.term_slices)]) # Guarantees: # term_name_slices is never None # The slices in term_name_slices are in order and exactly cover the # whole range of columns. # term_slices may be None # If term_slices is not None, then its slices match the ones in # term_name_slices. assert self.term_name_slices is not None if self.term_slices is not None: assert (list(self.term_slices.values()) == list(self.term_name_slices.values())) # These checks probably aren't necessary anymore now that we always # generate the slices ourselves, but we'll leave them in just to be # safe. 
covered = 0 for slice_ in six.itervalues(self.term_name_slices): start, stop, step = slice_.indices(len(column_names)) assert start == covered assert step == 1 covered = stop assert covered == len(column_names) # If there is any name overlap between terms and columns, they refer # to the same columns. for column_name, index in six.iteritems(self.column_name_indexes): if column_name in self.term_name_slices: slice_ = self.term_name_slices[column_name] if slice_ != slice(index, index + 1): raise ValueError("term/column name collision") __repr__ = repr_pretty_delegate def _repr_pretty_(self, p, cycle): assert not cycle repr_pretty_impl(p, self, [self.column_names], [("factor_infos", self.factor_infos), ("term_codings", self.term_codings)]) @property def column_names(self): "A list of the column names, in order." return list(self.column_name_indexes) @property def terms(self): "A list of :class:`Terms`, in order, or else None." if self.term_slices is None: return None return list(self.term_slices) @property def term_names(self): "A list of terms, in order." return list(self.term_name_slices) @property def builder(self): ".. deprecated:: 0.4.0" warnings.warn(DeprecationWarning( "The DesignInfo.builder attribute is deprecated starting in " "patsy v0.4.0; distinct builder objects have been eliminated " "and design_info.builder is now just a long-winded way of " "writing 'design_info' (i.e. the .builder attribute just " "returns self)"), stacklevel=2) return self @property def design_info(self): ".. deprecated:: 0.4.0" warnings.warn(DeprecationWarning( "Starting in patsy v0.4.0, the DesignMatrixBuilder class has " "been merged into the DesignInfo class. So there's no need to " "use builder.design_info to access the DesignInfo; 'builder' " "already *is* a DesignInfo."), stacklevel=2) return self def slice(self, columns_specifier): """Locate a subset of design matrix columns, specified symbolically. 
A patsy design matrix has two levels of structure: the individual columns (which are named), and the :ref:`terms ` in the formula that generated those columns. This is a one-to-many relationship: a single term may span several columns. This method provides a user-friendly API for locating those columns. (While we talk about columns here, this is probably most useful for indexing into other arrays that are derived from the design matrix, such as regression coefficients or covariance matrices.) The `columns_specifier` argument can take a number of forms: * A term name * A column name * A :class:`Term` object * An integer giving a raw index * A raw slice object In all cases, a Python :func:`slice` object is returned, which can be used directly for indexing. Example:: y, X = dmatrices("y ~ a", demo_data("y", "a", nlevels=3)) betas = np.linalg.lstsq(X, y)[0] a_betas = betas[X.design_info.slice("a")] (If you want to look up a single individual column by name, use ``design_info.column_name_indexes[name]``.) """ if isinstance(columns_specifier, slice): return columns_specifier if np.issubsctype(type(columns_specifier), np.integer): return slice(columns_specifier, columns_specifier + 1) if (self.term_slices is not None and columns_specifier in self.term_slices): return self.term_slices[columns_specifier] if columns_specifier in self.term_name_slices: return self.term_name_slices[columns_specifier] if columns_specifier in self.column_name_indexes: idx = self.column_name_indexes[columns_specifier] return slice(idx, idx + 1) raise PatsyError("unknown column specified '%s'" % (columns_specifier,)) def linear_constraint(self, constraint_likes): """Construct a linear constraint in matrix form from a (possibly symbolic) description. Possible inputs: * A dictionary which is taken as a set of equality constraint. Keys can be either string column names, or integer column indexes. * A string giving a arithmetic expression referring to the matrix columns by name. 
* A list of such strings which are ANDed together. * A tuple (A, b) where A and b are array_likes, and the constraint is Ax = b. If necessary, these will be coerced to the proper dimensionality by appending dimensions with size 1. The string-based language has the standard arithmetic operators, / * + - and parentheses, plus "=" is used for equality and "," is used to AND together multiple constraint equations within a string. You can If no = appears in some expression, then that expression is assumed to be equal to zero. Division is always float-based, even if ``__future__.true_division`` isn't in effect. Returns a :class:`LinearConstraint` object. Examples:: di = DesignInfo(["x1", "x2", "x3"]) # Equivalent ways to write x1 == 0: di.linear_constraint({"x1": 0}) # by name di.linear_constraint({0: 0}) # by index di.linear_constraint("x1 = 0") # string based di.linear_constraint("x1") # can leave out "= 0" di.linear_constraint("2 * x1 = (x1 + 2 * x1) / 3") di.linear_constraint(([1, 0, 0], 0)) # constraint matrices # Equivalent ways to write x1 == 0 and x3 == 10 di.linear_constraint({"x1": 0, "x3": 10}) di.linear_constraint({0: 0, 2: 10}) di.linear_constraint({0: 0, "x3": 10}) di.linear_constraint("x1 = 0, x3 = 10") di.linear_constraint("x1, x3 = 10") di.linear_constraint(["x1", "x3 = 0"]) # list of strings di.linear_constraint("x1 = 0, x3 - 10 = x1") di.linear_constraint([[1, 0, 0], [0, 0, 1]], [0, 10]) # You can also chain together equalities, just like Python: di.linear_constraint("x1 = x2 = 3") """ return linear_constraint(constraint_likes, self.column_names) def describe(self): """Returns a human-readable string describing this design info. Example: .. ipython:: In [1]: y, X = dmatrices("y ~ x1 + x2", demo_data("y", "x1", "x2")) In [2]: y.design_info.describe() Out[2]: 'y' In [3]: X.design_info.describe() Out[3]: '1 + x1 + x2' .. 
warning:: There is no guarantee that the strings returned by this function can be parsed as formulas, or that if they can be parsed as a formula that they will produce a model equivalent to the one you started with. This function produces a best-effort description intended for humans to read. """ names = [] for name in self.term_names: if name == "Intercept": names.append("1") else: names.append(name) return " + ".join(names) def subset(self, which_terms): """Create a new :class:`DesignInfo` for design matrices that contain a subset of the terms that the current :class:`DesignInfo` does. For example, if ``design_info`` has terms ``x``, ``y``, and ``z``, then:: design_info2 = design_info.subset(["x", "z"]) will return a new DesignInfo that can be used to construct design matrices with only the columns corresponding to the terms ``x`` and ``z``. After we do this, then in general these two expressions will return the same thing (here we assume that ``x``, ``y``, and ``z`` each generate a single column of the output):: build_design_matrix([design_info], data)[0][:, [0, 2]] build_design_matrix([design_info2], data)[0] However, a critical difference is that in the second case, ``data`` need not contain any values for ``y``. This is very useful when doing prediction using a subset of a model, in which situation R usually forces you to specify dummy values for ``y``. If using a formula to specify the terms to include, remember that like any formula, the intercept term will be included by default, so use ``0`` or ``-1`` in your formula if you want to avoid this. This method can also be used to reorder the terms in your design matrix, in case you want to do that for some reason. I can't think of any. Note that this method will generally *not* produce the same result as creating a new model directly. 
Consider these DesignInfo objects:: design1 = dmatrix("1 + C(a)", data) design2 = design1.subset("0 + C(a)") design3 = dmatrix("0 + C(a)", data) Here ``design2`` and ``design3`` will both produce design matrices that contain an encoding of ``C(a)`` without any intercept term. But ``design3`` uses a full-rank encoding for the categorical term ``C(a)``, while ``design2`` uses the same reduced-rank encoding as ``design1``. :arg which_terms: The terms which should be kept in the new :class:`DesignMatrixBuilder`. If this is a string, then it is parsed as a formula, and then the names of the resulting terms are taken as the terms to keep. If it is a list, then it can contain a mixture of term names (as strings) and :class:`Term` objects. .. versionadded: 0.2.0 New method on the class DesignMatrixBuilder. .. versionchanged: 0.4.0 Moved from DesignMatrixBuilder to DesignInfo, as part of the removal of DesignMatrixBuilder. """ if isinstance(which_terms, str): desc = ModelDesc.from_formula(which_terms) if desc.lhs_termlist: raise PatsyError("right-hand-side-only formula required") which_terms = [term.name() for term in desc.rhs_termlist] if self.term_codings is None: # This is a minimal DesignInfo # If the name is unknown we just let the KeyError escape new_names = [] for t in which_terms: new_names += self.column_names[self.term_name_slices[t]] return DesignInfo(new_names) else: term_name_to_term = {} for term in self.term_codings: term_name_to_term[term.name()] = term new_column_names = [] new_factor_infos = {} new_term_codings = OrderedDict() for name_or_term in which_terms: term = term_name_to_term.get(name_or_term, name_or_term) # If the name is unknown we just let the KeyError escape s = self.term_slices[term] new_column_names += self.column_names[s] for f in term.factors: new_factor_infos[f] = self.factor_infos[f] new_term_codings[term] = self.term_codings[term] return DesignInfo(new_column_names, factor_infos=new_factor_infos, term_codings=new_term_codings) 
    @classmethod
    def from_array(cls, array_like, default_column_prefix="column"):
        """Find or construct a DesignInfo appropriate for a given array_like.

        If the input `array_like` already has a ``.design_info``
        attribute, then it will be returned. Otherwise, a new DesignInfo
        object will be constructed, using names either taken from the
        `array_like` (e.g., for a pandas DataFrame with named columns), or
        constructed using `default_column_prefix`.

        This is how :func:`dmatrix` (for example) creates a DesignInfo object
        if an arbitrary matrix is passed in.

        :arg array_like: An ndarray or pandas container.
        :arg default_column_prefix: If it's necessary to invent column names,
          then this will be used to construct them.
        :returns: a DesignInfo object
        """
        # Pass through any object that already carries a genuine DesignInfo.
        if hasattr(array_like, "design_info") and isinstance(array_like.design_info, cls):
            return array_like.design_info
        arr = atleast_2d_column_default(array_like, preserve_pandas=True)
        if arr.ndim > 2:
            raise ValueError("design matrix can't have >2 dimensions")
        columns = getattr(arr, "columns", range(arr.shape[1]))
        # Non-integer pandas column labels are used verbatim; integer (or
        # plain range) labels get the "<prefix><i>" naming scheme.
        if (hasattr(columns, "dtype")
            and not safe_issubdtype(columns.dtype, np.integer)):
            column_names = [str(obj) for obj in columns]
        else:
            column_names = ["%s%s" % (default_column_prefix, i)
                            for i in columns]
        return DesignInfo(column_names)

    __getstate__ = no_pickling

def test_DesignInfo():
    import pytest
    # Minimal stand-in for a real factor object: only .name() is needed.
    class _MockFactor(object):
        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name
    f_x = _MockFactor("x")
    f_y = _MockFactor("y")
    t_x = Term([f_x])
    t_y = Term([f_y])
    factor_infos = {f_x: FactorInfo(f_x, "numerical", {}, num_columns=3),
                    f_y: FactorInfo(f_y, "numerical", {}, num_columns=1),
                    }
    term_codings = OrderedDict([(t_x, [SubtermInfo([f_x], {}, 3)]),
                                (t_y, [SubtermInfo([f_y], {}, 1)])])
    di = DesignInfo(["x1", "x2", "x3", "y"], factor_infos, term_codings)
    assert di.column_names == ["x1", "x2", "x3", "y"]
    assert di.term_names == ["x", "y"]
    assert di.terms == [t_x, t_y]
    assert di.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert di.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert di.term_slices == {t_x: slice(0, 3), t_y: slice(3, 4)}
    assert di.describe() == "x + y"

    # .slice() accepts column indexes, column names, term names, Term
    # objects, and passes slices through unchanged.
    assert di.slice(1) == slice(1, 2)
    assert di.slice("x1") == slice(0, 1)
    assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    pytest.raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects: every column acts as its own term.
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)
    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)
    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")
    t_a = Term([f_a])
    term_codings_with_a = OrderedDict(term_codings)
    term_codings_with_a[t_a] = [SubtermInfo([f_a], {}, 1)]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y", "a"],
                  factor_infos, term_codings_with_a)
    # bad factor_infos
    not_factor_infos = dict(factor_infos)
    not_factor_infos[f_x] = "what is this I don't even"
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], not_factor_infos, term_codings)
    mismatch_factor_infos = dict(factor_infos)
    mismatch_factor_infos[f_x] = FactorInfo(f_a, "numerical", {}, num_columns=3)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], mismatch_factor_infos, term_codings)
    # bad term_codings
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, dict(term_codings))
    not_term_codings = OrderedDict(term_codings)
    not_term_codings["this is a string"] = term_codings[t_x]
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, not_term_codings)
    non_list_term_codings = OrderedDict(term_codings)
    non_list_term_codings[t_y] = tuple(term_codings[t_y])
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_list_term_codings)
    non_subterm_term_codings = OrderedDict(term_codings)
    non_subterm_term_codings[t_y][0] = "not a SubtermInfo"
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, non_subterm_term_codings)
    bad_subterm = OrderedDict(term_codings)
    # f_x is a factor in this model, but it is not a factor in t_y
    # (note: this mutates the shared list inside term_codings in place, so
    # bad_subterm -- a shallow copy -- sees the broken SubtermInfo too)
    term_codings[t_y][0] = SubtermInfo([f_x], {}, 1)
    pytest.raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos, bad_subterm)
    # contrast matrix has wrong number of rows
    factor_codings_a = {f_a:
                        FactorInfo(f_a, "categorical", {},
                                   categories=["a1", "a2"])}
    term_codings_a_bad_rows = OrderedDict([
        (t_a,
         [SubtermInfo([f_a],
                      {f_a: ContrastMatrix(np.ones((3, 2)),
                                           ["[1]", "[2]"])},
                      2)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]", "a[2]"],
                  factor_codings_a, term_codings_a_bad_rows)
    # have a contrast matrix for a non-categorical factor
    t_ax = Term([f_a, f_x])
    factor_codings_ax = {f_a:
                         FactorInfo(f_a, "categorical", {},
                                    categories=["a1", "a2"]),
                         f_x:
                         FactorInfo(f_x, "numerical", {},
                                    num_columns=2)}
    term_codings_ax_extra_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 2)),
                                           ["[1]", "[2]"]),
                       f_x: ContrastMatrix(np.ones((2, 2)),
                                           ["[1]", "[2]"])},
                      4)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax, term_codings_ax_extra_cm)
    # no contrast matrix for a categorical factor
    term_codings_ax_missing_cm = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x], {}, 4)])])
    # This actually fails before it hits the relevant check with a KeyError,
    # but that's okay... the previous test still exercises the check.
    pytest.raises((ValueError, KeyError), DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[1]:x[2]", "a[2]:x[2]"],
                  factor_codings_ax, term_codings_ax_missing_cm)
    # subterm num_columns doesn't match the value computed from the individual
    # factors
    term_codings_ax_wrong_subterm_columns = OrderedDict([
        (t_ax,
         [SubtermInfo([f_a, f_x],
                      {f_a: ContrastMatrix(np.ones((2, 3)),
                                           ["[1]", "[2]", "[3]"])},
                      # should be 2 * 3 = 6
                      5)])])
    pytest.raises(ValueError, DesignInfo,
                  ["a[1]:x[1]", "a[2]:x[1]", "a[3]:x[1]",
                   "a[1]:x[2]", "a[2]:x[2]", "a[3]:x[2]"],
                  factor_codings_ax, term_codings_ax_wrong_subterm_columns)

def test_DesignInfo_from_array():
    di = DesignInfo.from_array([1, 2, 3])
    assert di.column_names == ["column0"]
    di2 = DesignInfo.from_array([[1, 2], [2, 3], [3, 4]])
    assert di2.column_names == ["column0", "column1"]
    di3 = DesignInfo.from_array([1, 2, 3], default_column_prefix="x")
    assert di3.column_names == ["x0"]
    di4 = DesignInfo.from_array([[1, 2], [2, 3], [3, 4]],
                                default_column_prefix="x")
    assert di4.column_names == ["x0", "x1"]
    m = DesignMatrix([1, 2, 3], di3)
    assert DesignInfo.from_array(m) is di3
    # But weird objects are ignored
    m.design_info = "asdf"
    di_weird = DesignInfo.from_array(m)
    assert di_weird.column_names == ["column0"]

    import pytest
    pytest.raises(ValueError, DesignInfo.from_array, np.ones((2, 2, 2)))

    from patsy.util import have_pandas
    if have_pandas:
        import pandas
        # with named columns
        di5 = DesignInfo.from_array(pandas.DataFrame([[1, 2]],
                                                     columns=["a", "b"]))
        assert di5.column_names == ["a", "b"]
        # with irregularly numbered columns
        di6 = DesignInfo.from_array(pandas.DataFrame([[1, 2]],
                                                     columns=[0, 10]))
        assert di6.column_names == ["column0", "column10"]
        # with .design_info attr
        df = pandas.DataFrame([[1, 2]])
        df.design_info = di6
        assert DesignInfo.from_array(df) is di6

def test_DesignInfo_linear_constraint():
    di = DesignInfo(["a1", "a2", "a3", "b"])
    con = di.linear_constraint(["2 * a1 = b + 1", "a3"])
    assert con.variable_names == ["a1", "a2", "a3", "b"]
    assert np.all(con.coefs == [[2, 0, 0, -1], [0, 0, 1, 0]])
    assert np.all(con.constants == [[1], [0]])

def test_DesignInfo_deprecated_attributes():
    d = DesignInfo(["a1", "a2"])
    def check(attr):
        # Accessing the deprecated attribute must both return the object
        # itself and emit exactly one DeprecationWarning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            assert getattr(d, attr) is d
        assert len(w) == 1
        assert w[0].category is DeprecationWarning
    check("builder")
    check("design_info")

# Idea: format with a reasonable amount of precision, then if that turns out
# to be higher than necessary, remove as many zeros as we can. But only do
# this while we can do it to *all* the ordinarily-formatted numbers, to keep
# decimal points aligned.
def _format_float_column(precision, col):
    # Format a 1-d array of floats as strings with `precision` decimal
    # places, then strip trailing zeros (and finally the decimal point)
    # only when *every* "simple" entry can be stripped in lockstep, so
    # decimal points stay aligned. Entries like "nan" or "1e-5" are left
    # untouched and excluded from the stripping decision.
    format_str = "%." + str(precision) + "f"
    assert col.ndim == 1
    # We don't want to look at numbers like "1e-5" or "nan" when stripping.
    simple_float_chars = set("+-0123456789.")
    col_strs = np.array([format_str % (x,) for x in col], dtype=object)
    # Really every item should have a decimal, but just in case, we don't
    # want to strip zeros off the end of "10" or something like that.
    mask = np.array([simple_float_chars.issuperset(col_str)
                     and "." in col_str
                     for col_str in col_strs])
    mask_idxes = np.nonzero(mask)[0]
    strip_char = "0"
    if np.any(mask):
        while True:
            if np.all([s.endswith(strip_char) for s in col_strs[mask]]):
                for idx in mask_idxes:
                    col_strs[idx] = col_strs[idx][:-1]
            else:
                # Once no more zeros can be removed, try removing the bare
                # decimal point once, then stop.
                if strip_char == "0":
                    strip_char = "."
                else:
                    break
    return col_strs

def test__format_float_column():
    def t(precision, numbers, expected):
        got = _format_float_column(precision, np.asarray(numbers))
        print(got, expected)
        assert np.array_equal(got, expected)

    # This acts weird on old python versions (e.g. it can be "-nan"), so
    # don't hardcode it:
    nan_string = "%.3f" % (np.nan,)
    t(3, [1, 2.1234, 2.1239, np.nan], ["1.000", "2.123", "2.124", nan_string])
    t(3, [1, 2, 3, np.nan], ["1", "2", "3", nan_string])
    t(3, [1.0001, 2, 3, np.nan], ["1", "2", "3", nan_string])
    t(4, [1.0001, 2, 3, np.nan], ["1.0001", "2.0000", "3.0000", nan_string])

# http://docs.scipy.org/doc/numpy/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array
class DesignMatrix(np.ndarray):
    """A simple numpy array subclass that carries design matrix metadata.

    .. attribute:: design_info

       A :class:`DesignInfo` object containing metadata about this design
       matrix.

    This class also defines a fancy __repr__ method with labeled
    columns. Otherwise it is identical to a regular numpy ndarray.

    .. warning::

       You should never check for this class using
       :func:`isinstance`. Limitations of the numpy API mean that it is
       impossible to prevent the creation of numpy arrays that have type
       DesignMatrix, but that are not actually design matrices (and such
       objects will behave like regular ndarrays in every way). Instead, check
       for the presence of a ``.design_info`` attribute -- this will be
       present only on "real" DesignMatrix objects.
    """

    def __new__(cls, input_array, design_info=None,
                default_column_prefix="column"):
        """Create a DesignMatrix, or cast an existing matrix to a DesignMatrix.

        A call like::

          DesignMatrix(my_array)

        will convert an arbitrary array_like object into a DesignMatrix.

        The return from this function is guaranteed to be a two-dimensional
        ndarray with a real-valued floating point dtype, and a
        ``.design_info`` attribute which matches its shape. If the
        `design_info` argument is not given, then one is created via
        :meth:`DesignInfo.from_array` using the given
        `default_column_prefix`.

        Depending on the input array, it is possible this will pass through
        its input unchanged, or create a view.
        """
        # Pass through existing DesignMatrixes.
        # The design_info check is
        # necessary because numpy is sort of annoying and cannot be stopped
        # from turning non-design-matrix arrays into DesignMatrix
        # instances. (E.g., my_dm.diagonal() will return a DesignMatrix
        # object, but one without a design_info attribute.)
        if (isinstance(input_array, DesignMatrix)
            and hasattr(input_array, "design_info")):
            return input_array
        self = atleast_2d_column_default(input_array).view(cls)
        # Upcast integer to floating point
        if safe_issubdtype(self.dtype, np.integer):
            self = np.asarray(self, dtype=float).view(cls)
        if self.ndim > 2:
            raise ValueError("DesignMatrix must be 2d")
        assert self.ndim == 2
        if design_info is None:
            design_info = DesignInfo.from_array(self, default_column_prefix)
        if len(design_info.column_names) != self.shape[1]:
            raise ValueError("wrong number of column names for design matrix "
                             "(got %s, wanted %s)"
                             % (len(design_info.column_names),
                                self.shape[1]))
        self.design_info = design_info
        # Complex, object, etc. dtypes are rejected outright (integers were
        # already upcast to float above).
        if not safe_issubdtype(self.dtype, np.floating):
            raise ValueError("design matrix must be real-valued floating point")
        return self

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        # Pretty-printer hook (IPython's protocol). Renders the matrix with
        # labeled columns when it fits in MAX_TOTAL_WIDTH characters, and
        # falls back to a columns/terms summary otherwise.
        if not hasattr(self, "design_info"):
            # Not a real DesignMatrix
            p.pretty(np.asarray(self))
            return
        assert not cycle

        # XX: could try calculating width of the current terminal window:
        #   http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
        # sadly it looks like ipython does not actually pass this information
        # in, even if we use _repr_pretty_ -- the pretty-printer object has a
        # fixed width it always uses. (As of IPython 0.12.)
        MAX_TOTAL_WIDTH = 78
        SEP = 2
        INDENT = 2
        MAX_ROWS = 30
        PRECISION = 5

        names = self.design_info.column_names
        column_name_widths = [len(name) for name in names]
        min_total_width = (INDENT + SEP * (self.shape[1] - 1)
                           + np.sum(column_name_widths))
        if min_total_width <= MAX_TOTAL_WIDTH:
            printable_part = np.asarray(self)[:MAX_ROWS, :]
            formatted_cols = [_format_float_column(PRECISION,
                                                   printable_part[:, i])
                              for i in range(self.shape[1])]
            def max_width(col):
                assert col.ndim == 1
                if not col.shape[0]:
                    return 0
                else:
                    return max([len(s) for s in col])
            column_num_widths = [max_width(col) for col in formatted_cols]
            column_widths = [max(name_width, num_width)
                             for (name_width, num_width)
                             in zip(column_name_widths, column_num_widths)]
            total_width = (INDENT + SEP * (self.shape[1] - 1)
                           + np.sum(column_widths))
            print_numbers = (total_width < MAX_TOTAL_WIDTH)
        else:
            print_numbers = False

        p.begin_group(INDENT, "DesignMatrix with shape %s" % (self.shape,))
        p.breakable("\n" + " " * p.indentation)
        if print_numbers:
            # We can fit the numbers on the screen
            sep = " " * SEP
            # list() is for Py3 compatibility
            for row in [names] + list(zip(*formatted_cols)):
                cells = [cell.rjust(width)
                         for (width, cell) in zip(column_widths, row)]
                p.text(sep.join(cells))
                p.text("\n" + " " * p.indentation)
            if MAX_ROWS < self.shape[0]:
                p.text("[%s rows omitted]" % (self.shape[0] - MAX_ROWS,))
                p.text("\n" + " " * p.indentation)
        else:
            p.begin_group(2, "Columns:")
            p.breakable("\n" + " " * p.indentation)
            p.pretty(names)
            p.end_group(2, "")
            p.breakable("\n" + " " * p.indentation)

        p.begin_group(2, "Terms:")
        p.breakable("\n" + " " * p.indentation)
        for term_name, span in six.iteritems(self.design_info.term_name_slices):
            if span.start != 0:
                p.breakable(", ")
            p.pretty(term_name)
            if span.stop - span.start == 1:
                coltext = "column %s" % (span.start,)
            else:
                coltext = "columns %s:%s" % (span.start, span.stop)
            p.text(" (%s)" % (coltext,))
        p.end_group(2, "")

        if not print_numbers or self.shape[0] > MAX_ROWS:
            # some data was not shown
            p.breakable("\n" + " " * p.indentation)
            p.text("(to view full data, use np.asarray(this_obj))")

        p.end_group(INDENT, "")

    # No __array_finalize__ method, because we don't want slices of this
    # object to keep the design_info (they may have different columns!), or
    # anything fancy like that.

    __reduce__ = no_pickling

def test_design_matrix():
    import pytest

    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == ["a1", "a2", "a3", "b"]

    bad_di = DesignInfo(["a1"])
    pytest.raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == ["column0", "column1", "column2",
                                            "column3"]

    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    pytest.raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
    mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    pytest.raises(ValueError, DesignMatrix, [1, 2, 3j])
    pytest.raises(ValueError, DesignMatrix, ["a", "b", "c"])
    pytest.raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
    repr(mm)
    repr(DesignMatrix(np.arange(100)))
    repr(DesignMatrix(np.arange(100) * 2.0))
    repr(mm[1:, :])
    repr(DesignMatrix(np.arange(100).reshape((1, 100))))
    repr(DesignMatrix([np.nan, np.inf]))
    repr(DesignMatrix([np.nan, 0, 1e20, 20.5]))
    # handling of zero-size matrices
    repr(DesignMatrix(np.zeros((1, 0))))
    repr(DesignMatrix(np.zeros((0, 1))))
    repr(DesignMatrix(np.zeros((0, 0))))
patsy-0.5.2/patsy/eval.py000066400000000000000000000766721412400214200153270ustar00rootroot00000000000000# This file is part of Patsy
# Copyright (C) 2011
Nathaniel Smith # See file LICENSE.txt for license information. # Utilities that require an over-intimate knowledge of Python's execution # environment. # NB: if you add any __future__ imports to this file then you'll have to # adjust the tests that deal with checking the caller's execution environment # for __future__ flags! # These are made available in the patsy.* namespace __all__ = ["EvalEnvironment", "EvalFactor"] import sys import __future__ import inspect import tokenize import ast import numbers import six from patsy import PatsyError from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc def _all_future_flags(): flags = 0 for feature_name in __future__.all_feature_names: feature = getattr(__future__, feature_name) if feature.getMandatoryRelease() > sys.version_info: flags |= feature.compiler_flag return flags _ALL_FUTURE_FLAGS = _all_future_flags() # This is just a minimal dict-like object that does lookup in a 'stack' of # dicts -- first it checks the first, then the second, etc. Assignments go # into an internal, zeroth dict. 
class VarLookupDict(object):
    # A minimal dict-like object that looks keys up in a stack of dicts, in
    # order. Writes never touch the caller's dicts: they go into a private
    # zeroth dict prepended in __init__.
    def __init__(self, dicts):
        self._dicts = [{}] + list(dicts)

    def __getitem__(self, key):
        # First dict in the stack that has the key wins.
        for d in self._dicts:
            try:
                return d[key]
            except KeyError:
                pass
        raise KeyError(key)

    def __setitem__(self, key, value):
        # Assignments shadow, rather than modify, the underlying dicts.
        self._dicts[0][key] = value

    def __contains__(self, key):
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._dicts)

    __getstate__ = no_pickling

def test_VarLookupDict():
    d1 = {"a": 1}
    d2 = {"a": 2, "b": 3}
    ds = VarLookupDict([d1, d2])
    assert ds["a"] == 1
    assert ds["b"] == 3
    assert "a" in ds
    assert "c" not in ds
    import pytest
    pytest.raises(KeyError, ds.__getitem__, "c")
    ds["a"] = 10
    assert ds["a"] == 10
    # The write above must not leak into the underlying dict:
    assert d1["a"] == 1
    assert ds.get("c") is None
    assert isinstance(repr(ds), six.string_types)

    assert_no_pickling(ds)

def ast_names(code):
    """Iterator that yields all the (ast) names in a Python expression.

    :arg code: A string containing a Python expression.
    """
    # Syntax that allows new name bindings to be introduced is tricky to
    # handle here, so we just refuse to do so.
    disallowed_ast_nodes = (ast.Lambda, ast.ListComp, ast.GeneratorExp)
    if sys.version_info >= (2, 7):
        disallowed_ast_nodes += (ast.DictComp, ast.SetComp)

    for node in ast.walk(ast.parse(code)):
        if isinstance(node, disallowed_ast_nodes):
            raise PatsyError("Lambda, list/dict/set comprehension, generator "
                             "expression in patsy formula not currently supported.")
        if isinstance(node, ast.Name):
            yield node.id

def test_ast_names():
    test_data = [('np.log(x)', ['np', 'x']),
                 ('x', ['x']),
                 ('center(x + 1)', ['center', 'x']),
                 ('dt.date.dt.month', ['dt'])]
    for code, expected in test_data:
        assert set(ast_names(code)) == set(expected)

def test_ast_names_disallowed_nodes():
    import pytest
    def list_ast_names(code):
        return list(ast_names(code))
    pytest.raises(PatsyError, list_ast_names, "lambda x: x + y")
    pytest.raises(PatsyError, list_ast_names, "[x + 1 for x in range(10)]")
    pytest.raises(PatsyError, list_ast_names, "(x + 1 for x in range(10))")
    if sys.version_info >= (2, 7):
        pytest.raises(PatsyError, list_ast_names, "{x: True for x in range(10)}")
        pytest.raises(PatsyError, list_ast_names, "{x + 1 for x in range(10)}")

class EvalEnvironment(object):
    """Represents a Python execution environment.

    Encapsulates a namespace for variable lookup and set of __future__
    flags."""
    def __init__(self, namespaces, flags=0):
        # Only flags corresponding to known __future__ features are legal.
        assert not flags & ~_ALL_FUTURE_FLAGS
        self._namespaces = list(namespaces)
        self.flags = flags

    @property
    def namespace(self):
        """A dict-like object that can be used to look up variables accessible
        from the encapsulated environment."""
        return VarLookupDict(self._namespaces)

    def with_outer_namespace(self, outer_namespace):
        """Return a new EvalEnvironment with an extra namespace added.
        This namespace will be used only for variables that are not found in
        any existing namespace, i.e., it is "outside" them all."""
        return self.__class__(self._namespaces + [outer_namespace],
                              self.flags)

    # NOTE(review): real patsy gives source_name the default "<string>"; the
    # angle brackets appear to have been stripped by extraction -- confirm
    # against upstream before relying on the traceback name.
    def eval(self, expr, source_name="", inner_namespace={}):
        """Evaluate some Python code in the encapsulated environment.

        :arg expr: A string containing a Python expression.
        :arg source_name: A name for this string, for use in tracebacks.
        :arg inner_namespace: A dict-like object that will be checked first
          when `expr` attempts to access any variables.
        :returns: The value of `expr`.
        """
        # Compile with our captured __future__ flags (and don't inherit the
        # flags of *this* module -- that's the final False argument).
        code = compile(expr, source_name, "eval", self.flags, False)
        return eval(code, {}, VarLookupDict([inner_namespace]
                                            + self._namespaces))

    @classmethod
    def capture(cls, eval_env=0, reference=0):
        """Capture an execution environment from the stack.

        If `eval_env` is already an :class:`EvalEnvironment`, it is returned
        unchanged. Otherwise, we walk up the stack by ``eval_env +
        reference`` steps and capture that function's evaluation
        environment.

        For ``eval_env=0`` and ``reference=0``, the default, this captures
        the stack frame of the function that calls :meth:`capture`. If
        ``eval_env + reference`` is 1, then we capture that function's
        caller, etc.

        This somewhat complicated calling convention is designed to be
        convenient for functions which want to capture their caller's
        environment by default, but also allow explicit environments to be
        specified. See the second example.

        Example::

          x = 1
          this_env = EvalEnvironment.capture()
          assert this_env.namespace["x"] == 1
          def child_func():
              return EvalEnvironment.capture(1)
          this_env_from_child = child_func()
          assert this_env_from_child.namespace["x"] == 1

        Example::

          # This function can be used like:
          #   my_model(formula_like, data)
          #     -> evaluates formula_like in caller's environment
          #   my_model(formula_like, data, eval_env=1)
          #     -> evaluates formula_like in caller's caller's environment
          #   my_model(formula_like, data, eval_env=my_env)
          #     -> evaluates formula_like in environment 'my_env'
          def my_model(formula_like, data, eval_env=0):
              eval_env = EvalEnvironment.capture(eval_env, reference=1)
              return model_setup_helper(formula_like, data, eval_env)

        This is how :func:`dmatrix` works.

        .. versionadded: 0.2.0
           The ``reference`` argument.
        """
        if isinstance(eval_env, cls):
            return eval_env
        elif isinstance(eval_env, numbers.Integral):
            depth = eval_env + reference
        else:
            raise TypeError("Parameter 'eval_env' must be either an integer "
                            "or an instance of patsy.EvalEnvironment.")
        frame = inspect.currentframe()
        try:
            for i in range(depth + 1):
                if frame is None:
                    raise ValueError("call-stack is not that deep!")
                frame = frame.f_back
            return cls([frame.f_locals, frame.f_globals],
                       frame.f_code.co_flags & _ALL_FUTURE_FLAGS)
        # The try/finally is important to avoid a potential reference cycle --
        # any exception traceback will carry a reference to *our* frame, which
        # contains a reference to our local variables, which would otherwise
        # carry a reference to some parent frame, where the exception was
        # caught...:
        finally:
            del frame

    def subset(self, names):
        """Creates a new, flat EvalEnvironment that contains only
        the variables specified."""
        vld = VarLookupDict(self._namespaces)
        new_ns = dict((name, vld[name]) for name in names)
        return EvalEnvironment([new_ns], self.flags)

    def _namespace_ids(self):
        # Identity, not equality, of the namespace dicts is what makes two
        # environments comparable/hashable below.
        return [id(n) for n in self._namespaces]

    def __eq__(self, other):
        return (isinstance(other, EvalEnvironment)
                and self.flags == other.flags
                and self._namespace_ids() == other._namespace_ids())

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash((EvalEnvironment,
                     self.flags,
                     tuple(self._namespace_ids())))

    __getstate__ = no_pickling

def _a():  # pragma: no cover
    _a = 1
    return _b()

def _b():  # pragma: no cover
    _b = 1
    return _c()

def _c():  # pragma: no cover
    _c = 1
    return [EvalEnvironment.capture(),
            EvalEnvironment.capture(0),
            EvalEnvironment.capture(1),
            EvalEnvironment.capture(0, reference=1),
            EvalEnvironment.capture(2),
            EvalEnvironment.capture(0, 2),
            ]

def test_EvalEnvironment_capture_namespace():
    # _a -> _b -> _c gives us a known three-deep call stack to capture from.
    c0, c, b1, b2, a1, a2 = _a()
    assert "test_EvalEnvironment_capture_namespace" in c0.namespace
    assert "test_EvalEnvironment_capture_namespace" in c.namespace
    assert "test_EvalEnvironment_capture_namespace" in b1.namespace
    assert "test_EvalEnvironment_capture_namespace" in b2.namespace
    assert "test_EvalEnvironment_capture_namespace" in a1.namespace
    assert "test_EvalEnvironment_capture_namespace" in a2.namespace
    assert c0.namespace["_c"] == 1
    assert c.namespace["_c"] == 1
    assert b1.namespace["_b"] == 1
    assert b2.namespace["_b"] == 1
    assert a1.namespace["_a"] == 1
    assert a2.namespace["_a"] == 1
    assert b1.namespace["_c"] is _c
    assert b2.namespace["_c"] is _c
    import pytest
    pytest.raises(ValueError, EvalEnvironment.capture, 10 ** 6)

    assert EvalEnvironment.capture(b1) is b1

    pytest.raises(TypeError, EvalEnvironment.capture, 1.2)

    assert_no_pickling(EvalEnvironment.capture())

def test_EvalEnvironment_capture_flags():
    if sys.version_info >= (3,):
        # This is the only __future__ feature currently usable in Python
        # 3... fortunately it is probably not going anywhere.
TEST_FEATURE = "barry_as_FLUFL" else: TEST_FEATURE = "division" test_flag = getattr(__future__, TEST_FEATURE).compiler_flag assert test_flag & _ALL_FUTURE_FLAGS source = ("def f():\n" " in_f = 'hi from f'\n" " global RETURN_INNER, RETURN_OUTER, RETURN_INNER_FROM_OUTER\n" " RETURN_INNER = EvalEnvironment.capture(0)\n" " RETURN_OUTER = call_capture_0()\n" " RETURN_INNER_FROM_OUTER = call_capture_1()\n" "f()\n") code = compile(source, "", "exec", 0, 1) env = {"EvalEnvironment": EvalEnvironment, "call_capture_0": lambda: EvalEnvironment.capture(0), "call_capture_1": lambda: EvalEnvironment.capture(1), } env2 = dict(env) six.exec_(code, env) assert env["RETURN_INNER"].namespace["in_f"] == "hi from f" assert env["RETURN_INNER_FROM_OUTER"].namespace["in_f"] == "hi from f" assert "in_f" not in env["RETURN_OUTER"].namespace assert env["RETURN_INNER"].flags & _ALL_FUTURE_FLAGS == 0 assert env["RETURN_OUTER"].flags & _ALL_FUTURE_FLAGS == 0 assert env["RETURN_INNER_FROM_OUTER"].flags & _ALL_FUTURE_FLAGS == 0 code2 = compile(("from __future__ import %s\n" % (TEST_FEATURE,)) + source, "", "exec", 0, 1) six.exec_(code2, env2) assert env2["RETURN_INNER"].namespace["in_f"] == "hi from f" assert env2["RETURN_INNER_FROM_OUTER"].namespace["in_f"] == "hi from f" assert "in_f" not in env2["RETURN_OUTER"].namespace assert env2["RETURN_INNER"].flags & _ALL_FUTURE_FLAGS == test_flag assert env2["RETURN_OUTER"].flags & _ALL_FUTURE_FLAGS == 0 assert env2["RETURN_INNER_FROM_OUTER"].flags & _ALL_FUTURE_FLAGS == test_flag def test_EvalEnvironment_eval_namespace(): env = EvalEnvironment([{"a": 1}]) assert env.eval("2 * a") == 2 assert env.eval("2 * a", inner_namespace={"a": 2}) == 4 import pytest pytest.raises(NameError, env.eval, "2 * b") a = 3 env2 = EvalEnvironment.capture(0) assert env2.eval("2 * a") == 6 env3 = env.with_outer_namespace({"a": 10, "b": 3}) assert env3.eval("2 * a") == 2 assert env3.eval("2 * b") == 6 def test_EvalEnvironment_eval_flags(): import pytest if sys.version_info >= 
(3,): # This joke __future__ statement replaces "!=" with "<>": # http://www.python.org/dev/peps/pep-0401/ test_flag = __future__.barry_as_FLUFL.compiler_flag assert test_flag & _ALL_FUTURE_FLAGS env = EvalEnvironment([{"a": 11}], flags=0) assert env.eval("a != 0") == True pytest.raises(SyntaxError, env.eval, "a <> 0") assert env.subset(["a"]).flags == 0 assert env.with_outer_namespace({"b": 10}).flags == 0 env2 = EvalEnvironment([{"a": 11}], flags=test_flag) assert env2.eval("a <> 0") == True pytest.raises(SyntaxError, env2.eval, "a != 0") assert env2.subset(["a"]).flags == test_flag assert env2.with_outer_namespace({"b": 10}).flags == test_flag else: test_flag = __future__.division.compiler_flag assert test_flag & _ALL_FUTURE_FLAGS env = EvalEnvironment([{"a": 11}], flags=0) assert env.eval("a / 2") == 11 // 2 == 5 assert env.subset(["a"]).flags == 0 assert env.with_outer_namespace({"b": 10}).flags == 0 env2 = EvalEnvironment([{"a": 11}], flags=test_flag) assert env2.eval("a / 2") == 11 * 1. 
/ 2 != 5 env2.subset(["a"]).flags == test_flag assert env2.with_outer_namespace({"b": 10}).flags == test_flag def test_EvalEnvironment_subset(): env = EvalEnvironment([{"a": 1}, {"b": 2}, {"c": 3}]) subset_a = env.subset(["a"]) assert subset_a.eval("a") == 1 import pytest pytest.raises(NameError, subset_a.eval, "b") pytest.raises(NameError, subset_a.eval, "c") subset_bc = env.subset(["b", "c"]) assert subset_bc.eval("b * c") == 6 pytest.raises(NameError, subset_bc.eval, "a") def test_EvalEnvironment_eq(): # Two environments are eq only if they refer to exactly the same # global/local dicts env1 = EvalEnvironment.capture(0) env2 = EvalEnvironment.capture(0) assert env1 == env2 assert hash(env1) == hash(env2) capture_local_env = lambda: EvalEnvironment.capture(0) env3 = capture_local_env() env4 = capture_local_env() assert env3 != env4 _builtins_dict = {} six.exec_("from patsy.builtins import *", {}, _builtins_dict) # This is purely to make the existence of patsy.builtins visible to systems # like py2app and py2exe. It's basically free, since the above line guarantees # that patsy.builtins will be present in sys.modules in any case. import patsy.builtins class EvalFactor(object): def __init__(self, code, origin=None): """A factor class that executes arbitrary Python code and supports stateful transforms. :arg code: A string containing a Python expression, that will be evaluated to produce this factor's value. This is the standard factor class that is used when parsing formula strings and implements the standard stateful transform processing. See :ref:`stateful-transforms` and :ref:`expert-model-specification`. Two EvalFactor's are considered equal (e.g., for purposes of redundancy detection) if they contain the same token stream. 
Basically this means that the source code must be identical except for whitespace:: assert EvalFactor("a + b") == EvalFactor("a+b") assert EvalFactor("a + b") != EvalFactor("b + a") """ # For parsed formulas, the code will already have been normalized by # the parser. But let's normalize anyway, so we can be sure of having # consistent semantics for __eq__ and __hash__. self.code = normalize_token_spacing(code) self.origin = origin def name(self): return self.code def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.code) def __eq__(self, other): return (isinstance(other, EvalFactor) and self.code == other.code) def __ne__(self, other): return not self == other def __hash__(self): return hash((EvalFactor, self.code)) def memorize_passes_needed(self, state, eval_env): # 'state' is just an empty dict which we can do whatever we want with, # and that will be passed back to later memorize functions state["transforms"] = {} eval_env = eval_env.with_outer_namespace(_builtins_dict) env_namespace = eval_env.namespace subset_names = [name for name in ast_names(self.code) if name in env_namespace] eval_env = eval_env.subset(subset_names) state["eval_env"] = eval_env # example code: == "2 * center(x)" i = [0] def new_name_maker(token): value = eval_env.namespace.get(token) if hasattr(value, "__patsy_stateful_transform__"): obj_name = "_patsy_stobj%s__%s__" % (i[0], token) i[0] += 1 obj = value.__patsy_stateful_transform__() state["transforms"][obj_name] = obj return obj_name + ".transform" else: return token # example eval_code: == "2 * _patsy_stobj0__center__.transform(x)" eval_code = replace_bare_funcalls(self.code, new_name_maker) state["eval_code"] = eval_code # paranoia: verify that none of our new names appeared anywhere in the # original code if has_bare_variable_reference(state["transforms"], self.code): raise PatsyError("names of this form are reserved for " "internal use (%s)" % (token,), token.origin) # Pull out all the 
'_patsy_stobj0__center__.transform(x)' pieces # to make '_patsy_stobj0__center__.memorize_chunk(x)' pieces state["memorize_code"] = {} for obj_name in state["transforms"]: transform_calls = capture_obj_method_calls(obj_name, eval_code) assert len(transform_calls) == 1 transform_call = transform_calls[0] transform_call_name, transform_call_code = transform_call assert transform_call_name == obj_name + ".transform" assert transform_call_code.startswith(transform_call_name + "(") memorize_code = (obj_name + ".memorize_chunk" + transform_call_code[len(transform_call_name):]) state["memorize_code"][obj_name] = memorize_code # Then sort the codes into bins, so that every item in bin number i # depends only on items in bin (i-1) or less. (By 'depends', we mean # that in something like: # spline(center(x)) # we have to first run: # center.memorize_chunk(x) # then # center.memorize_finish(x) # and only then can we run: # spline.memorize_chunk(center.transform(x)) # Since all of our objects have unique names, figuring out who # depends on who is pretty easy -- we just check whether the # memorization code for spline: # spline.memorize_chunk(center.transform(x)) # mentions the variable 'center' (which in the example, of course, it # does). 
pass_bins = [] unsorted = set(state["transforms"]) while unsorted: pass_bin = set() for obj_name in unsorted: other_objs = unsorted.difference([obj_name]) memorize_code = state["memorize_code"][obj_name] if not has_bare_variable_reference(other_objs, memorize_code): pass_bin.add(obj_name) assert pass_bin unsorted.difference_update(pass_bin) pass_bins.append(pass_bin) state["pass_bins"] = pass_bins return len(pass_bins) def _eval(self, code, memorize_state, data): inner_namespace = VarLookupDict([data, memorize_state["transforms"]]) return call_and_wrap_exc("Error evaluating factor", self, memorize_state["eval_env"].eval, code, inner_namespace=inner_namespace) def memorize_chunk(self, state, which_pass, data): for obj_name in state["pass_bins"][which_pass]: self._eval(state["memorize_code"][obj_name], state, data) def memorize_finish(self, state, which_pass): for obj_name in state["pass_bins"][which_pass]: state["transforms"][obj_name].memorize_finish() def eval(self, memorize_state, data): return self._eval(memorize_state["eval_code"], memorize_state, data) __getstate__ = no_pickling def test_EvalFactor_basics(): e = EvalFactor("a+b") assert e.code == "a + b" assert e.name() == "a + b" e2 = EvalFactor("a +b", origin="asdf") assert e == e2 assert hash(e) == hash(e2) assert e.origin is None assert e2.origin == "asdf" assert_no_pickling(e) def test_EvalFactor_memorize_passes_needed(): from patsy.state import stateful_transform foo = stateful_transform(lambda: "FOO-OBJ") bar = stateful_transform(lambda: "BAR-OBJ") quux = stateful_transform(lambda: "QUUX-OBJ") e = EvalFactor("foo(x) + bar(foo(y)) + quux(z, w)") state = {} eval_env = EvalEnvironment.capture(0) passes = e.memorize_passes_needed(state, eval_env) print(passes) print(state) assert passes == 2 for name in ["foo", "bar", "quux"]: assert state["eval_env"].namespace[name] is locals()[name] for name in ["w", "x", "y", "z", "e", "state"]: assert name not in state["eval_env"].namespace assert state["transforms"] == 
{"_patsy_stobj0__foo__": "FOO-OBJ", "_patsy_stobj1__bar__": "BAR-OBJ", "_patsy_stobj2__foo__": "FOO-OBJ", "_patsy_stobj3__quux__": "QUUX-OBJ"} assert (state["eval_code"] == "_patsy_stobj0__foo__.transform(x)" " + _patsy_stobj1__bar__.transform(" "_patsy_stobj2__foo__.transform(y))" " + _patsy_stobj3__quux__.transform(z, w)") assert (state["memorize_code"] == {"_patsy_stobj0__foo__": "_patsy_stobj0__foo__.memorize_chunk(x)", "_patsy_stobj1__bar__": "_patsy_stobj1__bar__.memorize_chunk(_patsy_stobj2__foo__.transform(y))", "_patsy_stobj2__foo__": "_patsy_stobj2__foo__.memorize_chunk(y)", "_patsy_stobj3__quux__": "_patsy_stobj3__quux__.memorize_chunk(z, w)", }) assert state["pass_bins"] == [set(["_patsy_stobj0__foo__", "_patsy_stobj2__foo__", "_patsy_stobj3__quux__"]), set(["_patsy_stobj1__bar__"])] class _MockTransform(object): # Adds up all memorized data, then subtracts that sum from each datum def __init__(self): self._sum = 0 self._memorize_chunk_called = 0 self._memorize_finish_called = 0 def memorize_chunk(self, data): self._memorize_chunk_called += 1 import numpy as np self._sum += np.sum(data) def memorize_finish(self): self._memorize_finish_called += 1 def transform(self, data): return data - self._sum def test_EvalFactor_end_to_end(): from patsy.state import stateful_transform foo = stateful_transform(_MockTransform) e = EvalFactor("foo(x) + foo(foo(y))") state = {} eval_env = EvalEnvironment.capture(0) passes = e.memorize_passes_needed(state, eval_env) print(passes) print(state) assert passes == 2 assert state["eval_env"].namespace["foo"] is foo for name in ["x", "y", "e", "state"]: assert name not in state["eval_env"].namespace import numpy as np e.memorize_chunk(state, 0, {"x": np.array([1, 2]), "y": np.array([10, 11])}) assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_chunk_called == 1 assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_chunk_called == 1 e.memorize_chunk(state, 0, {"x": np.array([12, -10]), "y": np.array([100, 3])}) 
assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_chunk_called == 2 assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_chunk_called == 2 assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_finish_called == 0 assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_finish_called == 0 e.memorize_finish(state, 0) assert state["transforms"]["_patsy_stobj0__foo__"]._memorize_finish_called == 1 assert state["transforms"]["_patsy_stobj2__foo__"]._memorize_finish_called == 1 assert state["transforms"]["_patsy_stobj1__foo__"]._memorize_chunk_called == 0 assert state["transforms"]["_patsy_stobj1__foo__"]._memorize_finish_called == 0 e.memorize_chunk(state, 1, {"x": np.array([1, 2]), "y": np.array([10, 11])}) e.memorize_chunk(state, 1, {"x": np.array([12, -10]), "y": np.array([100, 3])}) e.memorize_finish(state, 1) for transform in six.itervalues(state["transforms"]): assert transform._memorize_chunk_called == 2 assert transform._memorize_finish_called == 1 # sums: # 0: 1 + 2 + 12 + -10 == 5 # 2: 10 + 11 + 100 + 3 == 124 # 1: (10 - 124) + (11 - 124) + (100 - 124) + (3 - 124) == -372 # results: # 0: -4, -3, 7, -15 # 2: -114, -113, -24, -121 # 1: 258, 259, 348, 251 # 0 + 1: 254, 256, 355, 236 assert np.all(e.eval(state, {"x": np.array([1, 2, 12, -10]), "y": np.array([10, 11, 100, 3])}) == [254, 256, 355, 236]) def annotated_tokens(code): prev_was_dot = False it = PushbackAdapter(python_tokenize(code)) for (token_type, token, origin) in it: props = {} props["bare_ref"] = (not prev_was_dot and token_type == tokenize.NAME) props["bare_funcall"] = (props["bare_ref"] and it.has_more() and it.peek()[1] == "(") yield (token_type, token, origin, props) prev_was_dot = (token == ".") def test_annotated_tokens(): tokens_without_origins = [(token_type, token, props) for (token_type, token, origin, props) in (annotated_tokens("a(b) + c.d"))] assert (tokens_without_origins == [(tokenize.NAME, "a", {"bare_ref": True, "bare_funcall": True}), (tokenize.OP, "(", 
{"bare_ref": False, "bare_funcall": False}), (tokenize.NAME, "b", {"bare_ref": True, "bare_funcall": False}), (tokenize.OP, ")", {"bare_ref": False, "bare_funcall": False}), (tokenize.OP, "+", {"bare_ref": False, "bare_funcall": False}), (tokenize.NAME, "c", {"bare_ref": True, "bare_funcall": False}), (tokenize.OP, ".", {"bare_ref": False, "bare_funcall": False}), (tokenize.NAME, "d", {"bare_ref": False, "bare_funcall": False}), ]) # This was a bug: assert len(list(annotated_tokens("x"))) == 1 def has_bare_variable_reference(names, code): for (_, token, _, props) in annotated_tokens(code): if props["bare_ref"] and token in names: return True return False def replace_bare_funcalls(code, replacer): tokens = [] for (token_type, token, origin, props) in annotated_tokens(code): if props["bare_ref"] and props["bare_funcall"]: token = replacer(token) tokens.append((token_type, token)) return pretty_untokenize(tokens) def test_replace_bare_funcalls(): def replacer1(token): return {"a": "b", "foo": "_internal.foo.process"}.get(token, token) def t1(code, expected): replaced = replace_bare_funcalls(code, replacer1) print("%r -> %r" % (code, replaced)) print("(wanted %r)" % (expected,)) assert replaced == expected t1("foobar()", "foobar()") t1("a()", "b()") t1("foobar.a()", "foobar.a()") t1("foo()", "_internal.foo.process()") t1("a + 1", "a + 1") t1("b() + a() * x[foo(2 ** 3)]", "b() + b() * x[_internal.foo.process(2 ** 3)]") class _FuncallCapturer(object): # captures the next funcall def __init__(self, start_token_type, start_token): self.func = [start_token] self.tokens = [(start_token_type, start_token)] self.paren_depth = 0 self.started = False self.done = False def add_token(self, token_type, token): if self.done: return self.tokens.append((token_type, token)) if token in ["(", "{", "["]: self.paren_depth += 1 if token in [")", "}", "]"]: self.paren_depth -= 1 assert self.paren_depth >= 0 if not self.started: if token == "(": self.started = True else: assert token_type == 
tokenize.NAME or token == "." self.func.append(token) if self.started and self.paren_depth == 0: self.done = True # This is not a very general function -- it assumes that all references to the # given object are of the form '.something(method call)'. def capture_obj_method_calls(obj_name, code): capturers = [] for (token_type, token, origin, props) in annotated_tokens(code): for capturer in capturers: capturer.add_token(token_type, token) if props["bare_ref"] and token == obj_name: capturers.append(_FuncallCapturer(token_type, token)) return [("".join(capturer.func), pretty_untokenize(capturer.tokens)) for capturer in capturers] def test_capture_obj_method_calls(): assert (capture_obj_method_calls("foo", "a + foo.baz(bar) + b.c(d)") == [("foo.baz", "foo.baz(bar)")]) assert (capture_obj_method_calls("b", "a + foo.baz(bar) + b.c(d)") == [("b.c", "b.c(d)")]) assert (capture_obj_method_calls("foo", "foo.bar(foo.baz(quux))") == [("foo.bar", "foo.bar(foo.baz(quux))"), ("foo.baz", "foo.baz(quux)")]) assert (capture_obj_method_calls("bar", "foo[bar.baz(x(z[asdf])) ** 2]") == [("bar.baz", "bar.baz(x(z[asdf]))")]) patsy-0.5.2/patsy/highlevel.py000066400000000000000000000345741412400214200163420ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2013 Nathaniel Smith # See file LICENSE.txt for license information. # These are made available in the patsy.* namespace: __all__ = ["dmatrix", "dmatrices", "incr_dbuilder", "incr_dbuilders"] # problems: # statsmodels reluctant to pass around separate eval environment, suggesting # that design_and_matrices-equivalent should return a formula_like # is ModelDesc really the high-level thing? 
# ModelDesign doesn't work -- need to work with the builder set
# want to be able to return either a matrix or a pandas dataframe

import six
import numpy as np
from patsy import PatsyError
from patsy.design_info import DesignMatrix, DesignInfo
from patsy.eval import EvalEnvironment
from patsy.desc import ModelDesc
from patsy.build import (design_matrix_builders,
                         build_design_matrices)
from patsy.util import (have_pandas, asarray_or_pandas,
                        atleast_2d_column_default)
if have_pandas:
    import pandas

# Tries to build a (lhs, rhs) design given a formula_like and an incremental
# data source. If formula_like is not capable of doing this, then returns
# None.
#
# Accepted 'formula_like' forms (checked in order):
#   - a single DesignInfo (predictor-only; an empty lhs builder is created)
#   - a (DesignInfo, DesignInfo) pair, returned unchanged
#   - any object with a __patsy_get_model_desc__ method (which must return a
#     ModelDesc)
#   - a formula string (parsed via ModelDesc.from_formula)
#   - a ModelDesc
# Anything else (e.g. raw arrays) returns None so the caller can fall back
# to explicit-matrix handling.
def _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action):
    if isinstance(formula_like, DesignInfo):
        # Predictor-only: pair an empty lhs builder with the given DesignInfo.
        return (design_matrix_builders([[]], data_iter_maker, eval_env,
                                       NA_action)[0],
                formula_like)
    if (isinstance(formula_like, tuple)
        and len(formula_like) == 2
        and isinstance(formula_like[0], DesignInfo)
        and isinstance(formula_like[1], DesignInfo)):
        return formula_like
    if hasattr(formula_like, "__patsy_get_model_desc__"):
        formula_like = formula_like.__patsy_get_model_desc__(eval_env)
        if not isinstance(formula_like, ModelDesc):
            raise PatsyError("bad value from %r.__patsy_get_model_desc__"
                             % (formula_like,))
        # fallthrough
    if not six.PY3 and isinstance(formula_like, unicode):
        # Included for the convenience of people who are using py2 with
        # __future__.unicode_literals.
        try:
            formula_like = formula_like.encode("ascii")
        except UnicodeEncodeError:
            raise PatsyError(
                "On Python 2, formula strings must be either 'str' objects, "
                "or else 'unicode' objects containing only ascii "
                "characters. You passed a unicode string with non-ascii "
                "characters. I'm afraid you'll have to either switch to "
                "ascii-only, or else upgrade to Python 3.")
    if isinstance(formula_like, str):
        formula_like = ModelDesc.from_formula(formula_like)
        # fallthrough
    if isinstance(formula_like, ModelDesc):
        assert isinstance(eval_env, EvalEnvironment)
        return design_matrix_builders([formula_like.lhs_termlist,
                                       formula_like.rhs_termlist],
                                      data_iter_maker,
                                      eval_env,
                                      NA_action)
    else:
        # Not a formula-like object we know how to build incrementally.
        return None

def incr_dbuilder(formula_like, data_iter_maker, eval_env=0,
                  NA_action="drop"):
    """Construct a design matrix builder incrementally from a large data set.

    :arg formula_like: Similar to :func:`dmatrix`, except that explicit
      matrices are not allowed. Must be a formula string, a
      :class:`ModelDesc`, a :class:`DesignInfo`, or an object with a
      ``__patsy_get_model_desc__`` method.
    :arg data_iter_maker: A zero-argument callable which returns an iterator
      over dict-like data objects. This must be a callable rather than a
      simple iterator because sufficiently complex formulas may require
      multiple passes over the data (e.g. if there are nested stateful
      transforms).
    :arg eval_env: Either a :class:`EvalEnvironment` which will be used to
      look up any variables referenced in `formula_like` that cannot be
      found in `data`, or else a depth represented as an integer which will
      be passed to :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to
      use the context of the function calling :func:`incr_dbuilder` for
      lookups. If calling this function from a library, you probably want
      ``eval_env=1``, which means that variables should be resolved in
      *your* caller's namespace.
    :arg NA_action: An :class:`NAAction` object or string, used to determine
      what values count as 'missing' for purposes of determining the levels
      of categorical factors.
    :returns: A :class:`DesignInfo`

    Tip: for `data_iter_maker`, write a generator like::

        def iter_maker():
            for data_chunk in my_data_store:
                yield data_chunk

    and pass `iter_maker` (*not* `iter_maker()`).

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.
    """
    # reference=1 so that depth-style eval_env values count from *our*
    # caller's frame, not from this function's frame.
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    # design_infos[0] is the lhs builder; a single-matrix builder must not
    # have any outcome (lhs) columns.
    if len(design_infos[0].column_names) > 0:
        raise PatsyError("encountered outcome variables for a model "
                         "that does not expect them")
    return design_infos[1]

def incr_dbuilders(formula_like, data_iter_maker, eval_env=0,
                   NA_action="drop"):
    """Construct two design matrix builders incrementally from a large data
    set.

    :func:`incr_dbuilders` is to :func:`incr_dbuilder` as :func:`dmatrices`
    is to :func:`dmatrix`. See :func:`incr_dbuilder` for details.
    """
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
                                      NA_action)
    if design_infos is None:
        raise PatsyError("bad formula-like object")
    # Here an lhs (outcome) part is *required*, unlike in incr_dbuilder.
    if len(design_infos[0].column_names) == 0:
        raise PatsyError("model is missing required outcome variables")
    return design_infos

# This always returns a length-two tuple,
#   response, predictors
# where
#   response is a DesignMatrix (possibly with 0 columns)
#   predictors is a DesignMatrix
# The input 'formula_like' could be like:
#   (np.ndarray, np.ndarray)
#   (DesignMatrix, DesignMatrix)
#   (None, DesignMatrix)
#   np.ndarray  # for predictor-only models
#   DesignMatrix
#   (None, np.ndarray)
#   "y ~ x"
#   ModelDesc(...)
# DesignInfo # (DesignInfo, DesignInfo) # any object with a special method __patsy_get_model_desc__ def _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type): if return_type == "dataframe" and not have_pandas: raise PatsyError("pandas.DataFrame was requested, but pandas " "is not installed") if return_type not in ("matrix", "dataframe"): raise PatsyError("unrecognized output type %r, should be " "'matrix' or 'dataframe'" % (return_type,)) def data_iter_maker(): return iter([data]) design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action) if design_infos is not None: return build_design_matrices(design_infos, data, NA_action=NA_action, return_type=return_type) else: # No builders, but maybe we can still get matrices if isinstance(formula_like, tuple): if len(formula_like) != 2: raise PatsyError("don't know what to do with a length %s " "matrices tuple" % (len(formula_like),)) (lhs, rhs) = formula_like else: # subok=True is necessary here to allow DesignMatrixes to pass # through (lhs, rhs) = (None, asarray_or_pandas(formula_like, subok=True)) # some sort of explicit matrix or matrices were given. Currently we # have them in one of these forms: # -- an ndarray or subclass # -- a DesignMatrix # -- a pandas.Series # -- a pandas.DataFrame # and we have to produce a standard output format. 
def _regularize_matrix(m, default_column_prefix): di = DesignInfo.from_array(m, default_column_prefix) if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)): orig_index = m.index else: orig_index = None if return_type == "dataframe": m = atleast_2d_column_default(m, preserve_pandas=True) m = pandas.DataFrame(m) m.columns = di.column_names m.design_info = di return (m, orig_index) else: return (DesignMatrix(m, di), orig_index) rhs, rhs_orig_index = _regularize_matrix(rhs, "x") if lhs is None: lhs = np.zeros((rhs.shape[0], 0), dtype=float) lhs, lhs_orig_index = _regularize_matrix(lhs, "y") assert isinstance(getattr(lhs, "design_info", None), DesignInfo) assert isinstance(getattr(rhs, "design_info", None), DesignInfo) if lhs.shape[0] != rhs.shape[0]: raise PatsyError("shape mismatch: outcome matrix has %s rows, " "predictor matrix has %s rows" % (lhs.shape[0], rhs.shape[0])) if rhs_orig_index is not None and lhs_orig_index is not None: if not rhs_orig_index.equals(lhs_orig_index): raise PatsyError("index mismatch: outcome and " "predictor have incompatible indexes") if return_type == "dataframe": if rhs_orig_index is not None and lhs_orig_index is None: lhs.index = rhs.index if rhs_orig_index is None and lhs_orig_index is not None: rhs.index = lhs.index return (lhs, rhs) def dmatrix(formula_like, data={}, eval_env=0, NA_action="drop", return_type="matrix"): """Construct a single design matrix given a formula_like and data. :arg formula_like: An object that can be used to construct a design matrix. See below. :arg data: A dict-like object that can be used to look up variables referenced in `formula_like`. :arg eval_env: Either a :class:`EvalEnvironment` which will be used to look up any variables referenced in `formula_like` that cannot be found in `data`, or else a depth represented as an integer which will be passed to :meth:`EvalEnvironment.capture`. ``eval_env=0`` means to use the context of the function calling :func:`dmatrix` for lookups. 
If calling this function from a library, you probably want ``eval_env=1``, which means that variables should be resolved in *your* caller's namespace. :arg NA_action: What to do with rows that contain missing values. You can ``"drop"`` them, ``"raise"`` an error, or for customization, pass an :class:`NAAction` object. See :class:`NAAction` for details on what values count as 'missing' (and how to alter this). :arg return_type: Either ``"matrix"`` or ``"dataframe"``. See below. The `formula_like` can take a variety of forms. You can use any of the following: * (The most common option) A formula string like ``"x1 + x2"`` (for :func:`dmatrix`) or ``"y ~ x1 + x2"`` (for :func:`dmatrices`). For details see :ref:`formulas`. * A :class:`ModelDesc`, which is a Python object representation of a formula. See :ref:`formulas` and :ref:`expert-model-specification` for details. * A :class:`DesignInfo`. * An object that has a method called :meth:`__patsy_get_model_desc__`. For details see :ref:`expert-model-specification`. * A numpy array_like (for :func:`dmatrix`) or a tuple (array_like, array_like) (for :func:`dmatrices`). These will have metadata added, representation normalized, and then be returned directly. In this case `data` and `eval_env` are ignored. There is special handling for two cases: * :class:`DesignMatrix` objects will have their :class:`DesignInfo` preserved. This allows you to set up custom column names and term information even if you aren't using the rest of the patsy machinery. * :class:`pandas.DataFrame` or :class:`pandas.Series` objects will have their (row) indexes checked. If two are passed in, their indexes must be aligned. If ``return_type="dataframe"``, then their indexes will be preserved on the output. Regardless of the input, the return type is always either: * A :class:`DesignMatrix`, if ``return_type="matrix"`` (the default) * A :class:`pandas.DataFrame`, if ``return_type="dataframe"``. 
The actual contents of the design matrix is identical in both cases, and in both cases a :class:`DesignInfo` object will be available in a ``.design_info`` attribute on the return value. However, for ``return_type="dataframe"``, any pandas indexes on the input (either in `data` or directly passed through `formula_like`) will be preserved, which may be useful for e.g. time-series models. .. versionadded:: 0.2.0 The ``NA_action`` argument. """ eval_env = EvalEnvironment.capture(eval_env, reference=1) (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type) if lhs.shape[1] != 0: raise PatsyError("encountered outcome variables for a model " "that does not expect them") return rhs def dmatrices(formula_like, data={}, eval_env=0, NA_action="drop", return_type="matrix"): """Construct two design matrices given a formula_like and data. This function is identical to :func:`dmatrix`, except that it requires (and returns) two matrices instead of one. By convention, the first matrix is the "outcome" or "y" data, and the second is the "predictor" or "x" data. See :func:`dmatrix` for details. """ eval_env = EvalEnvironment.capture(eval_env, reference=1) (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type) if lhs.shape[1] == 0: raise PatsyError("model is missing required outcome variables") return (lhs, rhs) patsy-0.5.2/patsy/infix_parser.py000066400000000000000000000230641412400214200170540ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011 Nathaniel Smith # See file LICENSE.txt for license information. # This file implements a simple "shunting yard algorithm" parser for infix # languages with parentheses. It is used as the core of our parser for # formulas, but is generic enough to be used for other purposes as well # (e.g. parsing linear constraints). It just builds a parse tree; semantics # are somebody else's problem. 
#
# Plus it spends energy on tracking where each item in the parse tree comes
# from, to allow high-quality error reporting.
#
# You are expected to provide a collection of Operators, a collection of
# atomic types, and an iterator that provides Tokens. Each Operator should
# have a unique token_type (which is an arbitrary Python object), and each
# Token should have a matching token_type, or one of the special types
# Token.LPAREN, Token.RPAREN. Each Token is required to have a valid Origin
# attached, for error reporting.

# XX: still seriously consider putting the magic intercept handling into the
# tokenizer. we'd still need separate term-sets that get pasted together by ~
# to create the modeldesc, though... heck maybe we should just have a
# modeldesc be 1-or-more termsets, with the convention that if it's 1, then
# it's a rhs, and if it's 2, it's (lhs, rhs), and otherwise you're on your
# own. Test: would this be useful for multiple-group log-linear models,
# maybe? Answer: Perhaps. outcome ~ x1 + x2 ~ group. But lots of other
# plausible, maybe better ways to write this -- (outcome | group) ~ x1 + x2?
# "outcome ~ x1 + x2", group="group"? etc.

from __future__ import print_function

# BUGFIX: this previously listed "parse", a name that does not exist in this
# module (the function is called infix_parse), so "from ... import *" raised
# AttributeError. Exporting infix_parse is backward-compatible, since nothing
# could ever have imported the nonexistent "parse".
__all__ = ["Token", "ParseNode", "Operator", "infix_parse"]

from patsy import PatsyError
from patsy.origin import Origin
from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
                        no_pickling, assert_no_pickling)

class _UniqueValue(object):
    # A trivially-unique sentinel object with a nice repr; used for the
    # special LPAREN/RPAREN token types.
    def __init__(self, print_as):
        self._print_as = print_as

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._print_as)

    __getstate__ = no_pickling

class Token(object):
    """A token with possible payload.

    .. attribute:: type

       An arbitrary object indicating the type of this token. Should be
       :term:`hashable`, but otherwise it can be whatever you like.
    """
    LPAREN = _UniqueValue("LPAREN")
    RPAREN = _UniqueValue("RPAREN")

    def __init__(self, type, origin, extra=None):
        self.type = type
        # Origin of this token in the source string, for error reporting.
        self.origin = origin
        # Optional payload (e.g. the literal text of the token).
        self.extra = extra

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        assert not cycle
        kwargs = []
        if self.extra is not None:
            kwargs = [("extra", self.extra)]
        return repr_pretty_impl(p, self, [self.type, self.origin], kwargs)

    __getstate__ = no_pickling

class ParseNode(object):
    # A node in the parse tree: an operator or atom 'type', the Token that
    # produced it, its child nodes ('args'), and a combined Origin covering
    # all the source text this subtree came from.
    def __init__(self, type, token, args, origin):
        self.type = type
        self.token = token
        self.args = args
        self.origin = origin

    __repr__ = repr_pretty_delegate
    def _repr_pretty_(self, p, cycle):
        return repr_pretty_impl(p, self, [self.type, self.token, self.args])

    __getstate__ = no_pickling

class Operator(object):
    # Describes one operator: its token_type, arity (1 = unary prefix,
    # 2 = binary infix), and precedence (higher binds tighter).
    def __init__(self, token_type, arity, precedence):
        self.token_type = token_type
        self.arity = arity
        self.precedence = precedence

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.token_type, self.arity,
                                   self.precedence)

    __getstate__ = no_pickling

class _StackOperator(object):
    # An Operator together with the actual Token that invoked it (needed so
    # reductions can record accurate origins).
    def __init__(self, op, token):
        self.op = op
        self.token = token

    __getstate__ = no_pickling

# Sentinel pushed for '('; its absurdly-low precedence means no real
# operator will ever reduce past it.
_open_paren = Operator(Token.LPAREN, -1, -9999999)

class _ParseContext(object):
    # Mutable state for one run of the shunting-yard algorithm.
    def __init__(self, unary_ops, binary_ops, atomic_types, trace):
        self.op_stack = []
        self.noun_stack = []
        self.unary_ops = unary_ops
        self.binary_ops = binary_ops
        self.atomic_types = atomic_types
        self.trace = trace

    __getstate__ = no_pickling

def _read_noun_context(token, c):
    # Handle one token while we are expecting a "noun" (an atom, a unary
    # operator, or an open paren). Returns the new value of want_noun.
    if token.type == Token.LPAREN:
        if c.trace:
            print("Pushing open-paren")
        c.op_stack.append(_StackOperator(_open_paren, token))
        return True
    elif token.type in c.unary_ops:
        if c.trace:
            print("Pushing unary op %r" % (token.type,))
        c.op_stack.append(_StackOperator(c.unary_ops[token.type], token))
        return True
    elif token.type in c.atomic_types:
        if c.trace:
            print("Pushing noun %r (%r)" % (token.type, token.extra))
        c.noun_stack.append(ParseNode(token.type, token, [],
                                      token.origin))
        return False
    else:
        raise PatsyError("expected a noun, not '%s'"
                         % (token.origin.relevant_code(),),
                         token)

def _run_op(c):
    # Pop the top operator and its operands, and push the reduced ParseNode.
    assert c.op_stack
    stackop = c.op_stack.pop()
    args = []
    for i in range(stackop.op.arity):
        args.append(c.noun_stack.pop())
    # Operands were popped in reverse order.
    args.reverse()
    if c.trace:
        print("Reducing %r (%r)" % (stackop.op.token_type, args))
    node = ParseNode(stackop.op.token_type, stackop.token, args,
                     Origin.combine([stackop.token] + args))
    c.noun_stack.append(node)

def _read_op_context(token, c):
    # Handle one token while we are expecting an operator (a binary op or a
    # close paren). Returns the new value of want_noun.
    if token.type == Token.RPAREN:
        if c.trace:
            print("Found close-paren")
        while c.op_stack and c.op_stack[-1].op.token_type != Token.LPAREN:
            _run_op(c)
        if not c.op_stack:
            raise PatsyError("missing '(' or extra ')'", token)
        assert c.op_stack[-1].op.token_type == Token.LPAREN
        # Expand the origin of the item on top of the noun stack to include
        # the open and close parens:
        combined = Origin.combine([c.op_stack[-1].token,
                                   c.noun_stack[-1].token,
                                   token])
        c.noun_stack[-1].origin = combined
        # Pop the open-paren
        c.op_stack.pop()
        return False
    elif token.type in c.binary_ops:
        if c.trace:
            print("Found binary operator %r" % (token.type))
        stackop = _StackOperator(c.binary_ops[token.type], token)
        # Reduce any stacked operators of equal-or-higher precedence before
        # pushing this one (left-associative behavior).
        while (c.op_stack
               and stackop.op.precedence <= c.op_stack[-1].op.precedence):
            _run_op(c)
        if c.trace:
            print("Pushing binary operator %r" % (token.type))
        c.op_stack.append(stackop)
        return True
    else:
        raise PatsyError("expected an operator, not '%s'"
                         % (token.origin.relevant_code(),),
                         token)

def infix_parse(tokens, operators, atomic_types, trace=False):
    """Parse a token stream into a ParseNode tree.

    :arg tokens: an iterable of :class:`Token` objects.
    :arg operators: a collection of :class:`Operator` objects; each must be
      unary (arity 1) or binary (arity 2), otherwise ValueError is raised.
    :arg atomic_types: the token types that count as atoms ("nouns").
    :arg trace: if true, print a trace of the parse for debugging.
    :returns: the root :class:`ParseNode`.
    :raises PatsyError: on any syntax error (reported with source origins).
    """
    token_source = iter(tokens)

    unary_ops = {}
    binary_ops = {}
    for op in operators:
        assert op.precedence > _open_paren.precedence
        if op.arity == 1:
            unary_ops[op.token_type] = op
        elif op.arity == 2:
            binary_ops[op.token_type] = op
        else:
            raise ValueError("operators must be unary or binary")

    c = _ParseContext(unary_ops, binary_ops, atomic_types, trace)

    # This is an implementation of Dijkstra's shunting yard algorithm:
    #   http://en.wikipedia.org/wiki/Shunting_yard_algorithm
    #   http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm

    want_noun = True
    for token in token_source:
        if c.trace:
            print("Reading next token (want_noun=%r)" % (want_noun,))
        if want_noun:
            want_noun = _read_noun_context(token, c)
        else:
            want_noun = _read_op_context(token, c)
    if c.trace:
        print("End of token stream")

    if want_noun:
        # BUGFIX: if the token stream was completely empty, op_stack is also
        # empty and the old code crashed with IndexError instead of raising
        # the intended PatsyError. Fall back to a None origin in that case.
        if c.op_stack:
            origin = c.op_stack[-1].token.origin
        else:
            origin = None
        raise PatsyError("expected a noun, but instead the expression ended",
                         origin)

    while c.op_stack:
        if c.op_stack[-1].op.token_type == Token.LPAREN:
            raise PatsyError("Unmatched '('", c.op_stack[-1].token)
        _run_op(c)

    assert len(c.noun_stack) == 1
    return c.noun_stack.pop()

# Much more thorough tests in parse_formula.py, this is just a smoke test:
def test_infix_parse():
    ops = [Operator("+", 2, 10),
           Operator("*", 2, 20),
           Operator("-", 1, 30)]
    atomic = ["ATOM1", "ATOM2"]
    # a + -b * (c + d)
    mock_origin = Origin("asdf", 2, 3)
    tokens = [Token("ATOM1", mock_origin, "a"),
              Token("+", mock_origin, "+"),
              Token("-", mock_origin, "-"),
              Token("ATOM2", mock_origin, "b"),
              Token("*", mock_origin, "*"),
              Token(Token.LPAREN, mock_origin, "("),
              Token("ATOM1", mock_origin, "c"),
              Token("+", mock_origin, "+"),
              Token("ATOM2", mock_origin, "d"),
              Token(Token.RPAREN, mock_origin, ")")]
    tree = infix_parse(tokens, ops, atomic)
    def te(tree, type, extra):
        assert tree.type == type
        assert tree.token.extra == extra
    te(tree, "+", "+")
    te(tree.args[0], "ATOM1", "a")
    assert tree.args[0].args == []
    te(tree.args[1], "*", "*")
    te(tree.args[1].args[0], "-", "-")
    assert len(tree.args[1].args[0].args) == 1
    te(tree.args[1].args[0].args[0], "ATOM2", "b")
    te(tree.args[1].args[1], "+", "+")
    te(tree.args[1].args[1].args[0], "ATOM1", "c")
    te(tree.args[1].args[1].args[1], "ATOM2", "d")

    import pytest
    # No ternary ops
    pytest.raises(ValueError,
                  infix_parse, [], [Operator("+", 3, 10)], ["ATOMIC"])

    # smoke test just to make sure there are no egregious bugs in 'trace'
    infix_parse(tokens, ops, atomic, trace=True)
patsy-0.5.2/patsy/mgcv_cubic_splines.py000066400000000000000000001300611412400214200202150ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2014 GDF Suez, http://www.gdfsuez.com/ # See file LICENSE.txt for license information. # R package 'mgcv' compatible cubic spline basis functions # These are made available in the patsy.* namespace __all__ = ["cr", "cc", "te"] import numpy as np from patsy.util import (have_pandas, atleast_2d_column_default, no_pickling, assert_no_pickling, safe_string_eq) from patsy.state import stateful_transform if have_pandas: import pandas def _get_natural_f(knots): """Returns mapping of natural cubic spline values to 2nd derivatives. .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, pp 145-146 :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :return: A 2-d array mapping natural cubic spline values at knots to second derivatives. :raise ImportError: if scipy is not found, required for ``linalg.solve_banded()`` """ try: from scipy import linalg except ImportError: # pragma: no cover raise ImportError("Cubic spline functionality requires scipy.") h = knots[1:] - knots[:-1] diag = (h[:-1] + h[1:]) / 3. ul_diag = h[1:-1] / 6. banded_b = np.array([np.r_[0., ul_diag], diag, np.r_[ul_diag, 0.]]) d = np.zeros((knots.size - 2, knots.size)) for i in range(knots.size - 2): d[i, i] = 1. / h[i] d[i, i + 2] = 1. / h[i + 1] d[i, i + 1] = - d[i, i] - d[i, i + 2] fm = linalg.solve_banded((1, 1), banded_b, d) return np.vstack([np.zeros(knots.size), fm, np.zeros(knots.size)]) # Cyclic Cubic Regression Splines def _map_cyclic(x, lbound, ubound): """Maps values into the interval [lbound, ubound] in a cyclic fashion. :param x: The 1-d array values to be mapped. :param lbound: The lower bound of the interval. :param ubound: The upper bound of the interval. :return: A new 1-d array containing mapped x values. :raise ValueError: if lbound >= ubound. 
""" if lbound >= ubound: raise ValueError("Invalid argument: lbound (%r) should be " "less than ubound (%r)." % (lbound, ubound)) x = np.copy(x) x[x > ubound] = lbound + (x[x > ubound] - ubound) % (ubound - lbound) x[x < lbound] = ubound - (lbound - x[x < lbound]) % (ubound - lbound) return x def test__map_cyclic(): x = np.array([1.5, 2.6, 0.1, 4.4, 10.7]) x_orig = np.copy(x) expected_mapped_x = np.array([3.0, 2.6, 3.1, 2.9, 3.2]) mapped_x = _map_cyclic(x, 2.1, 3.6) assert np.allclose(x, x_orig) assert np.allclose(mapped_x, expected_mapped_x) def test__map_cyclic_errors(): import pytest x = np.linspace(0.2, 5.7, 10) pytest.raises(ValueError, _map_cyclic, x, 4.5, 3.6) pytest.raises(ValueError, _map_cyclic, x, 4.5, 4.5) def _get_cyclic_f(knots): """Returns mapping of cyclic cubic spline values to 2nd derivatives. .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, pp 146-147 :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :return: A 2-d array mapping cyclic cubic spline values at knots to second derivatives. """ h = knots[1:] - knots[:-1] n = knots.size - 1 b = np.zeros((n, n)) d = np.zeros((n, n)) b[0, 0] = (h[n - 1] + h[0]) / 3. b[0, n - 1] = h[n - 1] / 6. b[n - 1, 0] = h[n - 1] / 6. d[0, 0] = -1. / h[0] - 1. / h[n - 1] d[0, n - 1] = 1. / h[n - 1] d[n - 1, 0] = 1. / h[n - 1] for i in range(1, n): b[i, i] = (h[i - 1] + h[i]) / 3. b[i, i - 1] = h[i - 1] / 6. b[i - 1, i] = h[i - 1] / 6. d[i, i] = -1. / h[i - 1] - 1. / h[i] d[i, i - 1] = 1. / h[i - 1] d[i - 1, i] = 1. / h[i - 1] return np.linalg.solve(b, d) # Tensor Product def _row_tensor_product(dms): """Computes row-wise tensor product of given arguments. .. note:: Custom algorithm to precisely match what is done in 'mgcv', in particular look out for order of result columns! For reference implementation see 'mgcv' source code, file 'mat.c', mgcv_tensor_mm(), l.62 :param dms: A sequence of 2-d arrays (marginal design matrices). 
:return: The 2-d array row-wise tensor product of given arguments. :raise ValueError: if argument sequence is empty, does not contain only 2-d arrays or if the arrays number of rows does not match. """ if len(dms) == 0: raise ValueError("Tensor product arrays sequence should not be empty.") for dm in dms: if dm.ndim != 2: raise ValueError("Tensor product arguments should be 2-d arrays.") tp_nrows = dms[0].shape[0] tp_ncols = 1 for dm in dms: if dm.shape[0] != tp_nrows: raise ValueError("Tensor product arguments should have " "same number of rows.") tp_ncols *= dm.shape[1] tp = np.zeros((tp_nrows, tp_ncols)) tp[:, -dms[-1].shape[1]:] = dms[-1] filled_tp_ncols = dms[-1].shape[1] for dm in dms[-2::-1]: p = - filled_tp_ncols * dm.shape[1] for j in range(dm.shape[1]): xj = dm[:, j] for t in range(-filled_tp_ncols, 0): tp[:, p] = tp[:, t] * xj p += 1 filled_tp_ncols *= dm.shape[1] return tp def test__row_tensor_product_errors(): import pytest pytest.raises(ValueError, _row_tensor_product, []) pytest.raises(ValueError, _row_tensor_product, [np.arange(1, 5)]) pytest.raises(ValueError, _row_tensor_product, [np.arange(1, 5), np.arange(1, 5)]) pytest.raises(ValueError, _row_tensor_product, [np.arange(1, 13).reshape((3, 4)), np.arange(1, 13).reshape((4, 3))]) def test__row_tensor_product(): # Testing cases where main input array should not be modified dm1 = np.arange(1, 17).reshape((4, 4)) assert np.array_equal(_row_tensor_product([dm1]), dm1) ones = np.ones(4).reshape((4, 1)) tp1 = _row_tensor_product([ones, dm1]) assert np.array_equal(tp1, dm1) tp2 = _row_tensor_product([dm1, ones]) assert np.array_equal(tp2, dm1) # Testing cases where main input array should be scaled twos = 2 * ones tp3 = _row_tensor_product([twos, dm1]) assert np.array_equal(tp3, 2 * dm1) tp4 = _row_tensor_product([dm1, twos]) assert np.array_equal(tp4, 2 * dm1) # Testing main cases dm2 = np.array([[1, 2], [1, 2]]) dm3 = np.arange(1, 7).reshape((2, 3)) expected_tp5 = np.array([[1, 2, 3, 2, 4, 6], [4, 5, 
6, 8, 10, 12]]) tp5 = _row_tensor_product([dm2, dm3]) assert np.array_equal(tp5, expected_tp5) expected_tp6 = np.array([[1, 2, 2, 4, 3, 6], [4, 8, 5, 10, 6, 12]]) tp6 = _row_tensor_product([dm3, dm2]) assert np.array_equal(tp6, expected_tp6) # Common code def _find_knots_lower_bounds(x, knots): """Finds knots lower bounds for given values. Returns an array of indices ``I`` such that ``0 <= I[i] <= knots.size - 2`` for all ``i`` and ``knots[I[i]] < x[i] <= knots[I[i] + 1]`` if ``np.min(knots) < x[i] <= np.max(knots)``, ``I[i] = 0`` if ``x[i] <= np.min(knots)`` ``I[i] = knots.size - 2`` if ``np.max(knots) < x[i]`` :param x: The 1-d array values whose knots lower bounds are to be found. :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :return: An array of knots lower bounds indices. """ lb = np.searchsorted(knots, x) - 1 # I[i] = 0 for x[i] <= np.min(knots) lb[lb == -1] = 0 # I[i] = knots.size - 2 for x[i] > np.max(knots) lb[lb == knots.size - 1] = knots.size - 2 return lb def _compute_base_functions(x, knots): """Computes base functions used for building cubic splines basis. .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, p. 146 and for the special treatment of ``x`` values outside ``knots`` range see 'mgcv' source code, file 'mgcv.c', function 'crspl()', l.249 :param x: The 1-d array values for which base functions should be computed. :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :return: 4 arrays corresponding to the 4 base functions ajm, ajp, cjm, cjp + the 1-d array of knots lower bounds indices corresponding to the given ``x`` values. """ j = _find_knots_lower_bounds(x, knots) h = knots[1:] - knots[:-1] hj = h[j] xj1_x = knots[j + 1] - x x_xj = x - knots[j] ajm = xj1_x / hj ajp = x_xj / hj cjm_3 = xj1_x * xj1_x * xj1_x / (6. * hj) cjm_3[x > np.max(knots)] = 0. cjm_1 = hj * xj1_x / 6. 
cjm = cjm_3 - cjm_1 cjp_3 = x_xj * x_xj * x_xj / (6. * hj) cjp_3[x < np.min(knots)] = 0. cjp_1 = hj * x_xj / 6. cjp = cjp_3 - cjp_1 return ajm, ajp, cjm, cjp, j def _absorb_constraints(design_matrix, constraints): """Absorb model parameters constraints into the design matrix. :param design_matrix: The (2-d array) initial design matrix. :param constraints: The 2-d array defining initial model parameters (``betas``) constraints (``np.dot(constraints, betas) = 0``). :return: The new design matrix with absorbed parameters constraints. :raise ImportError: if scipy is not found, used for ``scipy.linalg.qr()`` which is cleaner than numpy's version requiring a call like ``qr(..., mode='complete')`` to get a full QR decomposition. """ try: from scipy import linalg except ImportError: # pragma: no cover raise ImportError("Cubic spline functionality requires scipy.") m = constraints.shape[0] q, r = linalg.qr(np.transpose(constraints)) return np.dot(design_matrix, q[:, m:]) def _get_free_crs_dmatrix(x, knots, cyclic=False): """Builds an unconstrained cubic regression spline design matrix. Returns design matrix with dimensions ``len(x) x n`` for a cubic regression spline smoother where - ``n = len(knots)`` for natural CRS - ``n = len(knots) - 1`` for cyclic CRS .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, p. 145 :param x: The 1-d array values. :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :param cyclic: Indicates whether used cubic regression splines should be cyclic or not. Default is ``False``. :return: The (2-d array) design matrix. 
""" n = knots.size if cyclic: x = _map_cyclic(x, min(knots), max(knots)) n -= 1 ajm, ajp, cjm, cjp, j = _compute_base_functions(x, knots) j1 = j + 1 if cyclic: j1[j1 == n] = 0 i = np.identity(n) if cyclic: f = _get_cyclic_f(knots) else: f = _get_natural_f(knots) dmt = ajm * i[j, :].T + ajp * i[j1, :].T + \ cjm * f[j, :].T + cjp * f[j1, :].T return dmt.T def _get_crs_dmatrix(x, knots, constraints=None, cyclic=False): """Builds a cubic regression spline design matrix. Returns design matrix with dimensions len(x) x n where: - ``n = len(knots) - nrows(constraints)`` for natural CRS - ``n = len(knots) - nrows(constraints) - 1`` for cyclic CRS for a cubic regression spline smoother :param x: The 1-d array values. :param knots: The 1-d array knots used for cubic spline parametrization, must be sorted in ascending order. :param constraints: The 2-d array defining model parameters (``betas``) constraints (``np.dot(constraints, betas) = 0``). :param cyclic: Indicates whether used cubic regression splines should be cyclic or not. Default is ``False``. :return: The (2-d array) design matrix. """ dm = _get_free_crs_dmatrix(x, knots, cyclic) if constraints is not None: dm = _absorb_constraints(dm, constraints) return dm def _get_te_dmatrix(design_matrices, constraints=None): """Builds tensor product design matrix, given the marginal design matrices. :param design_matrices: A sequence of 2-d arrays (marginal design matrices). :param constraints: The 2-d array defining model parameters (``betas``) constraints (``np.dot(constraints, betas) = 0``). :return: The (2-d array) design matrix. """ dm = _row_tensor_product(design_matrices) if constraints is not None: dm = _absorb_constraints(dm, constraints) return dm # Stateful Transforms def _get_all_sorted_knots(x, n_inner_knots=None, inner_knots=None, lower_bound=None, upper_bound=None): """Gets all knots locations with lower and upper exterior knots included. 
If needed, inner knots are computed as equally spaced quantiles of the input data falling between given lower and upper bounds. :param x: The 1-d array data values. :param n_inner_knots: Number of inner knots to compute. :param inner_knots: Provided inner knots if any. :param lower_bound: The lower exterior knot location. If unspecified, the minimum of ``x`` values is used. :param upper_bound: The upper exterior knot location. If unspecified, the maximum of ``x`` values is used. :return: The array of ``n_inner_knots + 2`` distinct knots. :raise ValueError: for various invalid parameters sets or if unable to compute ``n_inner_knots + 2`` distinct knots. """ if lower_bound is None and x.size == 0: raise ValueError("Cannot set lower exterior knot location: empty " "input data and lower_bound not specified.") elif lower_bound is None and x.size != 0: lower_bound = np.min(x) if upper_bound is None and x.size == 0: raise ValueError("Cannot set upper exterior knot location: empty " "input data and upper_bound not specified.") elif upper_bound is None and x.size != 0: upper_bound = np.max(x) if upper_bound < lower_bound: raise ValueError("lower_bound > upper_bound (%r > %r)" % (lower_bound, upper_bound)) if inner_knots is None and n_inner_knots is not None: if n_inner_knots < 0: raise ValueError("Invalid requested number of inner knots: %r" % (n_inner_knots,)) x = x[(lower_bound <= x) & (x <= upper_bound)] x = np.unique(x) if x.size != 0: inner_knots_q = np.linspace(0, 100, n_inner_knots + 2)[1:-1] # .tolist() is necessary to work around a bug in numpy 1.8 inner_knots = np.asarray(np.percentile(x, inner_knots_q.tolist())) elif n_inner_knots == 0: inner_knots = np.array([]) else: raise ValueError("No data values between lower_bound(=%r) and " "upper_bound(=%r): cannot compute requested " "%r inner knot(s)." 
% (lower_bound, upper_bound, n_inner_knots)) elif inner_knots is not None: inner_knots = np.unique(inner_knots) if n_inner_knots is not None and n_inner_knots != inner_knots.size: raise ValueError("Needed number of inner knots=%r does not match " "provided number of inner knots=%r." % (n_inner_knots, inner_knots.size)) n_inner_knots = inner_knots.size if np.any(inner_knots < lower_bound): raise ValueError("Some knot values (%s) fall below lower bound " "(%r)." % (inner_knots[inner_knots < lower_bound], lower_bound)) if np.any(inner_knots > upper_bound): raise ValueError("Some knot values (%s) fall above upper bound " "(%r)." % (inner_knots[inner_knots > upper_bound], upper_bound)) else: raise ValueError("Must specify either 'n_inner_knots' or 'inner_knots'.") all_knots = np.concatenate(([lower_bound, upper_bound], inner_knots)) all_knots = np.unique(all_knots) if all_knots.size != n_inner_knots + 2: raise ValueError("Unable to compute n_inner_knots(=%r) + 2 distinct " "knots: %r data value(s) found between " "lower_bound(=%r) and upper_bound(=%r)." 
% (n_inner_knots, x.size, lower_bound, upper_bound)) return all_knots def test__get_all_sorted_knots(): import pytest pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), -1) pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0) pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=1) pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, upper_bound=5) pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=3, upper_bound=1) assert np.array_equal( _get_all_sorted_knots(np.array([]), 0, lower_bound=1, upper_bound=5), [1, 5]) pytest.raises(ValueError, _get_all_sorted_knots, np.array([]), 0, lower_bound=1, upper_bound=1) x = np.arange(6) * 2 pytest.raises(ValueError, _get_all_sorted_knots, x, -2) assert np.array_equal( _get_all_sorted_knots(x, 0), [0, 10]) assert np.array_equal( _get_all_sorted_knots(x, 0, lower_bound=3, upper_bound=8), [3, 8]) assert np.array_equal( _get_all_sorted_knots(x, 2, lower_bound=1, upper_bound=9), [1, 4, 6, 9]) pytest.raises(ValueError, _get_all_sorted_knots, x, 2, lower_bound=1, upper_bound=3) pytest.raises(ValueError, _get_all_sorted_knots, x, 1, lower_bound=1.3, upper_bound=1.4) assert np.array_equal( _get_all_sorted_knots(x, 1, lower_bound=1, upper_bound=3), [1, 2, 3]) pytest.raises(ValueError, _get_all_sorted_knots, x, 1, lower_bound=2, upper_bound=3) pytest.raises(ValueError, _get_all_sorted_knots, x, 1, inner_knots=[2, 3]) pytest.raises(ValueError, _get_all_sorted_knots, x, lower_bound=2, upper_bound=3) assert np.array_equal( _get_all_sorted_knots(x, inner_knots=[3, 7]), [0, 3, 7, 10]) assert np.array_equal( _get_all_sorted_knots(x, inner_knots=[3, 7], lower_bound=2), [2, 3, 7, 10]) pytest.raises(ValueError, _get_all_sorted_knots, x, inner_knots=[3, 7], lower_bound=4) pytest.raises(ValueError, _get_all_sorted_knots, x, inner_knots=[3, 7], upper_bound=6) def _get_centering_constraint_from_dmatrix(design_matrix): """ Computes the centering constraint from 
the given design matrix. We want to ensure that if ``b`` is the array of parameters, our model is centered, ie ``np.mean(np.dot(design_matrix, b))`` is zero. We can rewrite this as ``np.dot(c, b)`` being zero with ``c`` a 1-row constraint matrix containing the mean of each column of ``design_matrix``. :param design_matrix: The 2-d array design matrix. :return: A 2-d array (1 x ncols(design_matrix)) defining the centering constraint. """ return design_matrix.mean(axis=0).reshape((1, design_matrix.shape[1])) class CubicRegressionSpline(object): """Base class for cubic regression spline stateful transforms This class contains all the functionality for the following stateful transforms: - ``cr(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)`` for natural cubic regression spline - ``cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None)`` for cyclic cubic regression spline """ common_doc = """ :arg df: The number of degrees of freedom to use for this spline. The return value will have this many columns. You must specify at least one of ``df`` and ``knots``. :arg knots: The interior knots to use for the spline. If unspecified, then equally spaced quantiles of the input data are used. You must specify at least one of ``df`` and ``knots``. :arg lower_bound: The lower exterior knot location. :arg upper_bound: The upper exterior knot location. :arg constraints: Either a 2-d array defining general linear constraints (that is ``np.dot(constraints, betas)`` is zero, where ``betas`` denotes the array of *initial* parameters, corresponding to the *initial* unconstrained design matrix), or the string ``'center'`` indicating that we should apply a centering constraint (this constraint will be computed from the input data, remembered and re-used for prediction from the fitted model). 
The constraints are absorbed in the resulting design matrix which means that the model is actually rewritten in terms of *unconstrained* parameters. For more details see :ref:`spline-regression`. This is a stateful transforms (for details see :ref:`stateful-transforms`). If ``knots``, ``lower_bound``, or ``upper_bound`` are not specified, they will be calculated from the data and then the chosen values will be remembered and re-used for prediction from the fitted model. Using this function requires scipy be installed. .. versionadded:: 0.3.0 """ def __init__(self, name, cyclic): self._name = name self._cyclic = cyclic self._tmp = {} self._all_knots = None self._constraints = None def memorize_chunk(self, x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None): args = {"df": df, "knots": knots, "lower_bound": lower_bound, "upper_bound": upper_bound, "constraints": constraints, } self._tmp["args"] = args x = np.atleast_1d(x) if x.ndim == 2 and x.shape[1] == 1: x = x[:, 0] if x.ndim > 1: raise ValueError("Input to %r must be 1-d, " "or a 2-d column vector." % (self._name,)) self._tmp.setdefault("xs", []).append(x) def memorize_finish(self): args = self._tmp["args"] xs = self._tmp["xs"] # Guards against invalid subsequent memorize_chunk() calls. 
del self._tmp x = np.concatenate(xs) if args["df"] is None and args["knots"] is None: raise ValueError("Must specify either 'df' or 'knots'.") constraints = args["constraints"] n_constraints = 0 if constraints is not None: if safe_string_eq(constraints, "center"): # Here we collect only number of constraints, # actual centering constraint will be computed after all_knots n_constraints = 1 else: constraints = np.atleast_2d(constraints) if constraints.ndim != 2: raise ValueError("Constraints must be 2-d array or " "1-d vector.") n_constraints = constraints.shape[0] n_inner_knots = None if args["df"] is not None: min_df = 1 if not self._cyclic and n_constraints == 0: min_df = 2 if args["df"] < min_df: raise ValueError("'df'=%r must be greater than or equal to %r." % (args["df"], min_df)) n_inner_knots = args["df"] - 2 + n_constraints if self._cyclic: n_inner_knots += 1 self._all_knots = _get_all_sorted_knots(x, n_inner_knots=n_inner_knots, inner_knots=args["knots"], lower_bound=args["lower_bound"], upper_bound=args["upper_bound"]) if constraints is not None: if safe_string_eq(constraints, "center"): # Now we can compute centering constraints constraints = _get_centering_constraint_from_dmatrix( _get_free_crs_dmatrix(x, self._all_knots, cyclic=self._cyclic) ) df_before_constraints = self._all_knots.size if self._cyclic: df_before_constraints -= 1 if constraints.shape[1] != df_before_constraints: raise ValueError("Constraints array should have %r columns but" " %r found." % (df_before_constraints, constraints.shape[1])) self._constraints = constraints def transform(self, x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None): x_orig = x x = np.atleast_1d(x) if x.ndim == 2 and x.shape[1] == 1: x = x[:, 0] if x.ndim > 1: raise ValueError("Input to %r must be 1-d, " "or a 2-d column vector." 
% (self._name,)) dm = _get_crs_dmatrix(x, self._all_knots, self._constraints, cyclic=self._cyclic) if have_pandas: if isinstance(x_orig, (pandas.Series, pandas.DataFrame)): dm = pandas.DataFrame(dm) dm.index = x_orig.index return dm __getstate__ = no_pickling class CR(CubicRegressionSpline): """cr(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None) Generates a natural cubic spline basis for ``x`` (with the option of absorbing centering or more general parameters constraints), allowing non-linear fits. The usual usage is something like:: y ~ 1 + cr(x, df=5, constraints='center') to fit ``y`` as a smooth function of ``x``, with 5 degrees of freedom given to the smooth, and centering constraint absorbed in the resulting design matrix. Note that in this example, due to the centering constraint, 6 knots will get computed from the input data ``x`` to achieve 5 degrees of freedom. .. note:: This function reproduce the cubic regression splines 'cr' and 'cs' as implemented in the R package 'mgcv' (GAM modelling). """ # Under python -OO, __doc__ will be defined but set to None if __doc__: __doc__ += CubicRegressionSpline.common_doc def __init__(self): CubicRegressionSpline.__init__(self, name='cr', cyclic=False) cr = stateful_transform(CR) class CC(CubicRegressionSpline): """cc(x, df=None, knots=None, lower_bound=None, upper_bound=None, constraints=None) Generates a cyclic cubic spline basis for ``x`` (with the option of absorbing centering or more general parameters constraints), allowing non-linear fits. The usual usage is something like:: y ~ 1 + cc(x, df=7, constraints='center') to fit ``y`` as a smooth function of ``x``, with 7 degrees of freedom given to the smooth, and centering constraint absorbed in the resulting design matrix. Note that in this example, due to the centering and cyclic constraints, 9 knots will get computed from the input data ``x`` to achieve 7 degrees of freedom. .. 
note:: This function reproduce the cubic regression splines 'cc' as implemented in the R package 'mgcv' (GAM modelling). """ # Under python -OO, __doc__ will be defined but set to None if __doc__: __doc__ += CubicRegressionSpline.common_doc def __init__(self): CubicRegressionSpline.__init__(self, name='cc', cyclic=True) cc = stateful_transform(CC) def test_crs_errors(): import pytest # Invalid 'x' shape pytest.raises(ValueError, cr, np.arange(16).reshape((4, 4)), df=4) pytest.raises(ValueError, CR().transform, np.arange(16).reshape((4, 4)), df=4) # Should provide at least 'df' or 'knots' pytest.raises(ValueError, cr, np.arange(50)) # Invalid constraints shape pytest.raises(ValueError, cr, np.arange(50), df=4, constraints=np.arange(27).reshape((3, 3, 3))) # Invalid nb of columns in constraints # (should have df + 1 = 5, but 6 provided) pytest.raises(ValueError, cr, np.arange(50), df=4, constraints=np.arange(6)) # Too small 'df' for natural cubic spline pytest.raises(ValueError, cr, np.arange(50), df=1) # Too small 'df' for cyclic cubic spline pytest.raises(ValueError, cc, np.arange(50), df=0) def test_crs_compat(): from patsy.test_state import check_stateful from patsy.test_splines_crs_data import (R_crs_test_x, R_crs_test_data, R_crs_num_tests) lines = R_crs_test_data.split("\n") tests_ran = 0 start_idx = lines.index("--BEGIN TEST CASE--") while True: if not lines[start_idx] == "--BEGIN TEST CASE--": break start_idx += 1 stop_idx = lines.index("--END TEST CASE--", start_idx) block = lines[start_idx:stop_idx] test_data = {} for line in block: key, value = line.split("=", 1) test_data[key] = value # Translate the R output into Python calling conventions adjust_df = 0 if test_data["spline_type"] == "cr" or test_data["spline_type"] == "cs": spline_type = CR elif test_data["spline_type"] == "cc": spline_type = CC adjust_df += 1 else: raise ValueError("Unrecognized spline type %r" % (test_data["spline_type"],)) kwargs = {} if test_data["absorb_cons"] == "TRUE": 
kwargs["constraints"] = "center" adjust_df += 1 if test_data["knots"] != "None": all_knots = np.asarray(eval(test_data["knots"])) all_knots.sort() kwargs["knots"] = all_knots[1:-1] kwargs["lower_bound"] = all_knots[0] kwargs["upper_bound"] = all_knots[-1] else: kwargs["df"] = eval(test_data["nb_knots"]) - adjust_df output = np.asarray(eval(test_data["output"])) # Do the actual test check_stateful(spline_type, False, R_crs_test_x, output, **kwargs) tests_ran += 1 # Set up for the next one start_idx = stop_idx + 1 assert tests_ran == R_crs_num_tests test_crs_compat.slow = True def test_crs_with_specific_constraint(): from patsy.highlevel import incr_dbuilder, build_design_matrices, dmatrix x = (-1.5)**np.arange(20) # Hard coded R values for smooth: s(x, bs="cr", k=5) # R> knots <- smooth$xp knots_R = np.array([-2216.837820053100585937, -50.456909179687500000, -0.250000000000000000, 33.637939453125000000, 1477.891880035400390625]) # R> centering.constraint <- t(qr.X(attr(smooth, "qrc"))) centering_constraint_R = np.array([[0.064910676323168478574, 1.4519875239407085132, -2.1947446912471946234, 1.6129783104357671153, 0.064868180547550072235]]) # values for which we want a prediction new_x = np.array([-3000., -200., 300., 2000.]) result1 = dmatrix("cr(new_x, knots=knots_R[1:-1], " "lower_bound=knots_R[0], upper_bound=knots_R[-1], " "constraints=centering_constraint_R)") data_chunked = [{"x": x[:10]}, {"x": x[10:]}] new_data = {"x": new_x} builder = incr_dbuilder("cr(x, df=4, constraints='center')", lambda: iter(data_chunked)) result2 = build_design_matrices([builder], new_data)[0] assert np.allclose(result1, result2, rtol=1e-12, atol=0.) class TE(object): """te(s1, .., sn, constraints=None) Generates smooth of several covariates as a tensor product of the bases of marginal univariate smooths ``s1, .., sn``. 
The marginal smooths are required to transform input univariate data into some kind of smooth functions basis producing a 2-d array output with the ``(i, j)`` element corresponding to the value of the ``j`` th basis function at the ``i`` th data point. The resulting basis dimension is the product of the basis dimensions of the marginal smooths. The usual usage is something like:: y ~ 1 + te(cr(x1, df=5), cc(x2, df=6), constraints='center') to fit ``y`` as a smooth function of both ``x1`` and ``x2``, with a natural cubic spline for ``x1`` marginal smooth and a cyclic cubic spline for ``x2`` (and centering constraint absorbed in the resulting design matrix). :arg constraints: Either a 2-d array defining general linear constraints (that is ``np.dot(constraints, betas)`` is zero, where ``betas`` denotes the array of *initial* parameters, corresponding to the *initial* unconstrained design matrix), or the string ``'center'`` indicating that we should apply a centering constraint (this constraint will be computed from the input data, remembered and re-used for prediction from the fitted model). The constraints are absorbed in the resulting design matrix which means that the model is actually rewritten in terms of *unconstrained* parameters. For more details see :ref:`spline-regression`. Using this function requires scipy be installed. .. note:: This function reproduce the tensor product smooth 'te' as implemented in the R package 'mgcv' (GAM modelling). See also 'Generalized Additive Models', Simon N. Wood, 2006, pp 158-163 .. 
versionadded:: 0.3.0 """ def __init__(self): self._tmp = {} self._constraints = None def memorize_chunk(self, *args, **kwargs): constraints = self._tmp.setdefault("constraints", kwargs.get("constraints")) if safe_string_eq(constraints, "center"): args_2d = [] for arg in args: arg = atleast_2d_column_default(arg) if arg.ndim != 2: raise ValueError("Each tensor product argument must be " "a 2-d array or 1-d vector.") args_2d.append(arg) tp = _row_tensor_product(args_2d) self._tmp.setdefault("count", 0) self._tmp["count"] += tp.shape[0] chunk_sum = np.atleast_2d(tp.sum(axis=0)) self._tmp.setdefault("sum", np.zeros(chunk_sum.shape)) self._tmp["sum"] += chunk_sum def memorize_finish(self): tmp = self._tmp constraints = self._tmp["constraints"] # Guards against invalid subsequent memorize_chunk() calls. del self._tmp if constraints is not None: if safe_string_eq(constraints, "center"): constraints = np.atleast_2d(tmp["sum"] / tmp["count"]) else: constraints = np.atleast_2d(constraints) if constraints.ndim != 2: raise ValueError("Constraints must be 2-d array or " "1-d vector.") self._constraints = constraints def transform(self, *args, **kwargs): args_2d = [] for arg in args: arg = atleast_2d_column_default(arg) if arg.ndim != 2: raise ValueError("Each tensor product argument must be " "a 2-d array or 1-d vector.") args_2d.append(arg) return _get_te_dmatrix(args_2d, self._constraints) __getstate__ = no_pickling te = stateful_transform(TE) def test_te_errors(): import pytest x = np.arange(27) # Invalid input shape pytest.raises(ValueError, te, x.reshape((3, 3, 3))) pytest.raises(ValueError, te, x.reshape((3, 3, 3)), constraints='center') # Invalid constraints shape pytest.raises(ValueError, te, x, constraints=np.arange(8).reshape((2, 2, 2))) def test_te_1smooth(): from patsy.splines import bs # Tensor product of 1 smooth covariate should be the same # as the smooth alone x = (-1.5)**np.arange(20) assert np.allclose(cr(x, df=6), te(cr(x, df=6))) assert np.allclose(cc(x, 
df=5), te(cc(x, df=5))) assert np.allclose(bs(x, df=4), te(bs(x, df=4))) # Adding centering constraint to tensor product assert np.allclose(cr(x, df=3, constraints='center'), te(cr(x, df=4), constraints='center')) # Adding specific constraint center_constraint = np.arange(1, 5) assert np.allclose(cr(x, df=3, constraints=center_constraint), te(cr(x, df=4), constraints=center_constraint)) def test_te_2smooths(): from patsy.highlevel import incr_dbuilder, build_design_matrices x1 = (-1.5)**np.arange(20) x2 = (1.6)**np.arange(20) # Hard coded R results for smooth: te(x1, x2, bs=c("cs", "cc"), k=c(5,7)) # Without centering constraint: dmatrix_R_nocons = \ np.array([[-4.4303024184609255207e-06, 7.9884438387230142235e-06, 9.7987758194797719025e-06, -7.2894213245475212959e-08, 1.5907686862964493897e-09, -3.2565884983072595159e-11, 0.0170749607855874667439, -3.0788499835965849050e-02, -3.7765754357352458725e-02, 2.8094376299826799787e-04, -6.1310290747349201414e-06, 1.2551314933193442915e-07, -0.26012671685838206770, 4.6904420337437874311e-01, 0.5753384627946153129230, -4.2800085814700449330e-03, 9.3402525733484874533e-05, -1.9121170389937518131e-06, -0.0904312240489447832781, 1.6305991924427923334e-01, 2.0001237112941641638e-01, -1.4879148887003382663e-03, 3.2470731316462736135e-05, -6.6473404365914134499e-07, 2.0447857920168824846e-05, -3.6870296695050991799e-05, -4.5225801045409022233e-05, 3.3643990293641665710e-07, -7.3421200200015877329e-09, 1.5030635073660743297e-10], [-9.4006130602653794302e-04, 7.8681398069163730347e-04, 2.4573006857381437217e-04, -1.4524712230452725106e-04, 7.8216741353106329551e-05, -3.1304283003914264551e-04, 3.6231183382798337611064, -3.0324832476174168328e+00, -9.4707559178211142559e-01, 5.5980126937492580286e-01, -3.0145747744342332730e-01, 1.2065077148806895302e+00, -35.17561267504181188315, 2.9441339255948005160e+01, 9.1948319320782125885216, -5.4349184288245195873e+00, 2.9267472035096449012e+00, -1.1713569391233907169e+01, 
34.0275626863976370373166, -2.8480442582712722555e+01, -8.8947340548151565542e+00, 5.2575353623762932642e+00, -2.8312249982592527786e+00, 1.1331265795534763541e+01, 7.9462158845078978420e-01, -6.6508361863670617531e-01, -2.0771242914526857892e-01, 1.2277550230353953542e-01, -6.6115593588420035198e-02, 2.6461103043402139923e-01]]) # With centering constraint: dmatrix_R_cons = \ np.array([[0.00329998606323867252343, 1.6537431155796576600e-04, -1.2392262709790753433e-04, 6.5405304166706783407e-05, -6.6764045799537624095e-05, -0.1386431081763726258504, 0.124297283800864313830, -3.5487293655619825405e-02, -3.0527115315785902268e-03, 5.2009247643311604277e-04, -0.00384203992301702674378, -0.058901915802819435064, 0.266422358491648914036, 0.5739281693874087597607, -1.3171008503525844392e-03, 8.2573456631878912413e-04, 6.6730833453016958831e-03, -0.1467677784718444955470, 0.220757650934837484913, 0.1983127687880171796664, -1.6269930328365173316e-03, -1.7785892412241208812e-03, -3.2702835436351201243e-03, -4.3252183044300757109e-02, 4.3403766976235179376e-02, 3.5973406402893762387e-05, -5.4035858568225075046e-04, 2.9565209382794241247e-04, -2.2769990750264097637e-04], [0.41547954838956052681098, 1.9843570584107707994e-02, -1.5746590234791378593e-02, 8.3171184312221431434e-03, -8.7233014052017516377e-03, -15.9926770785086258541696, 16.503663226274017716833, -6.6005803955894726265e-01, 1.3986092022708346283e-01, -2.3516913533670955050e-01, 0.72251037497207359905360, -9.827337059999853963177, 3.917078117294827688255, 9.0171773596973618936090, -5.0616811270787671617e+00, 3.0189990249009683865e+00, -1.0872720629943064097e+01, 26.9308504460453121964747, -21.212262927009287949431, -9.1088328555582247503253, 5.2400156972500298025e+00, -3.0593641098325474736e+00, 1.0919392118399086300e+01, -4.6564290223265718538e+00, 4.8071307441606982991e+00, -1.9748377005689798924e-01, 5.4664183716965096538e-02, -2.8871392916916285148e-02, 2.3592766838010845176e-01]]) new_x1 = np.array([11.390625, 
656.84083557128906250]) new_x2 = np.array([16.777216000000006346, 1844.6744073709567147]) new_data = {"x1": new_x1, "x2": new_x2} data_chunked = [{"x1": x1[:10], "x2": x2[:10]}, {"x1": x1[10:], "x2": x2[10:]}] builder = incr_dbuilder("te(cr(x1, df=5), cc(x2, df=6)) - 1", lambda: iter(data_chunked)) dmatrix_nocons = build_design_matrices([builder], new_data)[0] assert np.allclose(dmatrix_nocons, dmatrix_R_nocons, rtol=1e-12, atol=0.) builder = incr_dbuilder("te(cr(x1, df=5), cc(x2, df=6), " "constraints='center') - 1", lambda: iter(data_chunked)) dmatrix_cons = build_design_matrices([builder], new_data)[0] assert np.allclose(dmatrix_cons, dmatrix_R_cons, rtol=1e-12, atol=0.) def test_te_3smooths(): from patsy.highlevel import incr_dbuilder, build_design_matrices x1 = (-1.5)**np.arange(20) x2 = (1.6)**np.arange(20) x3 = (-1.2)**np.arange(20) # Hard coded R results for smooth: te(x1, x2, x3, bs=c("cr", "cs", "cc"), k=c(3,3,4)) design_matrix_R = \ np.array([[7.2077663709837084334e-05, 2.0648333344343273131e-03, -4.7934014082310591768e-04, 2.3923430783992746568e-04, 6.8534265421922660466e-03, -1.5909867344112936776e-03, -6.8057712777151204314e-09, -1.9496724335203412851e-07, 4.5260614658693259131e-08, 0.0101479754187435277507, 0.290712501531622591333, -0.067487370093906928759, 0.03368233306025386619709, 0.9649092451763204847381, -0.2239985793289433757547, -9.5819975394704535133e-07, -2.7449874082511405643e-05, 6.3723431275833230217e-06, -1.5205851762850489204e-04, -0.00435607204539782688624, 0.00101123909269346416370, -5.0470024059694933508e-04, -1.4458319360584082416e-02, 3.3564223914790921634e-03, 1.4357783514933466209e-08, 4.1131230514870551983e-07, -9.5483976834512651038e-08]]) new_data = {"x1": -38.443359375000000000, "x2": 68.719476736000032702, "x3": -5.1597803519999985156} data_chunked = [{"x1": x1[:10], "x2": x2[:10], "x3": x3[:10]}, {"x1": x1[10:], "x2": x2[10:], "x3": x3[10:]}] builder = incr_dbuilder("te(cr(x1, df=3), cr(x2, df=3), cc(x3, df=3)) - 1", 
lambda: iter(data_chunked)) design_matrix = build_design_matrices([builder], new_data)[0] assert np.allclose(design_matrix, design_matrix_R, rtol=1e-12, atol=0.) patsy-0.5.2/patsy/missing.py000066400000000000000000000264741412400214200160440ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2013 Nathaniel Smith # See file LICENSE.txt for license information. # Missing data detection/handling # First, how do we represent missing data? (i.e., which values count as # "missing"?) In the long run, we want to use numpy's NA support... but that # doesn't exist yet. Until then, people use various sorts of ad-hoc # things. Some things that might be considered NA: # NA (eventually) # NaN (in float or object arrays) # None (in object arrays) # np.ma.masked (in numpy.ma masked arrays) # Pandas compatibility considerations: # For numeric arrays, None is unconditionally converted to NaN. # For object arrays (including string arrays!), None and NaN are preserved, # but pandas.isnull() returns True for both. # np.ma compatibility considerations: # Preserving array subtypes is a huge pain, because it means that we can't # just call 'asarray' and be done... we already jump through tons of hoops # to write code that can handle both ndarray's and pandas objects, and # just thinking about adding another item to this list makes me tired. So # for now we don't support np.ma missing values. Use pandas! # Next, what should be done once we find missing data? R's options: # -- throw away those rows (from all aligned matrices) # -- with or without preserving information on which rows were discarded # -- error out # -- carry on # The 'carry on' option requires that we have some way to represent NA in our # output array. To avoid further solidifying the use of NaN for this purpose, # we'll leave this option out for now, until real NA support is # available. 
Also, we always preserve information on which rows were
# discarded, using the pandas index functionality (currently this is only
# returned to the original caller if they used return_type="dataframe",
# though).

import numpy as np

from patsy import PatsyError
from patsy.util import (safe_isnan, safe_scalar_isnan,
                        no_pickling, assert_no_pickling)

# These are made available in the patsy.* namespace
__all__ = ["NAAction"]

# The full sets of legal values for NAAction's constructor arguments.
_valid_NA_types = ["None", "NaN"]
_valid_NA_responses = ["raise", "drop"]

def _desc_options(options):
    # Render a list of options for use in error messages, e.g. "'a', 'b'".
    return ", ".join([repr(opt) for opt in options])

class NAAction(object):
    """An :class:`NAAction` object defines a strategy for handling missing
    data.

    "NA" is short for "Not Available", and is used to refer to any value
    which is somehow unmeasured or unavailable. In the long run, it is
    devoutly hoped that numpy will gain first-class missing value support.
    Until then, we work around this lack as best we're able.

    There are two parts to this: First, we have to determine what counts as
    missing data. For numerical data, the default is to treat NaN values
    (e.g., ``numpy.nan``) as missing. For categorical data, the default is to
    treat NaN values, and also the Python object None, as missing. (This is
    consistent with how pandas does things, so if you're already using
    None/NaN to mark missing data in your pandas DataFrames, you're good to
    go.)

    Second, we have to decide what to do with any missing data when we
    encounter it. One option is to simply discard any rows which contain
    missing data from our design matrices (``drop``). Another option is to
    raise an error (``raise``). A third option would be to simply let the
    missing values pass through into the returned design matrices. However,
    this last option is not yet implemented, because of the lack of any
    standard way to represent missing values in arbitrary numpy matrices;
    we're hoping numpy will get this sorted out before we standardize on
    anything ourselves.
You can control how patsy handles missing data through the ``NA_action=``
    argument to functions like :func:`build_design_matrices` and
    :func:`dmatrix`. If all you want to do is to choose between ``drop`` and
    ``raise`` behaviour, you can pass one of those strings as the
    ``NA_action=`` argument directly. If you want more fine-grained control
    over how missing values are detected and handled, then you can create an
    instance of this class, or your own object that implements the same
    interface, and pass that as the ``NA_action=`` argument instead.
    """
    def __init__(self, on_NA="drop", NA_types=["None", "NaN"]):
        """The :class:`NAAction` constructor takes the following arguments:

        :arg on_NA: How to handle missing values. The default is ``"drop"``,
          which removes all rows from all matrices which contain any missing
          values. Also available is ``"raise"``, which raises an exception
          when any missing values are encountered.
        :arg NA_types: Which rules are used to identify missing values, as a
          list of strings. Allowed values are:

          * ``"None"``: treat the ``None`` object as missing in categorical
            data.
          * ``"NaN"``: treat floating point NaN values as missing in
            categorical and numerical data.

        .. versionadded:: 0.2.0
        """
        # NOTE: the mutable list default is safe here because it is never
        # mutated -- it is immediately copied into a tuple below.
        self.on_NA = on_NA
        if self.on_NA not in _valid_NA_responses:
            raise ValueError("invalid on_NA action %r "
                             "(should be one of %s)"
                             % (on_NA, _desc_options(_valid_NA_responses)))
        # A bare string would silently be iterated character-by-character
        # below, so reject it explicitly.
        if isinstance(NA_types, str):
            raise ValueError("NA_types should be a list of strings")
        self.NA_types = tuple(NA_types)
        for NA_type in self.NA_types:
            if NA_type not in _valid_NA_types:
                raise ValueError("invalid NA_type %r "
                                 "(should be one of %s)"
                                 % (NA_type, _desc_options(_valid_NA_types)))

    def is_categorical_NA(self, obj):
        """Return True if `obj` is a categorical NA value.
Note that here `obj` is a single scalar value."""
        if "NaN" in self.NA_types and safe_scalar_isnan(obj):
            return True
        if "None" in self.NA_types and obj is None:
            return True
        return False

    def is_numerical_NA(self, arr):
        """Returns a 1-d mask array indicating which rows in an array of
        numerical values contain at least one NA value.

        Note that here `arr` is a numpy array or pandas DataFrame."""
        mask = np.zeros(arr.shape, dtype=bool)
        if "NaN" in self.NA_types:
            mask |= np.isnan(arr)
        if mask.ndim > 1:
            # A row counts as NA if *any* of its columns are NA.
            mask = np.any(mask, axis=1)
        return mask

    def handle_NA(self, values, is_NAs, origins):
        """Takes a set of factor values that may have NAs, and handles them
        appropriately.

        :arg values: A list of `ndarray` objects representing the data.
          These may be 1- or 2-dimensional, and may be of varying dtype. All
          will have the same number of rows (or entries, for 1-d arrays).
        :arg is_NAs: A list with the same number of entries as `values`,
          containing boolean `ndarray` objects that indicate which rows
          contain NAs in the corresponding entry in `values`.
        :arg origins: A list with the same number of entries as `values`,
          containing information on the origin of each value. If we
          encounter a problem with some particular value, we use the
          corresponding entry in `origins` as the origin argument when
          raising a :class:`PatsyError`.
        :returns: A list of new values (which may have a differing number of
          rows.)
""" assert len(values) == len(is_NAs) == len(origins) if len(values) == 0: return values if self.on_NA == "raise": return self._handle_NA_raise(values, is_NAs, origins) elif self.on_NA == "drop": return self._handle_NA_drop(values, is_NAs, origins) else: # pragma: no cover assert False def _handle_NA_raise(self, values, is_NAs, origins): for is_NA, origin in zip(is_NAs, origins): if np.any(is_NA): raise PatsyError("factor contains missing values", origin) return values def _handle_NA_drop(self, values, is_NAs, origins): total_mask = np.zeros(is_NAs[0].shape[0], dtype=bool) for is_NA in is_NAs: total_mask |= is_NA good_mask = ~total_mask # "..." to handle 1- versus 2-dim indexing return [v[good_mask, ...] for v in values] __getstate__ = no_pickling def test_NAAction_basic(): import pytest pytest.raises(ValueError, NAAction, on_NA="pord") pytest.raises(ValueError, NAAction, NA_types=("NaN", "asdf")) pytest.raises(ValueError, NAAction, NA_types="NaN") assert_no_pickling(NAAction()) def test_NAAction_NA_types_numerical(): for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]: action = NAAction(NA_types=NA_types) for extra_shape in [(), (1,), (2,)]: arr = np.ones((4,) + extra_shape, dtype=float) nan_rows = [0, 2] if arr.ndim > 1 and arr.shape[1] > 1: arr[nan_rows, [0, 1]] = np.nan else: arr[nan_rows] = np.nan exp_NA_mask = np.zeros(4, dtype=bool) if "NaN" in NA_types: exp_NA_mask[nan_rows] = True got_NA_mask = action.is_numerical_NA(arr) assert np.array_equal(got_NA_mask, exp_NA_mask) def test_NAAction_NA_types_categorical(): for NA_types in [[], ["NaN"], ["None"], ["NaN", "None"]]: action = NAAction(NA_types=NA_types) assert not action.is_categorical_NA("a") assert not action.is_categorical_NA(1) assert action.is_categorical_NA(None) == ("None" in NA_types) assert action.is_categorical_NA(np.nan) == ("NaN" in NA_types) def test_NAAction_drop(): action = NAAction("drop") in_values = [np.asarray([-1, 2, -1, 4, 5]), np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]), 
np.asarray([[1.0, np.nan], [3.0, 4.0], [10.0, 5.0], [6.0, 7.0], [8.0, np.nan]]), ] is_NAs = [np.asarray([True, False, True, False, False]), np.zeros(5, dtype=bool), np.asarray([True, False, False, False, True]), ] out_values = action.handle_NA(in_values, is_NAs, [None] * 3) assert len(out_values) == 3 assert np.array_equal(out_values[0], [2, 4]) assert np.array_equal(out_values[1], [20.0, 40.0]) assert np.array_equal(out_values[2], [[3.0, 4.0], [6.0, 7.0]]) def test_NAAction_raise(): action = NAAction(on_NA="raise") # no-NA just passes through: in_arrs = [np.asarray([1.1, 1.2]), np.asarray([1, 2])] is_NAs = [np.asarray([False, False])] * 2 got_arrs = action.handle_NA(in_arrs, is_NAs, [None, None]) assert np.array_equal(got_arrs[0], in_arrs[0]) assert np.array_equal(got_arrs[1], in_arrs[1]) from patsy.origin import Origin o1 = Origin("asdf", 0, 1) o2 = Origin("asdf", 2, 3) # NA raises an error with a correct origin in_idx = np.arange(2) in_arrs = [np.asarray([1.1, 1.2]), np.asarray([1.0, np.nan])] is_NAs = [np.asarray([False, False]), np.asarray([False, True])] try: action.handle_NA(in_arrs, is_NAs, [o1, o2]) assert False except PatsyError as e: assert e.origin is o2 patsy-0.5.2/patsy/origin.py000066400000000000000000000107561412400214200156560ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2012 Nathaniel Smith # See file LICENSE.txt for license information. # The core 'origin' tracking system. This point of this is to have machinery # so if some object is ultimately derived from some portion of a string (e.g., # a formula), then we can keep track of that, and use it to give proper error # messages. # These are made available in the patsy.* namespace __all__ = ["Origin"] class Origin(object): """This represents the origin of some object in some string. 
For example, if we have an object ``x1_obj`` that was produced by parsing
    the ``x1`` in the formula ``"y ~ x1:x2"``, then we conventionally keep
    track of that relationship by doing::

      x1_obj.origin = Origin("y ~ x1:x2", 4, 6)

    Then later if we run into a problem, we can do::

      raise PatsyError("invalid factor", x1_obj)

    and we'll produce a nice error message like::

      PatsyError: invalid factor
          y ~ x1:x2
              ^^

    Origins are compared by value, and hashable.
    """
    def __init__(self, code, start, end):
        # `code` is the original source string; `start`/`end` give the
        # half-open character span [start, end) this object came from.
        self.code = code
        self.start = start
        self.end = end

    @classmethod
    def combine(cls, origin_objs):
        """Class method for combining a set of Origins into one large Origin
        that spans them.

        Example usage: if we wanted to represent the origin of the "x1:x2"
        term, we could do ``Origin.combine([x1_obj, x2_obj])``.

        Single argument is an iterable, and each element in the iterable
        should be either:

        * An Origin object
        * ``None``
        * An object that has a ``.origin`` attribute which fulfills the above
          criteria.

        Returns either an Origin object, or None.
        """
        origins = []
        for obj in origin_objs:
            if obj is not None and not isinstance(obj, Origin):
                obj = obj.origin
            if obj is None:
                continue
            origins.append(obj)
        if not origins:
            return None
        # Every combined Origin must come from the same source string.
        codes = set([o.code for o in origins])
        assert len(codes) == 1
        start = min([o.start for o in origins])
        end = max([o.end for o in origins])
        return cls(codes.pop(), start, end)

    def relevant_code(self):
        """Extracts and returns the span of the original code represented by
        this Origin. Example: ``x1``."""
        return self.code[self.start:self.end]

    def __eq__(self, other):
        return (isinstance(other, Origin)
                and self.code == other.code
                and self.start == other.start
                and self.end == other.end)

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash((Origin, self.code, self.start, self.end))

    def caretize(self, indent=0):
        """Produces a user-readable two line string indicating the origin of
        some code.
Example:: y ~ x1:x2 ^^ If optional argument 'indent' is given, then both lines will be indented by this much. The returned string does not have a trailing newline. """ return ("%s%s\n%s%s%s" % (" " * indent, self.code, " " * indent, " " * self.start, "^" * (self.end - self.start))) def __repr__(self): return "%s<-%s (%s-%s)>" % ( self.code[:self.start], self.code[self.start:self.end], self.code[self.end:], self.start, self.end) # We reimplement patsy.util.no_pickling, to avoid circular import issues def __getstate__(self): raise NotImplementedError def test_Origin(): o1 = Origin("012345", 2, 4) o2 = Origin("012345", 4, 5) assert o1.caretize() == "012345\n ^^" assert o2.caretize() == "012345\n ^" o3 = Origin.combine([o1, o2]) assert o3.code == "012345" assert o3.start == 2 assert o3.end == 5 assert o3.caretize(indent=2) == " 012345\n ^^^" assert o3 == Origin("012345", 2, 5) class ObjWithOrigin(object): def __init__(self, origin=None): self.origin = origin o4 = Origin.combine([ObjWithOrigin(o1), ObjWithOrigin(), None]) assert o4 == o1 o5 = Origin.combine([ObjWithOrigin(o1), o2]) assert o5 == o3 assert Origin.combine([ObjWithOrigin(), ObjWithOrigin()]) is None from patsy.util import assert_no_pickling assert_no_pickling(Origin("", 0, 0)) patsy-0.5.2/patsy/parse_formula.py000066400000000000000000000230201412400214200172120ustar00rootroot00000000000000 # This file is part of Patsy # Copyright (C) 2011 Nathaniel Smith # See file LICENSE.txt for license information. # This file defines a parser for a simple language based on S/R "formulas" # (which are described in sections 2.3 and 2.4 in Chambers & Hastie, 1992). It # uses the machinery in patsy.parse_core to do the heavy-lifting -- its # biggest job is to handle tokenization. 
from __future__ import print_function

__all__ = ["parse_formula"]

# The Python tokenizer
import tokenize

import six
from six.moves import cStringIO as StringIO

from patsy import PatsyError
from patsy.origin import Origin
from patsy.infix_parser import Token, Operator, infix_parse, ParseNode
from patsy.tokens import python_tokenize, pretty_untokenize
from patsy.util import PushbackAdapter

# Token types that stand on their own (as opposed to operators/parens).
_atomic_token_types = ["PYTHON_EXPR", "ZERO", "ONE", "NUMBER"]

def _is_a(f, v):
    # True iff f(v) succeeds without raising ValueError, e.g.
    # _is_a(int, "10") -> True, _is_a(int, "x") -> False.
    try:
        f(v)
    except ValueError:
        return False
    else:
        return True

# Helper function for _tokenize_formula:
def _read_python_expr(it, end_tokens):
    # Read out a full python expression, stopping when we hit an
    # unnested end token.
    pytypes = []
    token_strings = []
    origins = []
    bracket_level = 0
    for pytype, token_string, origin in it:
        assert bracket_level >= 0
        if bracket_level == 0 and token_string in end_tokens:
            # Leave the end token for the caller to consume.
            it.push_back((pytype, token_string, origin))
            break
        if token_string in ("(", "[", "{"):
            bracket_level += 1
        if token_string in (")", "]", "}"):
            bracket_level -= 1
        if bracket_level < 0:
            raise PatsyError("unmatched close bracket", origin)
        pytypes.append(pytype)
        token_strings.append(token_string)
        origins.append(origin)
    # Either we found an end_token, or we hit the end of the string
    if bracket_level == 0:
        expr_text = pretty_untokenize(zip(pytypes, token_strings))
        # Classify the expression so the formula machinery can treat the
        # special literals 0 and 1, and plain numbers, differently.
        if expr_text == "0":
            token_type = "ZERO"
        elif expr_text == "1":
            token_type = "ONE"
        elif _is_a(int, expr_text) or _is_a(float, expr_text):
            token_type = "NUMBER"
        else:
            token_type = "PYTHON_EXPR"
        return Token(token_type, Origin.combine(origins), extra=expr_text)
    else:
        raise PatsyError("unclosed bracket in embedded Python "
                         "expression",
                         Origin.combine(origins))

def _tokenize_formula(code, operator_strings):
    assert "(" not in operator_strings
    assert ")" not in operator_strings
    magic_token_types = {"(": Token.LPAREN,
                         ")": Token.RPAREN,
                         }
    for operator_string in operator_strings:
        magic_token_types[operator_string] = operator_string
    # Once we enter a Python
expression, a ( does not end it, but any other # "magic" token does: end_tokens = set(magic_token_types) end_tokens.remove("(") it = PushbackAdapter(python_tokenize(code)) for pytype, token_string, origin in it: if token_string in magic_token_types: yield Token(magic_token_types[token_string], origin) else: it.push_back((pytype, token_string, origin)) yield _read_python_expr(it, end_tokens) def test__tokenize_formula(): code = "y ~ a + (foo(b,c + 2)) + -1 + 0 + 10" tokens = list(_tokenize_formula(code, ["+", "-", "~"])) expecteds = [("PYTHON_EXPR", Origin(code, 0, 1), "y"), ("~", Origin(code, 2, 3), None), ("PYTHON_EXPR", Origin(code, 4, 5), "a"), ("+", Origin(code, 6, 7), None), (Token.LPAREN, Origin(code, 8, 9), None), ("PYTHON_EXPR", Origin(code, 9, 23), "foo(b, c + 2)"), (Token.RPAREN, Origin(code, 23, 24), None), ("+", Origin(code, 25, 26), None), ("-", Origin(code, 27, 28), None), ("ONE", Origin(code, 28, 29), "1"), ("+", Origin(code, 30, 31), None), ("ZERO", Origin(code, 32, 33), "0"), ("+", Origin(code, 34, 35), None), ("NUMBER", Origin(code, 36, 38), "10"), ] for got, expected in zip(tokens, expecteds): assert isinstance(got, Token) assert got.type == expected[0] assert got.origin == expected[1] assert got.extra == expected[2] _unary_tilde = Operator("~", 1, -100) _default_ops = [ _unary_tilde, Operator("~", 2, -100), Operator("+", 2, 100), Operator("-", 2, 100), Operator("*", 2, 200), Operator("/", 2, 200), Operator(":", 2, 300), Operator("**", 2, 500), Operator("+", 1, 100), Operator("-", 1, 100), ] def parse_formula(code, extra_operators=[]): if not code.strip(): code = "~ 1" for op in extra_operators: if op.precedence < 0: raise ValueError("all operators must have precedence >= 0") operators = _default_ops + extra_operators operator_strings = [op.token_type for op in operators] tree = infix_parse(_tokenize_formula(code, operator_strings), operators, _atomic_token_types) if not isinstance(tree, ParseNode) or tree.type != "~": tree = ParseNode("~", None, 
[tree], tree.origin) return tree ############# _parser_tests = { "": ["~", "1"], " ": ["~", "1"], " \n ": ["~", "1"], "1": ["~", "1"], "a": ["~", "a"], "a ~ b": ["~", "a", "b"], "(a ~ b)": ["~", "a", "b"], "a ~ ((((b))))": ["~", "a", "b"], "a ~ ((((+b))))": ["~", "a", ["+", "b"]], "a + b + c": ["~", ["+", ["+", "a", "b"], "c"]], "a + (b ~ c) + d": ["~", ["+", ["+", "a", ["~", "b", "c"]], "d"]], "a + np.log(a, base=10)": ["~", ["+", "a", "np.log(a, base=10)"]], # Note different spacing: "a + np . log(a , base = 10)": ["~", ["+", "a", "np.log(a, base=10)"]], # Check precedence "a + b ~ c * d": ["~", ["+", "a", "b"], ["*", "c", "d"]], "a + b * c": ["~", ["+", "a", ["*", "b", "c"]]], "-a**2": ["~", ["-", ["**", "a", "2"]]], "-a:b": ["~", ["-", [":", "a", "b"]]], "a + b:c": ["~", ["+", "a", [":", "b", "c"]]], "(a + b):c": ["~", [":", ["+", "a", "b"], "c"]], "a*b:c": ["~", ["*", "a", [":", "b", "c"]]], "a+b / c": ["~", ["+", "a", ["/", "b", "c"]]], "~ a": ["~", "a"], "-1": ["~", ["-", "1"]], } def _compare_trees(got, expected): assert isinstance(got, ParseNode) if got.args: assert got.type == expected[0] for arg, expected_arg in zip(got.args, expected[1:]): _compare_trees(arg, expected_arg) else: assert got.type in _atomic_token_types assert got.token.extra == expected def _do_parse_test(test_cases, extra_operators): for code, expected in six.iteritems(test_cases): actual = parse_formula(code, extra_operators=extra_operators) print(repr(code), repr(expected)) print(actual) _compare_trees(actual, expected) def test_parse_formula(): _do_parse_test(_parser_tests, []) def test_parse_origin(): tree = parse_formula("a ~ b + c") assert tree.origin == Origin("a ~ b + c", 0, 9) assert tree.token.origin == Origin("a ~ b + c", 2, 3) assert tree.args[0].origin == Origin("a ~ b + c", 0, 1) assert tree.args[1].origin == Origin("a ~ b + c", 4, 9) assert tree.args[1].token.origin == Origin("a ~ b + c", 6, 7) assert tree.args[1].args[0].origin == Origin("a ~ b + c", 4, 5) assert 
tree.args[1].args[1].origin == Origin("a ~ b + c", 8, 9) # <> mark off where the error should be reported: _parser_error_tests = [ "a <+>", "a + <(>", "a + b <# asdf>", "<)>", "a + <)>", "<*> a", "a + <*>", "a + ", "a + ", "a + ", "a + <[bar>", "a + <{bar>", "a + <{bar[]>", "a + foo<]>bar", "a + foo[]<]>bar", "a + foo{}<}>bar", "a + foo<)>bar", "a + b<)>", "(a) <.>", "<(>a + b", "a +< >'foo", # Not the best placement for the error ] # Split out so it can also be used by tests of the evaluator (which also # raises PatsyError's) def _parsing_error_test(parse_fn, error_descs): # pragma: no cover for error_desc in error_descs: letters = [] start = None end = None for letter in error_desc: if letter == "<": start = len(letters) elif letter == ">": end = len(letters) else: letters.append(letter) bad_code = "".join(letters) assert start is not None and end is not None print(error_desc) print(repr(bad_code), start, end) try: parse_fn(bad_code) except PatsyError as e: print(e) assert e.origin.code == bad_code assert e.origin.start == start assert e.origin.end == end else: assert False, "parser failed to report an error!" def test_parse_errors(extra_operators=[]): def parse_fn(code): return parse_formula(code, extra_operators=extra_operators) _parsing_error_test(parse_fn, _parser_error_tests) _extra_op_parser_tests = { "a | b": ["~", ["|", "a", "b"]], "a * b|c": ["~", ["*", "a", ["|", "b", "c"]]], } def test_parse_extra_op(): extra_operators = [Operator("|", 2, 250)] _do_parse_test(_parser_tests, extra_operators=extra_operators) _do_parse_test(_extra_op_parser_tests, extra_operators=extra_operators) test_parse_errors(extra_operators=extra_operators) patsy-0.5.2/patsy/redundancy.py000066400000000000000000000246421412400214200165220ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011 Nathaniel Smith # See file LICENSE.txt for license information. # This file has the code that figures out how each factor in some given Term # should be coded. 
This is complicated by dealing with models with categorical # factors like: # 1 + a + a:b # then technically 'a' (which represents the space of vectors that can be # produced as linear combinations of the dummy coding of the levels of the # factor a) is collinear with the intercept, and 'a:b' (which represents the # space of vectors that can be produced as linear combinations of the dummy # coding *of a new factor whose levels are the cartesian product of a and b) # is collinear with both 'a' and the intercept. # # In such a case, the rule is that we find some way to code each term so that # the full space of vectors that it represents *is present in the model* BUT # there is no collinearity between the different terms. In effect, we have to # choose a set of vectors that spans everything that that term wants to span, # *except* that part of the vector space which was already spanned by earlier # terms. # How? We replace each term with the set of "subterms" that it covers, like # so: # 1 -> () # a -> (), a- # a:b -> (), a-, b-, a-:b- # where "-" means "coded so as not to span the intercept". So that example # above expands to # [()] + [() + a-] + [() + a- + b- + a-:b-] # so we go through from left to right, and for each term we: # 1) toss out all the subterms that have already been used (this is a simple # equality test, no magic) # 2) simplify the terms that are left, according to rules like # () + a- = a+ # (here + means, "coded to span the intercept") # 3) use the resulting subterm list as our coding for this term! # So in the above, we go: # (): stays the same, coded as intercept # () + a-: reduced to just a-, which is what we code # () + a- + b- + a-:b-: reduced to b- + a-:b-, which is simplified to a+:b-. from __future__ import print_function from patsy.util import no_pickling # This should really be a named tuple, but those don't exist until Python # 2.6... 
class _ExpandedFactor(object):
    """A factor, with an additional annotation for whether it is coded
    full-rank (includes_intercept=True) or not.

    These objects are treated as immutable."""
    def __init__(self, includes_intercept, factor):
        self.includes_intercept = includes_intercept
        self.factor = factor

    def __hash__(self):
        return hash((_ExpandedFactor, self.includes_intercept, self.factor))

    def __eq__(self, other):
        return (isinstance(other, _ExpandedFactor)
                and other.includes_intercept == self.includes_intercept
                and other.factor == self.factor)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # "+" means coded to span the intercept, "-" means coded not to.
        if self.includes_intercept:
            suffix = "+"
        else:
            suffix = "-"
        return "%r%s" % (self.factor, suffix)

    __getstate__ = no_pickling

class _Subterm(object):
    "Also immutable."
    def __init__(self, efactors):
        self.efactors = frozenset(efactors)

    def can_absorb(self, other):
        # returns True if 'self' is like a-:b-, and 'other' is like a-
        return (len(self.efactors) - len(other.efactors) == 1
                and self.efactors.issuperset(other.efactors))

    def absorb(self, other):
        # Merge 'other' into 'self' by promoting the one extra factor to
        # intercept-spanning ("-" -> "+"), e.g. () + a- -> a+.
        diff = self.efactors.difference(other.efactors)
        assert len(diff) == 1
        efactor = list(diff)[0]
        assert not efactor.includes_intercept
        new_factors = set(other.efactors)
        new_factors.add(_ExpandedFactor(True, efactor.factor))
        return _Subterm(new_factors)

    def __hash__(self):
        return hash((_Subterm, self.efactors))

    def __eq__(self, other):
        # BUG FIX: compare against *other*'s factors; previously this read
        # "self.efactors == self.efactors", which is trivially True and made
        # any two _Subterm objects compare equal.
        return (isinstance(other, _Subterm)
                and self.efactors == other.efactors)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, list(self.efactors))

    __getstate__ = no_pickling

# For testing: takes a shorthand description of a list of subterms like
#   [(), ("a-",), ("a-", "b+")]
# and expands it into a list of _Subterm and _ExpandedFactor objects.
def _expand_test_abbrevs(short_subterms): subterms = [] for subterm in short_subterms: factors = [] for factor_name in subterm: assert factor_name[-1] in ("+", "-") factors.append(_ExpandedFactor(factor_name[-1] == "+", factor_name[:-1])) subterms.append(_Subterm(factors)) return subterms def test__Subterm(): s_ab = _expand_test_abbrevs([["a-", "b-"]])[0] s_abc = _expand_test_abbrevs([["a-", "b-", "c-"]])[0] s_null = _expand_test_abbrevs([[]])[0] s_cd = _expand_test_abbrevs([["c-", "d-"]])[0] s_a = _expand_test_abbrevs([["a-"]])[0] s_ap = _expand_test_abbrevs([["a+"]])[0] s_abp = _expand_test_abbrevs([["a-", "b+"]])[0] for bad in s_abc, s_null, s_cd, s_ap, s_abp: assert not s_ab.can_absorb(bad) assert s_ab.can_absorb(s_a) assert s_ab.absorb(s_a) == s_abp # Importantly, this preserves the order of the input. Both the items inside # each subset are in the order they were in the original tuple, and the tuples # are emitted so that they're sorted with respect to their elements position # in the original tuple. def _subsets_sorted(tupl): def helper(seq): if not seq: yield () else: obj = seq[0] for subset in _subsets_sorted(seq[1:]): yield subset yield (obj,) + subset # Transform each obj -> (idx, obj) tuple, so that we can later sort them # by their position in the original list. expanded = list(enumerate(tupl)) expanded_subsets = list(helper(expanded)) # This exploits Python's stable sort: we want short before long, and ties # broken by natural ordering on the (idx, obj) entries in each subset. So # we sort by the latter first, then by the former. 
expanded_subsets.sort() expanded_subsets.sort(key=len) # And finally, we strip off the idx's: for subset in expanded_subsets: yield tuple([obj for (idx, obj) in subset]) def test__subsets_sorted(): assert list(_subsets_sorted((1, 2))) == [(), (1,), (2,), (1, 2)] assert (list(_subsets_sorted((1, 2, 3))) == [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]) assert len(list(_subsets_sorted(range(5)))) == 2 ** 5 def _simplify_one_subterm(subterms): # We simplify greedily from left to right. # Returns True if succeeded, False otherwise for short_i, short_subterm in enumerate(subterms): for long_i, long_subterm in enumerate(subterms[short_i + 1:]): if long_subterm.can_absorb(short_subterm): new_subterm = long_subterm.absorb(short_subterm) subterms[short_i + 1 + long_i] = new_subterm subterms.pop(short_i) return True return False def _simplify_subterms(subterms): while _simplify_one_subterm(subterms): pass def test__simplify_subterms(): def t(given, expected): given = _expand_test_abbrevs(given) expected = _expand_test_abbrevs(expected) print("testing if:", given, "->", expected) _simplify_subterms(given) assert given == expected t([("a-",)], [("a-",)]) t([(), ("a-",)], [("a+",)]) t([(), ("a-",), ("b-",), ("a-", "b-")], [("a+", "b+")]) t([(), ("a-",), ("a-", "b-")], [("a+",), ("a-", "b-")]) t([("a-",), ("b-",), ("a-", "b-")], [("b-",), ("a-", "b+")]) # 'term' is a Term # 'numeric_factors' is any set-like object which lists the # numeric/non-categorical factors in this term. Such factors are just # ignored by this routine. # 'used_subterms' is a set which records which subterms have previously been # used. E.g., a:b has subterms (), a, b, a:b, and if we're processing # y ~ a + a:b # then by the time we reach a:b, the () and a subterms will have already # been used. This is an in/out argument, and should be treated as opaque by # callers -- really it is a way for multiple invocations of this routine to # talk to each other. 
Each time it is called, this routine adds the subterms # of each factor to this set in place. So the first time this routine is # called, pass in an empty set, and then just keep passing the same set to # any future calls. # Returns: a list of dicts. Each dict maps from factors to booleans. The # coding for the given term should use a full-rank contrast for those factors # which map to True, a (n-1)-rank contrast for those factors which map to # False, and any factors which are not mentioned are numeric and should be # added back in. These dicts should add columns to the design matrix from left # to right. def pick_contrasts_for_term(term, numeric_factors, used_subterms): categorical_factors = [f for f in term.factors if f not in numeric_factors] # Converts a term into an expanded list of subterms like: # a:b -> 1 + a- + b- + a-:b- # and discards the ones that have already been used. subterms = [] for subset in _subsets_sorted(categorical_factors): subterm = _Subterm([_ExpandedFactor(False, f) for f in subset]) if subterm not in used_subterms: subterms.append(subterm) used_subterms.update(subterms) _simplify_subterms(subterms) factor_codings = [] for subterm in subterms: factor_coding = {} for expanded in subterm.efactors: factor_coding[expanded.factor] = expanded.includes_intercept factor_codings.append(factor_coding) return factor_codings def test_pick_contrasts_for_term(): from patsy.desc import Term used = set() codings = pick_contrasts_for_term(Term([]), set(), used) assert codings == [{}] codings = pick_contrasts_for_term(Term(["a", "x"]), set(["x"]), used) assert codings == [{"a": False}] codings = pick_contrasts_for_term(Term(["a", "b"]), set(), used) assert codings == [{"a": True, "b": False}] used_snapshot = set(used) codings = pick_contrasts_for_term(Term(["c", "d"]), set(), used) assert codings == [{"d": False}, {"c": False, "d": True}] # Do it again backwards, to make sure we're deterministic with respect to # order: codings = 
pick_contrasts_for_term(Term(["d", "c"]), set(), used_snapshot) assert codings == [{"c": False}, {"c": True, "d": False}] patsy-0.5.2/patsy/splines.py000066400000000000000000000421671412400214200160450ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2012-2013 Nathaniel Smith # See file LICENSE.txt for license information. # R-compatible spline basis functions # These are made available in the patsy.* namespace __all__ = ["bs"] import numpy as np from patsy.util import have_pandas, no_pickling, assert_no_pickling from patsy.state import stateful_transform if have_pandas: import pandas def _eval_bspline_basis(x, knots, degree): try: from scipy.interpolate import splev except ImportError: # pragma: no cover raise ImportError("spline functionality requires scipy") # 'knots' are assumed to be already pre-processed. E.g. usually you # want to include duplicate copies of boundary knots; you should do # that *before* calling this constructor. knots = np.atleast_1d(np.asarray(knots, dtype=float)) assert knots.ndim == 1 knots.sort() degree = int(degree) x = np.atleast_1d(x) if x.ndim == 2 and x.shape[1] == 1: x = x[:, 0] assert x.ndim == 1 # XX FIXME: when points fall outside of the boundaries, splev and R seem # to handle them differently. I don't know why yet. So until we understand # this and decide what to do with it, I'm going to play it safe and # disallow such points. if np.min(x) < np.min(knots) or np.max(x) > np.max(knots): raise NotImplementedError("some data points fall outside the " "outermost knots, and I'm not sure how " "to handle them. (Patches accepted!)") # Thanks to Charles Harris for explaining splev. It's not well # documented, but basically it computes an arbitrary b-spline basis # given knots and degree on some specified points (or derivatives # thereof, but we don't use that functionality), and then returns some # linear combination of these basis functions. 
To get out the basis # functions themselves, we use linear combinations like [1, 0, 0], [0, # 1, 0], [0, 0, 1]. # NB: This probably makes it rather inefficient (though I haven't checked # to be sure -- maybe the fortran code actually skips computing the basis # function for coefficients that are zero). # Note: the order of a spline is the same as its degree + 1. # Note: there are (len(knots) - order) basis functions. n_bases = len(knots) - (degree + 1) basis = np.empty((x.shape[0], n_bases), dtype=float) for i in range(n_bases): coefs = np.zeros((n_bases,)) coefs[i] = 1 basis[:, i] = splev(x, (knots, coefs, degree)) return basis def _R_compat_quantile(x, probs): #return np.percentile(x, 100 * np.asarray(probs)) probs = np.asarray(probs) quantiles = np.asarray([np.percentile(x, 100 * prob) for prob in probs.ravel(order="C")]) return quantiles.reshape(probs.shape, order="C") def test__R_compat_quantile(): def t(x, prob, expected): assert np.allclose(_R_compat_quantile(x, prob), expected) t([10, 20], 0.5, 15) t([10, 20], 0.3, 13) t([10, 20], [0.3, 0.7], [13, 17]) t(list(range(10)), [0.3, 0.7], [2.7, 6.3]) class BS(object): """bs(x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None) Generates a B-spline basis for ``x``, allowing non-linear fits. The usual usage is something like:: y ~ 1 + bs(x, 4) to fit ``y`` as a smooth function of ``x``, with 4 degrees of freedom given to the smooth. :arg df: The number of degrees of freedom to use for this spline. The return value will have this many columns. You must specify at least one of ``df`` and ``knots``. :arg knots: The interior knots to use for the spline. If unspecified, then equally spaced quantiles of the input data are used. You must specify at least one of ``df`` and ``knots``. :arg degree: The degree of the spline to use. :arg include_intercept: If ``True``, then the resulting spline basis will span the intercept term (i.e., the constant function). 
If ``False`` (the default) then this will not be the case, which is useful for avoiding overspecification in models that include multiple spline terms and/or an intercept term. :arg lower_bound: The lower exterior knot location. :arg upper_bound: The upper exterior knot location. A spline with ``degree=0`` is piecewise constant with breakpoints at each knot, and the default knot positions are quantiles of the input. So if you find yourself in the situation of wanting to quantize a continuous variable into ``num_bins`` equal-sized bins with a constant effect across each bin, you can use ``bs(x, num_bins - 1, degree=0)``. (The ``- 1`` is because one degree of freedom will be taken by the intercept; alternatively, you could leave the intercept term out of your model and use ``bs(x, num_bins, degree=0, include_intercept=True)``. A spline with ``degree=1`` is piecewise linear with breakpoints at each knot. The default is ``degree=3``, which gives a cubic b-spline. This is a stateful transform (for details see :ref:`stateful-transforms`). If ``knots``, ``lower_bound``, or ``upper_bound`` are not specified, they will be calculated from the data and then the chosen values will be remembered and re-used for prediction from the fitted model. Using this function requires scipy be installed. .. note:: This function is very similar to the R function of the same name. In cases where both return output at all (e.g., R's ``bs`` will raise an error if ``degree=0``, while patsy's will not), they should produce identical output given identical input and parameter settings. .. warning:: I'm not sure on what the proper handling of points outside the lower/upper bounds is, so for now attempting to evaluate a spline basis at such points produces an error. Patches gratefully accepted. .. 
versionadded:: 0.2.0 """ def __init__(self): self._tmp = {} self._degree = None self._all_knots = None def memorize_chunk(self, x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None): args = {"df": df, "knots": knots, "degree": degree, "include_intercept": include_intercept, "lower_bound": lower_bound, "upper_bound": upper_bound, } self._tmp["args"] = args # XX: check whether we need x values before saving them x = np.atleast_1d(x) if x.ndim == 2 and x.shape[1] == 1: x = x[:, 0] if x.ndim > 1: raise ValueError("input to 'bs' must be 1-d, " "or a 2-d column vector") # There's no better way to compute exact quantiles than memorizing # all data. self._tmp.setdefault("xs", []).append(x) def memorize_finish(self): tmp = self._tmp args = tmp["args"] del self._tmp if args["degree"] < 0: raise ValueError("degree must be greater than 0 (not %r)" % (args["degree"],)) if int(args["degree"]) != args["degree"]: raise ValueError("degree must be an integer (not %r)" % (self._degree,)) # These are guaranteed to all be 1d vectors by the code above x = np.concatenate(tmp["xs"]) if args["df"] is None and args["knots"] is None: raise ValueError("must specify either df or knots") order = args["degree"] + 1 if args["df"] is not None: n_inner_knots = args["df"] - order if not args["include_intercept"]: n_inner_knots += 1 if n_inner_knots < 0: raise ValueError("df=%r is too small for degree=%r and " "include_intercept=%r; must be >= %s" % (args["df"], args["degree"], args["include_intercept"], # We know that n_inner_knots is negative; # if df were that much larger, it would # have been zero, and things would work. 
args["df"] - n_inner_knots)) if args["knots"] is not None: if len(args["knots"]) != n_inner_knots: raise ValueError("df=%s with degree=%r implies %s knots, " "but %s knots were provided" % (args["df"], args["degree"], n_inner_knots, len(args["knots"]))) else: # Need to compute inner knots knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1] inner_knots = _R_compat_quantile(x, knot_quantiles) if args["knots"] is not None: inner_knots = args["knots"] if args["lower_bound"] is not None: lower_bound = args["lower_bound"] else: lower_bound = np.min(x) if args["upper_bound"] is not None: upper_bound = args["upper_bound"] else: upper_bound = np.max(x) if lower_bound > upper_bound: raise ValueError("lower_bound > upper_bound (%r > %r)" % (lower_bound, upper_bound)) inner_knots = np.asarray(inner_knots) if inner_knots.ndim > 1: raise ValueError("knots must be 1 dimensional") if np.any(inner_knots < lower_bound): raise ValueError("some knot values (%s) fall below lower bound " "(%r)" % (inner_knots[inner_knots < lower_bound], lower_bound)) if np.any(inner_knots > upper_bound): raise ValueError("some knot values (%s) fall above upper bound " "(%r)" % (inner_knots[inner_knots > upper_bound], upper_bound)) all_knots = np.concatenate(([lower_bound, upper_bound] * order, inner_knots)) all_knots.sort() self._degree = args["degree"] self._all_knots = all_knots def transform(self, x, df=None, knots=None, degree=3, include_intercept=False, lower_bound=None, upper_bound=None): basis = _eval_bspline_basis(x, self._all_knots, self._degree) if not include_intercept: basis = basis[:, 1:] if have_pandas: if isinstance(x, (pandas.Series, pandas.DataFrame)): basis = pandas.DataFrame(basis) basis.index = x.index return basis __getstate__ = no_pickling bs = stateful_transform(BS) def test_bs_compat(): from patsy.test_state import check_stateful from patsy.test_splines_bs_data import (R_bs_test_x, R_bs_test_data, R_bs_num_tests) lines = R_bs_test_data.split("\n") tests_ran = 0 start_idx 
= lines.index("--BEGIN TEST CASE--") while True: if not lines[start_idx] == "--BEGIN TEST CASE--": break start_idx += 1 stop_idx = lines.index("--END TEST CASE--", start_idx) block = lines[start_idx:stop_idx] test_data = {} for line in block: key, value = line.split("=", 1) test_data[key] = value # Translate the R output into Python calling conventions kwargs = { "degree": int(test_data["degree"]), # integer, or None "df": eval(test_data["df"]), # np.array() call, or None "knots": eval(test_data["knots"]), } if test_data["Boundary.knots"] != "None": lower, upper = eval(test_data["Boundary.knots"]) kwargs["lower_bound"] = lower kwargs["upper_bound"] = upper kwargs["include_intercept"] = (test_data["intercept"] == "TRUE") # Special case: in R, setting intercept=TRUE increases the effective # dof by 1. Adjust our arguments to match. # if kwargs["df"] is not None and kwargs["include_intercept"]: # kwargs["df"] += 1 output = np.asarray(eval(test_data["output"])) if kwargs["df"] is not None: assert output.shape[1] == kwargs["df"] # Do the actual test check_stateful(BS, False, R_bs_test_x, output, **kwargs) tests_ran += 1 # Set up for the next one start_idx = stop_idx + 1 assert tests_ran == R_bs_num_tests test_bs_compat.slow = 1 # This isn't checked by the above, because R doesn't have zero degree # b-splines. def test_bs_0degree(): x = np.logspace(-1, 1, 10) result = bs(x, knots=[1, 4], degree=0, include_intercept=True) assert result.shape[1] == 3 expected_0 = np.zeros(10) expected_0[x < 1] = 1 assert np.array_equal(result[:, 0], expected_0) expected_1 = np.zeros(10) expected_1[(x >= 1) & (x < 4)] = 1 assert np.array_equal(result[:, 1], expected_1) expected_2 = np.zeros(10) expected_2[x >= 4] = 1 assert np.array_equal(result[:, 2], expected_2) # Check handling of points that exactly fall on knots. They arbitrarily # get included into the larger region, not the smaller. 
This is consistent # with Python's half-open interval convention -- each basis function is # constant on [knot[i], knot[i + 1]). assert np.array_equal(bs([0, 1, 2], degree=0, knots=[1], include_intercept=True), [[1, 0], [0, 1], [0, 1]]) result_int = bs(x, knots=[1, 4], degree=0, include_intercept=True) result_no_int = bs(x, knots=[1, 4], degree=0, include_intercept=False) assert np.array_equal(result_int[:, 1:], result_no_int) def test_bs_errors(): import pytest x = np.linspace(-10, 10, 20) # error checks: # out of bounds pytest.raises(NotImplementedError, bs, x, 3, lower_bound=0) pytest.raises(NotImplementedError, bs, x, 3, upper_bound=0) # must specify df or knots pytest.raises(ValueError, bs, x) # df/knots match/mismatch (with and without intercept) # match: bs(x, df=10, include_intercept=False, knots=[0] * 7) bs(x, df=10, include_intercept=True, knots=[0] * 6) bs(x, df=10, include_intercept=False, knots=[0] * 9, degree=1) bs(x, df=10, include_intercept=True, knots=[0] * 8, degree=1) # too many knots: pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 8) pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 7) pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 10, degree=1) pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 9, degree=1) # too few knots: pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 6) pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 5) pytest.raises(ValueError, bs, x, df=10, include_intercept=False, knots=[0] * 8, degree=1) pytest.raises(ValueError, bs, x, df=10, include_intercept=True, knots=[0] * 7, degree=1) # df too small pytest.raises(ValueError, bs, x, df=1, degree=3) pytest.raises(ValueError, bs, x, df=3, degree=5) # bad degree pytest.raises(ValueError, bs, x, df=10, degree=-1) pytest.raises(ValueError, bs, x, df=10, degree=1.5) # upper_bound < lower_bound pytest.raises(ValueError, 
bs, x, 3, lower_bound=1, upper_bound=-1) # multidimensional input pytest.raises(ValueError, bs, np.column_stack((x, x)), 3) # unsorted knots are okay, and get sorted assert np.array_equal(bs(x, knots=[1, 4]), bs(x, knots=[4, 1])) # 2d knots pytest.raises(ValueError, bs, x, knots=[[0], [20]]) # knots > upper_bound pytest.raises(ValueError, bs, x, knots=[0, 20]) pytest.raises(ValueError, bs, x, knots=[0, 4], upper_bound=3) # knots < lower_bound pytest.raises(ValueError, bs, x, knots=[-20, 0]) pytest.raises(ValueError, bs, x, knots=[-4, 0], lower_bound=-3) # differences between bs and ns (since the R code is a pile of copy-paste): # - degree is always 3 # - different number of interior knots given df (b/c fewer dof used at edges I # guess) # - boundary knots always repeated exactly 4 times (same as bs with degree=3) # - complications at the end to handle boundary conditions # the 'rcs' function uses slightly different conventions -- in particular it # picks boundary knots that are not quite at the edges of the data, which # makes sense for a natural spline. patsy-0.5.2/patsy/state.py000066400000000000000000000153251412400214200155040ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011 Nathaniel Smith # See file LICENSE.txt for license information. # Stateful transform protocol: # def __init__(self): # pass # def memorize_chunk(self, input_data): # return None # def memorize_finish(self): # return None # def transform(self, input_data): # return output_data # BETTER WAY: always run the first row of data through the builder alone, and # check that it gives the same output row as when running the whole block of # data through at once. This gives us the same information, but it's robust # against people writing their own centering functions. # QUESTION: right now we refuse to even fit a model that contains a # my_transform(x)-style function. Maybe we should allow it to be fit (with a # warning), and only disallow making predictions with it? 
Need to revisit this # question once it's clearer what exactly our public API will look like, # because right now I'm not sure how to tell whether we are being called for # fitting versus being called for prediction. from functools import wraps import numpy as np from patsy.util import (atleast_2d_column_default, asarray_or_pandas, pandas_friendly_reshape, wide_dtype_for, safe_issubdtype, no_pickling, assert_no_pickling) # These are made available in the patsy.* namespace __all__ = ["stateful_transform", "center", "standardize", "scale", ] def stateful_transform(class_): """Create a stateful transform callable object from a class that fulfills the :ref:`stateful transform protocol `. """ @wraps(class_) def stateful_transform_wrapper(*args, **kwargs): transform = class_() transform.memorize_chunk(*args, **kwargs) transform.memorize_finish() return transform.transform(*args, **kwargs) stateful_transform_wrapper.__patsy_stateful_transform__ = class_ return stateful_transform_wrapper # class NonIncrementalStatefulTransform(object): # def __init__(self): # self._data = [] # # def memorize_chunk(self, input_data, *args, **kwargs): # self._data.append(input_data) # self._args = _args # self._kwargs = kwargs # # def memorize_finish(self): # all_data = np.row_stack(self._data) # args = self._args # kwargs = self._kwargs # del self._data # del self._args # del self._kwargs # self.memorize_all(all_data, *args, **kwargs) # # def memorize_all(self, input_data, *args, **kwargs): # raise NotImplementedError # # def transform(self, input_data, *args, **kwargs): # raise NotImplementedError # # class QuantileEstimatingTransform(NonIncrementalStatefulTransform): # def memorize_all(self, input_data, *args, **kwargs): class Center(object): """center(x) A stateful transform that centers input data, i.e., subtracts the mean. If input has multiple columns, centers each column separately. 
Equivalent to ``standardize(x, rescale=False)`` """ def __init__(self): self._sum = None self._count = 0 def memorize_chunk(self, x): x = atleast_2d_column_default(x) self._count += x.shape[0] this_total = np.sum(x, 0, dtype=wide_dtype_for(x)) # This is to handle potentially multi-column x's: if self._sum is None: self._sum = this_total else: self._sum += this_total def memorize_finish(self): pass def transform(self, x): x = asarray_or_pandas(x) # This doesn't copy data unless our input is a DataFrame that has # heterogeneous types. And in that case we're going to be munging the # types anyway, so copying isn't a big deal. x_arr = np.asarray(x) if safe_issubdtype(x_arr.dtype, np.integer): dt = float else: dt = x_arr.dtype mean_val = np.asarray(self._sum / self._count, dtype=dt) centered = atleast_2d_column_default(x, preserve_pandas=True) - mean_val return pandas_friendly_reshape(centered, x.shape) __getstate__ = no_pickling center = stateful_transform(Center) # See: # http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm # or page 232 of Knuth vol. 3 (3rd ed.). class Standardize(object): """standardize(x, center=True, rescale=True, ddof=0) A stateful transform that standardizes input data, i.e. it subtracts the mean and divides by the sample standard deviation. Either centering or rescaling or both can be disabled by use of keyword arguments. The `ddof` argument controls the delta degrees of freedom when computing the standard deviation (cf. :func:`numpy.std`). The default of ``ddof=0`` produces the maximum likelihood estimate; use ``ddof=1`` if you prefer the square root of the unbiased estimate of the variance. If input has multiple columns, standardizes each column separately. .. note:: This function computes the mean and standard deviation using a memory-efficient online algorithm, making it suitable for use with large incrementally processed data-sets. 
""" def __init__(self): self.current_n = 0 self.current_mean = None self.current_M2 = None def memorize_chunk(self, x, center=True, rescale=True, ddof=0): x = atleast_2d_column_default(x) if self.current_mean is None: self.current_mean = np.zeros(x.shape[1], dtype=wide_dtype_for(x)) self.current_M2 = np.zeros(x.shape[1], dtype=wide_dtype_for(x)) # XX this can surely be vectorized but I am feeling lazy: for i in range(x.shape[0]): self.current_n += 1 delta = x[i, :] - self.current_mean self.current_mean += delta / self.current_n self.current_M2 += delta * (x[i, :] - self.current_mean) def memorize_finish(self): pass def transform(self, x, center=True, rescale=True, ddof=0): # XX: this forces all inputs to double-precision real, even if the # input is single- or extended-precision or complex. But I got all # tangled up in knots trying to do that without breaking something # else (e.g. by requiring an extra copy). x = asarray_or_pandas(x, copy=True, dtype=float) x_2d = atleast_2d_column_default(x, preserve_pandas=True) if center: x_2d -= self.current_mean if rescale: x_2d /= np.sqrt(self.current_M2 / (self.current_n - ddof)) return pandas_friendly_reshape(x_2d, x.shape) __getstate__ = no_pickling standardize = stateful_transform(Standardize) # R compatibility: scale = standardize patsy-0.5.2/patsy/test_build.py000066400000000000000000000744611412400214200165300ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2012-2013 Nathaniel Smith # See file LICENSE.txt for license information. # There are a number of unit tests in build.py, but this file contains more # thorough tests of the overall design matrix building system. (These are # still not exhaustive end-to-end tests, though -- for that see # test_highlevel.py.) 
from __future__ import print_function

import six

import numpy as np
import pytest

from patsy import PatsyError
from patsy.util import (atleast_2d_column_default,
                        have_pandas, have_pandas_categorical)
from patsy.desc import Term, INTERCEPT
from patsy.build import *
from patsy.categorical import C
from patsy.user_util import balanced, LookupFactor
from patsy.design_info import DesignMatrix, DesignInfo

if have_pandas:
    import pandas

def assert_full_rank(m):
    # Assert that matrix m has full column rank (via SVD with a small
    # singular-value cutoff). A zero-column matrix trivially passes.
    m = atleast_2d_column_default(m)
    if m.shape[1] == 0:
        return True
    u, s, v = np.linalg.svd(m)
    rank = np.sum(s > 1e-10)
    assert rank == m.shape[1]

def test_assert_full_rank():
    # Sanity-check the helper itself on known full-rank/deficient matrices.
    assert_full_rank(np.eye(10))
    assert_full_rank([[1, 0], [1, 0], [1, 0], [1, 1]])
    pytest.raises(AssertionError,
                  assert_full_rank, [[1, 0], [2, 0]])
    pytest.raises(AssertionError,
                  assert_full_rank, [[1, 2], [2, 4]])
    pytest.raises(AssertionError,
                  assert_full_rank, [[1, 2, 3], [1, 10, 100]])
    # col1 + col2 = col3
    pytest.raises(AssertionError,
                  assert_full_rank, [[1, 2, 3], [1, 5, 6], [1, 6, 7]])

def make_termlist(*entries):
    # Build a list of Terms, one per entry; each entry is an iterable of
    # factor names that become LookupFactors.
    terms = []
    for entry in entries:
        terms.append(Term([LookupFactor(name) for name in entry]))
    return terms

def check_design_matrix(mm, expected_rank, termlist, column_names=None):
    # Verify a built matrix: full rank, expected terms/columns/shape.
    assert_full_rank(mm)
    assert set(mm.design_info.terms) == set(termlist)
    if column_names is not None:
        assert mm.design_info.column_names == column_names
    assert mm.ndim == 2
    assert mm.shape[1] == expected_rank

def make_matrix(data, expected_rank, entries, column_names=None):
    # End-to-end helper: build a design matrix for the given term entries
    # from `data`, validate it with check_design_matrix, and return it.
    termlist = make_termlist(*entries)
    def iter_maker():
        yield data
    design_infos = design_matrix_builders([termlist], iter_maker, eval_env=0)
    matrices = build_design_matrices(design_infos, data)
    matrix = matrices[0]
    assert (design_infos[0].term_slices
            == matrix.design_info.term_slices)
    assert (design_infos[0].column_names
            == matrix.design_info.column_names)
    assert matrix.design_info is design_infos[0]
    check_design_matrix(matrix, expected_rank, termlist,
                        column_names=column_names)
    return matrix

def test_simple():
    # Basic full-rank and reduced-rank codings for 2x2 balanced data, plus
    # numeric factors and interactions.
    data = balanced(a=2, b=2)
    x1 = data["x1"] = np.linspace(0, 1, len(data["a"]))
    x2 = data["x2"] = data["x1"] ** 2

    m = make_matrix(data, 2, [["a"]], column_names=["a[a1]", "a[a2]"])
    assert np.allclose(m, [[1, 0], [1, 0], [0, 1], [0, 1]])

    m = make_matrix(data, 2, [[], ["a"]],
                    column_names=["Intercept", "a[T.a2]"])
    assert np.allclose(m, [[1, 0], [1, 0], [1, 1], [1, 1]])

    m = make_matrix(data, 4, [["a", "b"]],
                    column_names=["a[a1]:b[b1]", "a[a2]:b[b1]",
                                  "a[a1]:b[b2]", "a[a2]:b[b2]"])
    assert np.allclose(m, [[1, 0, 0, 0],
                           [0, 0, 1, 0],
                           [0, 1, 0, 0],
                           [0, 0, 0, 1]])

    m = make_matrix(data, 4, [[], ["a"], ["b"], ["a", "b"]],
                    column_names=["Intercept", "a[T.a2]",
                                  "b[T.b2]", "a[T.a2]:b[T.b2]"])
    assert np.allclose(m, [[1, 0, 0, 0],
                           [1, 0, 1, 0],
                           [1, 1, 0, 0],
                           [1, 1, 1, 1]])

    m = make_matrix(data, 4, [[], ["b"], ["a"], ["b", "a"]],
                    column_names=["Intercept", "b[T.b2]",
                                  "a[T.a2]", "b[T.b2]:a[T.a2]"])
    assert np.allclose(m, [[1, 0, 0, 0],
                           [1, 1, 0, 0],
                           [1, 0, 1, 0],
                           [1, 1, 1, 1]])

    m = make_matrix(data, 4, [["a"], ["x1"], ["a", "x1"]],
                    column_names=["a[a1]", "a[a2]", "x1", "a[T.a2]:x1"])
    assert np.allclose(m, [[1, 0, x1[0], 0],
                           [1, 0, x1[1], 0],
                           [0, 1, x1[2], x1[2]],
                           [0, 1, x1[3], x1[3]]])

    m = make_matrix(data, 3, [["x1"], ["x2"], ["x2", "x1"]],
                    column_names=["x1", "x2", "x2:x1"])
    assert np.allclose(m, np.column_stack((x1, x2, x1 * x2)))

def test_R_bugs():
    # Cases where R's model.matrix gets the rank wrong; patsy should not.
    data = balanced(a=2, b=2, c=2)
    data["x"] = np.linspace(0, 1, len(data["a"]))
    # For "1 + a:b", R produces a design matrix with too many columns (5
    # instead of 4), because it can't tell that there is a redundancy between
    # the two terms.
    make_matrix(data, 4, [[], ["a", "b"]])
    # For "0 + a:x + a:b", R produces a design matrix with too few columns (4
    # instead of 6), because it thinks that there is a redundancy which
    # doesn't exist.
    make_matrix(data, 6, [["a", "x"], ["a", "b"]])
    # This can be compared with "0 + a:c + a:b", where the redundancy does
    # exist. Confusingly, adding another categorical factor increases the
    # baseline dimensionality to 8, and then the redundancy reduces it to 6
    # again, so the result is the same as before but for different reasons. (R
    # does get this one right, but we might as well test it.)
    make_matrix(data, 6, [["a", "c"], ["a", "b"]])

def test_redundancy_thoroughly():
    # To make sure there aren't any lurking bugs analogous to the ones that R
    # has (see above), we check that we get the correct matrix rank for every
    # possible combination of 2 categorical and 2 numerical factors.
    data = balanced(a=2, b=2, repeat=5)
    data["x1"] = np.linspace(0, 1, len(data["a"]))
    data["x2"] = data["x1"] ** 2

    def all_subsets(l):
        # Yield every subset of l (as sorted tuples), including the empty one.
        if not l:
            yield tuple()
        else:
            obj = l[0]
            for subset in all_subsets(l[1:]):
                yield tuple(sorted(subset))
                yield tuple(sorted((obj,) + subset))

    all_terms = list(all_subsets(("a", "b", "x1", "x2")))
    all_termlist_templates = list(all_subsets(all_terms))
    print(len(all_termlist_templates))
    # eliminate some of the symmetric versions to speed things up
    redundant = [[("b",), ("a",)],
                 [("x2",), ("x1",)],
                 [("b", "x2"), ("a", "x1")],
                 [("a", "b", "x2"), ("a", "b", "x1")],
                 [("b", "x1", "x2"), ("a", "x1", "x2")]]
    count = 0
    import time
    start = time.time()
    for termlist_template in all_termlist_templates:
        termlist_set = set(termlist_template)
        for dispreferred, preferred in redundant:
            if dispreferred in termlist_set and preferred not in termlist_set:
                break
        else:
            # Compute the expected rank by expanding each term into its
            # full-rank pieces (categorical parts expanded into all subsets).
            expanded_terms = set()
            for term_template in termlist_template:
                numeric = tuple([t for t in term_template
                                 if t.startswith("x")])
                rest = [t for t in term_template if not t.startswith("x")]
                for subset_rest in all_subsets(rest):
                    expanded_terms.add(frozenset(subset_rest + numeric))
            # Because our categorical variables have 2 levels, each expanded
            # term corresponds to 1 unique dimension of variation
            expected_rank = len(expanded_terms)
            if termlist_template in [(), ((),)]:
                # No data dependence, should fail
                pytest.raises(PatsyError, make_matrix, data,
                              expected_rank, termlist_template)
            else:
                make_matrix(data, expected_rank, termlist_template)
            count += 1
            if count % 100 == 0:
                print("Completed:", count)
    print("Took %0.2f seconds" % (time.time() - start,))
test_redundancy_thoroughly.slow = 1

def test_data_types():
    # The same logical data fed in as dict, structured array (bytes and
    # unicode), recarray, and (if available) DataFrame should all build the
    # same design matrix.
    basic_dict = {"a": ["a1", "a2", "a1", "a2"],
                  "x": [1, 2, 3, 4]}
    # On Python 2, this is identical to basic_dict:
    basic_dict_bytes = dict(basic_dict)
    basic_dict_bytes["a"] = [s.encode("ascii") for s in basic_dict_bytes["a"]]
    # On Python 3, this is identical to basic_dict:
    basic_dict_unicode = {"a": ["a1", "a2", "a1", "a2"],
                          "x": [1, 2, 3, 4]}
    basic_dict_unicode = dict(basic_dict)
    basic_dict_unicode["a"] = [six.text_type(s)
                               for s in basic_dict_unicode["a"]]

    structured_array_bytes = np.array(list(zip(basic_dict["a"],
                                               basic_dict["x"])),
                                      dtype=[("a", "S2"), ("x", int)])
    structured_array_unicode = np.array(list(zip(basic_dict["a"],
                                                 basic_dict["x"])),
                                        dtype=[("a", "U2"), ("x", int)])
    recarray_bytes = structured_array_bytes.view(np.recarray)
    recarray_unicode = structured_array_unicode.view(np.recarray)
    datas = [basic_dict,
             structured_array_bytes,
             structured_array_unicode,
             recarray_bytes,
             recarray_unicode]
    if have_pandas:
        df_bytes = pandas.DataFrame(basic_dict_bytes)
        datas.append(df_bytes)
        df_unicode = pandas.DataFrame(basic_dict_unicode)
        datas.append(df_unicode)
    for data in datas:
        m = make_matrix(data, 4, [["a"], ["a", "x"]],
                        column_names=["a[a1]", "a[a2]",
                                      "a[a1]:x", "a[a2]:x"])
        assert np.allclose(m, [[1, 0, 1, 0],
                               [0, 1, 0, 2],
                               [1, 0, 3, 0],
                               [0, 1, 0, 4]])

def test_build_design_matrices_dtype():
    # The dtype= argument controls the output matrix dtype (default float64).
    data = {"x": [1, 2, 3]}
    def iter_maker():
        yield data
    builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0]

    mat = build_design_matrices([builder], data)[0]
    assert mat.dtype == np.dtype(np.float64)

    mat = build_design_matrices([builder], data, dtype=np.float32)[0]
    assert mat.dtype == np.dtype(np.float32)

    # float128 is platform-dependent, so only test it where it exists.
    if hasattr(np, "float128"):
        mat = build_design_matrices([builder], data, dtype=np.float128)[0]
        assert mat.dtype == np.dtype(np.float128)

def test_return_type():
    data = {"x": [1, 2, 3]}
    def iter_maker():
        yield data
    builder = design_matrix_builders([make_termlist("x")], iter_maker, 0)[0]

    # Check explicitly passing return_type="matrix" works
    mat = build_design_matrices([builder], data, return_type="matrix")[0]
    assert isinstance(mat, DesignMatrix)

    # Check that nonsense is detected
    pytest.raises(PatsyError,
                  build_design_matrices, [builder], data,
                  return_type="asdfsadf")

def test_NA_action():
    # Exercise the NA_action argument: default drop, explicit "drop" string,
    # and an NAAction object that lets NaN pass through.
    initial_data = {"x": [1, 2, 3], "c": ["c1", "c2", "c1"]}
    def iter_maker():
        yield initial_data
    builder = design_matrix_builders([make_termlist("x", "c")],
                                     iter_maker, 0)[0]

    # By default drops rows containing either NaN or None
    mat = build_design_matrices([builder],
                                {"x": [10.0, np.nan, 20.0],
                                 "c": np.asarray(["c1", "c2", None],
                                                 dtype=object)})[0]
    assert mat.shape == (1, 3)
    assert np.array_equal(mat, [[1.0, 0.0, 10.0]])

    # NA_action="a string" also accepted:
    mat = build_design_matrices([builder],
                                {"x": [10.0, np.nan, 20.0],
                                 "c": np.asarray(["c1", "c2", None],
                                                 dtype=object)},
                                NA_action="drop")[0]
    assert mat.shape == (1, 3)
    assert np.array_equal(mat, [[1.0, 0.0, 10.0]])

    # And objects
    from patsy.missing import NAAction
    # allows NaN's to pass through
    NA_action = NAAction(NA_types=[])
    mat = build_design_matrices([builder],
                                {"x": [10.0, np.nan],
                                 "c": np.asarray(["c1", "c2"],
                                                 dtype=object)},
                                NA_action=NA_action)[0]
    assert mat.shape == (2, 3)
    # According to this (and only this) function, NaN == NaN.
    np.testing.assert_array_equal(mat, [[1.0, 0.0, 10.0],
                                        [0.0, 1.0, np.nan]])

    # NA_action="raise"
    pytest.raises(PatsyError,
                  build_design_matrices,
                  [builder],
                  {"x": [10.0, np.nan, 20.0],
                   "c": np.asarray(["c1", "c2", None],
                                   dtype=object)},
                  NA_action="raise")

def test_NA_drop_preserves_levels():
    # Even if all instances of some level are dropped, we still include it in
    # the output matrix (as an all-zeros column)
    data = {"x": [1.0, np.nan, 3.0], "c": ["c1", "c2", "c3"]}
    def iter_maker():
        yield data
    design_info = design_matrix_builders([make_termlist("x", "c")],
                                         iter_maker, 0)[0]

    assert design_info.column_names == ["c[c1]", "c[c2]", "c[c3]", "x"]

    mat, = build_design_matrices([design_info], data)

    assert mat.shape == (2, 4)
    assert np.array_equal(mat, [[1.0, 0.0, 0.0, 1.0],
                                [0.0, 0.0, 1.0, 3.0]])

def test_return_type_pandas():
    # Thorough checks of return_type="dataframe": index propagation, index
    # compatibility checking, and the error when pandas is unavailable.
    if not have_pandas:
        return

    data = pandas.DataFrame({"x": [1, 2, 3],
                             "y": [4, 5, 6],
                             "a": ["a1", "a2", "a1"]},
                            index=[10, 20, 30])
    def iter_maker():
        yield data
    int_builder, = design_matrix_builders([make_termlist([])],
                                          iter_maker, 0)
    (y_builder, x_builder) = design_matrix_builders([make_termlist("y"),
                                                     make_termlist("x")],
                                                    iter_maker,
                                                    eval_env=0)
    (x_a_builder,) = design_matrix_builders([make_termlist("x", "a")],
                                            iter_maker,
                                            eval_env=0)
    (x_y_builder,) = design_matrix_builders([make_termlist("x", "y")],
                                            iter_maker,
                                            eval_env=0)
    # Index compatibility is always checked for pandas input, regardless of
    # whether we're producing pandas output
    pytest.raises(PatsyError,
                  build_design_matrices,
                  [x_a_builder], {"x": data["x"], "a": data["a"][::-1]})
    pytest.raises(PatsyError,
                  build_design_matrices,
                  [y_builder, x_builder],
                  {"x": data["x"], "y": data["y"][::-1]})
    # And we also check consistency between data.index and value indexes
    # Creating a mismatch between these is a bit tricky. We want a data object
    # such that isinstance(data, DataFrame), but data["x"].index !=
    # data.index.
    class CheatingDataFrame(pandas.DataFrame):
        # Returns a reversed-index "x" column to provoke the mismatch check.
        def __getitem__(self, key):
            if key == "x":
                return pandas.DataFrame.__getitem__(self, key)[::-1]
            else:
                return pandas.DataFrame.__getitem__(self, key)
    pytest.raises(PatsyError,
                  build_design_matrices,
                  [x_builder],
                  CheatingDataFrame(data))

    # A mix of pandas input and unindexed input is fine
    (mat,) = build_design_matrices([x_y_builder],
                                   {"x": data["x"], "y": [40, 50, 60]})
    assert np.allclose(mat, [[1, 40], [2, 50], [3, 60]])

    # with return_type="dataframe", we get out DataFrames with nice indices
    # and nice column names and design_info
    y_df, x_df = build_design_matrices([y_builder, x_builder], data,
                                       return_type="dataframe")
    assert isinstance(y_df, pandas.DataFrame)
    assert isinstance(x_df, pandas.DataFrame)
    assert np.array_equal(y_df, [[4], [5], [6]])
    assert np.array_equal(x_df, [[1], [2], [3]])
    assert np.array_equal(y_df.index, [10, 20, 30])
    assert np.array_equal(x_df.index, [10, 20, 30])
    assert np.array_equal(y_df.columns, ["y"])
    assert np.array_equal(x_df.columns, ["x"])
    assert y_df.design_info.column_names == ["y"]
    assert x_df.design_info.column_names == ["x"]
    assert y_df.design_info.term_names == ["y"]
    assert x_df.design_info.term_names == ["x"]

    # Same with mix of pandas and unindexed info, even if in different
    # matrices
    y_df, x_df = build_design_matrices([y_builder, x_builder],
                                       {"y": [7, 8, 9], "x": data["x"]},
                                       return_type="dataframe")
    assert isinstance(y_df, pandas.DataFrame)
    assert isinstance(x_df, pandas.DataFrame)
    assert np.array_equal(y_df, [[7], [8], [9]])
    assert np.array_equal(x_df, [[1], [2], [3]])
    assert np.array_equal(y_df.index, [10, 20, 30])
    assert np.array_equal(x_df.index, [10, 20, 30])
    assert np.array_equal(y_df.columns, ["y"])
    assert np.array_equal(x_df.columns, ["x"])
    assert y_df.design_info.column_names == ["y"]
    assert x_df.design_info.column_names == ["x"]
    assert y_df.design_info.term_names == ["y"]
    assert x_df.design_info.term_names == ["x"]

    # Check categorical works for carrying index too
    (x_a_df,) = build_design_matrices([x_a_builder],
                                      {"x": [-1, -2, -3], "a": data["a"]},
                                      return_type="dataframe")
    assert isinstance(x_a_df, pandas.DataFrame)
    assert np.array_equal(x_a_df, [[1, 0, -1], [0, 1, -2], [1, 0, -3]])
    assert np.array_equal(x_a_df.index, [10, 20, 30])

    # And if we have no indexed input, then we let pandas make up an index as
    # per its usual rules:
    (x_y_df,) = build_design_matrices([x_y_builder],
                                      {"y": [7, 8, 9], "x": [10, 11, 12]},
                                      return_type="dataframe")
    assert isinstance(x_y_df, pandas.DataFrame)
    assert np.array_equal(x_y_df, [[10, 7], [11, 8], [12, 9]])
    assert np.array_equal(x_y_df.index, [0, 1, 2])

    # If 'data' is a DataFrame, then that suffices, even if no factors are
    # available.
    (int_df,) = build_design_matrices([int_builder], data,
                                      return_type="dataframe")
    assert isinstance(int_df, pandas.DataFrame)
    assert np.array_equal(int_df, [[1], [1], [1]])
    assert int_df.index.equals(pandas.Index([10, 20, 30]))

    import patsy.build
    had_pandas = patsy.build.have_pandas
    try:
        # Temporarily pretend pandas is missing to check the error path.
        patsy.build.have_pandas = False
        # return_type="dataframe" gives a nice error if pandas is not available
        pytest.raises(PatsyError,
                      build_design_matrices,
                      [x_builder], {"x": [1, 2, 3]},
                      return_type="dataframe")
    finally:
        patsy.build.have_pandas = had_pandas

    x_df, = build_design_matrices([x_a_builder],
                                  {"x": [1.0, np.nan, 3.0],
                                   "a": np.asarray([None, "a2", "a1"],
                                                   dtype=object)},
                                  NA_action="drop",
                                  return_type="dataframe")
    assert x_df.index.equals(pandas.Index([2]))

def test_data_mismatch():
    # Inconsistent data fed to the builder (incrementally or between
    # build-time and predict-time) should raise PatsyError.
    test_cases_twoway = [
        # Data type mismatch
        ([1, 2, 3], [True, False, True]),
        (C(["a", "b", "c"], levels=["c", "b", "a"]),
         C(["a", "b", "c"], levels=["a", "b", "c"])),
        # column number mismatches
        ([[1], [2], [3]], [[1, 1], [2, 2], [3, 3]]),
        ([[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[1, 1], [2, 2], [3, 3]]),
        ]
    test_cases_oneway = [
        ([1, 2, 3], ["a", "b", "c"]),
        ([1, 2, 3], C(["a", "b", "c"])),
        ([True, False, True], C(["a", "b", "c"])),
        ([True, False, True], ["a", "b", "c"]),
        ]
    setup_predict_only = [
        # This is not an error if both are fed in during make_builders, but it
        # is an error to pass one to make_builders and the other to
        # make_matrices.
        (["a", "b", "c"], ["a", "b", "d"]),
        ]
    termlist = make_termlist(["x"])
    def t_incremental(data1, data2):
        # Feed data1 then data2 incrementally; expect a PatsyError somewhere.
        def iter_maker():
            yield {"x": data1}
            yield {"x": data2}
        try:
            builders = design_matrix_builders([termlist], iter_maker, 0)
            build_design_matrices(builders, {"x": data1})
            build_design_matrices(builders, {"x": data2})
        except PatsyError:
            pass
        else:
            raise AssertionError
    def t_setup_predict(data1, data2):
        # Build with data1, then predict with data2; expect a PatsyError.
        def iter_maker():
            yield {"x": data1}
        builders = design_matrix_builders([termlist], iter_maker, 0)
        pytest.raises(PatsyError,
                      build_design_matrices, builders, {"x": data2})
    for (a, b) in test_cases_twoway:
        t_incremental(a, b)
        t_incremental(b, a)
        t_setup_predict(a, b)
        t_setup_predict(b, a)
    for (a, b) in test_cases_oneway:
        t_incremental(a, b)
        t_setup_predict(a, b)
    for (a, b) in setup_predict_only:
        t_setup_predict(a, b)
        t_setup_predict(b, a)

    pytest.raises(PatsyError,
                  make_matrix,
                  {"x": [1, 2, 3], "y": [1, 2, 3, 4]},
                  2, [["x"], ["y"]])

def test_data_independent_builder():
    data = {"x": [1, 2, 3]}
    def iter_maker():
        yield data

    # Trying to build a matrix that doesn't depend on the data at all is an
    # error, if:
    # - the index argument is not given
    # - the data is not a DataFrame
    # - there are no other matrices
    null_builder = design_matrix_builders([make_termlist()], iter_maker, 0)[0]
    pytest.raises(PatsyError, build_design_matrices, [null_builder], data)

    intercept_builder = design_matrix_builders([make_termlist([])],
                                               iter_maker,
                                               eval_env=0)[0]
    pytest.raises(PatsyError,
                  build_design_matrices, [intercept_builder], data)

    pytest.raises(PatsyError,
                  build_design_matrices,
                  [null_builder, intercept_builder], data)

    # If data is a DataFrame, it sets the number of rows.
    if have_pandas:
        int_m, null_m = build_design_matrices([intercept_builder,
                                               null_builder],
                                              pandas.DataFrame(data))
        assert np.allclose(int_m, [[1], [1], [1]])
        assert null_m.shape == (3, 0)

    # If there are other matrices that do depend on the data, we make the
    # data-independent matrices have the same number of rows.
    x_termlist = make_termlist(["x"])

    builders = design_matrix_builders([x_termlist, make_termlist()],
                                      iter_maker,
                                      eval_env=0)
    x_m, null_m = build_design_matrices(builders, data)
    assert np.allclose(x_m, [[1], [2], [3]])
    assert null_m.shape == (3, 0)

    builders = design_matrix_builders([x_termlist, make_termlist([])],
                                      iter_maker,
                                      eval_env=0)
    x_m, null_m = build_design_matrices(builders, data)
    x_m, intercept_m = build_design_matrices(builders, data)
    assert np.allclose(x_m, [[1], [2], [3]])
    assert np.allclose(intercept_m, [[1], [1], [1]])

def test_same_factor_in_two_matrices():
    # A factor shared between two builders should be evaluated consistently.
    data = {"x": [1, 2, 3], "a": ["a1", "a2", "a1"]}
    def iter_maker():
        yield data
    t1 = make_termlist(["x"])
    t2 = make_termlist(["x", "a"])
    builders = design_matrix_builders([t1, t2], iter_maker, eval_env=0)
    m1, m2 = build_design_matrices(builders, data)
    check_design_matrix(m1, 1, t1, column_names=["x"])
    assert np.allclose(m1, [[1], [2], [3]])
    check_design_matrix(m2, 2, t2, column_names=["x:a[a1]", "x:a[a2]"])
    assert np.allclose(m2, [[1, 0], [0, 2], [3, 0]])

def test_eval_env_type_builder():
    # eval_env must be an int or EvalEnvironment, not an arbitrary object.
    data = {"x": [1, 2, 3]}
    def iter_maker():
        yield data
    pytest.raises(TypeError,
                  design_matrix_builders,
                  [make_termlist("x")], iter_maker, "foo")

def test_categorical():
    # Categorical data can be memorized from one representation and then
    # transformed from another (strings, C() objects, pandas.Categorical).
    data_strings = {"a": ["a1", "a2", "a1"]}
    data_categ = {"a": C(["a2", "a1", "a2"])}
    datas = [data_strings, data_categ]
    if have_pandas_categorical:
        data_pandas = {"a": pandas.Categorical(["a1", "a2", "a2"])}
        datas.append(data_pandas)
    def t(data1, data2):
        def iter_maker():
            yield data1
        builders = design_matrix_builders([make_termlist(["a"])],
                                          iter_maker,
                                          eval_env=0)
        build_design_matrices(builders, data2)
    for data1 in datas:
        for data2 in datas:
            t(data1, data2)

def test_contrast():
    # Check the various ways of specifying contrasts via C(): default,
    # Sum (class or instance), Sum(omit=...), raw matrices, ContrastMatrix.
    from patsy.contrasts import ContrastMatrix, Sum
    values = ["a1", "a3", "a1", "a2"]

    # No intercept in model, full-rank coding of 'a'
    m = make_matrix({"a": C(values)}, 3, [["a"]],
                    column_names=["a[a1]", "a[a2]", "a[a3]"])

    assert np.allclose(m, [[1, 0, 0],
                           [0, 0, 1],
                           [1, 0, 0],
                           [0, 1, 0]])

    for s in (Sum, Sum()):
        m = make_matrix({"a": C(values, s)}, 3, [["a"]],
                        column_names=["a[mean]", "a[S.a1]", "a[S.a2]"])
        # Output from R
        assert np.allclose(m, [[1, 1, 0],
                               [1,-1, -1],
                               [1, 1, 0],
                               [1, 0, 1]])

    m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [["a"]],
                    column_names=["a[mean]", "a[S.a2]", "a[S.a3]"])
    # Output from R
    assert np.allclose(m, [[1, -1, -1],
                           [1, 0, 1],
                           [1, -1, -1],
                           [1, 1, 0]])

    # Intercept in model, non-full-rank coding of 'a'
    m = make_matrix({"a": C(values)}, 3, [[], ["a"]],
                    column_names=["Intercept", "a[T.a2]", "a[T.a3]"])

    assert np.allclose(m, [[1, 0, 0],
                           [1, 0, 1],
                           [1, 0, 0],
                           [1, 1, 0]])

    for s in (Sum, Sum()):
        m = make_matrix({"a": C(values, s)}, 3, [[], ["a"]],
                        column_names=["Intercept", "a[S.a1]", "a[S.a2]"])
        # Output from R
        assert np.allclose(m, [[1, 1, 0],
                               [1,-1, -1],
                               [1, 1, 0],
                               [1, 0, 1]])

    m = make_matrix({"a": C(values, Sum(omit=0))}, 3, [[], ["a"]],
                    column_names=["Intercept", "a[S.a2]", "a[S.a3]"])
    # Output from R
    assert np.allclose(m, [[1, -1, -1],
                           [1, 0, 1],
                           [1, -1, -1],
                           [1, 1, 0]])

    # Weird ad hoc less-than-full-rank coding of 'a'
    m = make_matrix({"a": C(values, [[7, 12], [2, 13], [8, -1]])},
                    2, [["a"]],
                    column_names=["a[custom0]", "a[custom1]"])
    assert np.allclose(m, [[7, 12],
                           [8, -1],
                           [7, 12],
                           [2, 13]])

    m = make_matrix({"a": C(values, ContrastMatrix([[7, 12],
                                                    [2, 13],
                                                    [8, -1]],
                                                   ["[foo]", "[bar]"]))},
                    2, [["a"]],
                    column_names=["a[foo]", "a[bar]"])
    assert np.allclose(m, [[7, 12],
                           [8, -1],
                           [7, 12],
                           [2, 13]])

def test_DesignInfo_subset():
    # For each combination of:
    #   formula, term names, term objects, mixed term name and term objects
    # check that results match subset of full build
    # and that removed variables don't hurt
    all_data = {"x": [1, 2],
                "y": [[3.1, 3.2],
                      [4.1, 4.2]],
                "z": [5, 6]}
    all_terms = make_termlist("x", "y", "z")
    def iter_maker():
        yield all_data
    all_builder = design_matrix_builders([all_terms], iter_maker, 0)[0]
    full_matrix = build_design_matrices([all_builder], all_data)[0]

    def t(which_terms, variables, columns):
        # Build from the subsetted design info using only `variables`, and
        # compare against the corresponding columns of the full matrix.
        sub_design_info = all_builder.subset(which_terms)
        sub_data = {}
        for variable in variables:
            sub_data[variable] = all_data[variable]
        sub_matrix = build_design_matrices([sub_design_info], sub_data)[0]
        sub_full_matrix = full_matrix[:, columns]
        if not isinstance(which_terms, six.string_types):
            assert len(which_terms) == len(sub_design_info.terms)
        assert np.array_equal(sub_matrix, sub_full_matrix)

    t("~ 0 + x + y + z", ["x", "y", "z"], slice(None))
    t(["x", "y", "z"], ["x", "y", "z"], slice(None))
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("y"), unicode("z")],
          ["x", "y", "z"], slice(None))
    t(all_terms, ["x", "y", "z"], slice(None))
    t([all_terms[0], "y", all_terms[2]], ["x", "y", "z"], slice(None))

    t("~ 0 + x + z", ["x", "z"], [0, 3])
    t(["x", "z"], ["x", "z"], [0, 3])
    # Compatibility: six.PY2 wasn't added until 1.4.0, but six.PY3 exists in
    # all versions.
    if not six.PY3:
        t([unicode("x"), unicode("z")], ["x", "z"], [0, 3])
    t([all_terms[0], all_terms[2]], ["x", "z"], [0, 3])
    t([all_terms[0], "z"], ["x", "z"], [0, 3])

    t("~ 0 + z + x", ["x", "z"], [3, 0])
    t(["z", "x"], ["x", "z"], [3, 0])
    t([six.text_type("z"), six.text_type("x")], ["x", "z"], [3, 0])
    t([all_terms[2], all_terms[0]], ["x", "z"], [3, 0])
    t([all_terms[2], "x"], ["x", "z"], [3, 0])

    t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    pytest.raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    pytest.raises(KeyError, all_builder.subset, "~ asdf")
    pytest.raises(KeyError, all_builder.subset, ["asdf"])
    pytest.raises(KeyError, all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None
patsy-0.5.2/patsy/test_highlevel.py000066400000000000000000000700611412400214200173700ustar00rootroot00000000000000# This file is part of Patsy
# Copyright (C) 2012-2013 Nathaniel Smith
# See file LICENSE.txt for license information.

# Exhaustive end-to-end tests of the top-level API.
import sys
import __future__

import six
import numpy as np
import pytest

from patsy import PatsyError
from patsy.design_info import DesignMatrix, DesignInfo
from patsy.eval import EvalEnvironment
from patsy.desc import ModelDesc, Term, INTERCEPT
from patsy.categorical import C
from patsy.contrasts import Helmert
from patsy.user_util import balanced, LookupFactor
from patsy.build import (design_matrix_builders,
                         build_design_matrices)
from patsy.highlevel import *
from patsy.util import (have_pandas,
                        have_pandas_categorical,
                        have_pandas_categorical_dtype,
                        pandas_Categorical_from_codes)
from patsy.origin import Origin

if have_pandas:
    import pandas

def check_result(expect_full_designs, lhs, rhs, data,
                 expected_rhs_values, expected_rhs_names,
                 expected_lhs_values, expected_lhs_names): # pragma: no cover
    # Shared assertion helper: checks matrix values and column names, and
    # (when full design info is expected) that rebuilding from the saved
    # design_info reproduces the same matrices.
    assert np.allclose(rhs, expected_rhs_values)
    assert rhs.design_info.column_names == expected_rhs_names
    if lhs is not None:
        assert np.allclose(lhs, expected_lhs_values)
        assert lhs.design_info.column_names == expected_lhs_names
    else:
        assert expected_lhs_values is None
        assert expected_lhs_names is None

    if expect_full_designs:
        if lhs is None:
            new_rhs, = build_design_matrices([rhs.design_info], data)
        else:
            new_lhs, new_rhs = build_design_matrices([lhs.design_info,
                                                      rhs.design_info],
                                                     data)
            assert np.allclose(new_lhs, lhs)
            assert new_lhs.design_info.column_names == expected_lhs_names
        assert np.allclose(new_rhs, rhs)
        assert new_rhs.design_info.column_names == expected_rhs_names
    else:
        assert rhs.design_info.terms is None
        assert lhs is None or lhs.design_info.terms is None

def dmatrix_pandas(formula_like, data={}, depth=0, return_type="matrix"):
    # Like dmatrix, but forces return_type="dataframe" (the incoming
    # return_type argument is intentionally ignored).
    return_type = "dataframe"
    if isinstance(depth, int):
        depth += 1
    return dmatrix(formula_like, data, depth, return_type=return_type)

def dmatrices_pandas(formula_like, data={}, depth=0, return_type="matrix"):
    # Like dmatrices, but forces return_type="dataframe".
    return_type = "dataframe"
    if isinstance(depth, int):
        depth += 1
    return dmatrices(formula_like, data, depth, return_type=return_type)

def t(formula_like, data, depth,
      expect_full_designs,
      expected_rhs_values, expected_rhs_names,
      expected_lhs_values=None, expected_lhs_names=None): # pragma: no cover
    # Main driver: exercise a formula-like through the incremental builders
    # (where applicable) and through dmatrix/dmatrices (matrix and dataframe
    # flavors), checking results each time.
    if isinstance(depth, int):
        depth += 1
    def data_iter_maker():
        return iter([data])
    if (isinstance(formula_like, six.string_types + (ModelDesc, DesignInfo))
        or (isinstance(formula_like, tuple)
            and isinstance(formula_like[0], DesignInfo))
        or hasattr(formula_like, "__patsy_get_model_desc__")):
        if expected_lhs_values is None:
            builder = incr_dbuilder(formula_like, data_iter_maker, depth)
            lhs = None
            (rhs,) = build_design_matrices([builder], data)
        else:
            builders = incr_dbuilders(formula_like, data_iter_maker, depth)
            lhs, rhs = build_design_matrices(builders, data)
        check_result(expect_full_designs, lhs, rhs, data,
                     expected_rhs_values, expected_rhs_names,
                     expected_lhs_values, expected_lhs_names)
    else:
        # Non-formula objects (raw arrays, etc.) are rejected by the
        # incremental builder API.
        pytest.raises(PatsyError, incr_dbuilders,
                      formula_like, data_iter_maker)
        pytest.raises(PatsyError, incr_dbuilder,
                      formula_like, data_iter_maker)
    one_mat_fs = [dmatrix]
    two_mat_fs = [dmatrices]
    if have_pandas:
        one_mat_fs.append(dmatrix_pandas)
        two_mat_fs.append(dmatrices_pandas)
    if expected_lhs_values is None:
        for f in one_mat_fs:
            rhs = f(formula_like, data, depth)
            check_result(expect_full_designs, None, rhs, data,
                         expected_rhs_values, expected_rhs_names,
                         expected_lhs_values, expected_lhs_names)

        # We inline assert_raises here to avoid complications with the
        # depth argument.
        for f in two_mat_fs:
            try:
                f(formula_like, data, depth)
            except PatsyError:
                pass
            else:
                raise AssertionError
    else:
        # An LHS is expected, so the one-matrix entry points must fail...
        for f in one_mat_fs:
            try:
                f(formula_like, data, depth)
            except PatsyError:
                pass
            else:
                raise AssertionError
        # ...and the two-matrix entry points must succeed.
        for f in two_mat_fs:
            (lhs, rhs) = f(formula_like, data, depth)
            check_result(expect_full_designs, lhs, rhs, data,
                         expected_rhs_values, expected_rhs_names,
                         expected_lhs_values, expected_lhs_names)

def t_invalid(formula_like, data, depth, exc=PatsyError): # pragma: no cover
    # Check that every high-level entry point rejects this formula-like with
    # the given exception type.
    if isinstance(depth, int):
        depth += 1
    fs = [dmatrix, dmatrices]
    if have_pandas:
        fs += [dmatrix_pandas, dmatrices_pandas]
    for f in fs:
        try:
            f(formula_like, data, depth)
        except exc:
            pass
        else:
            raise AssertionError

# Exercise all the different calling conventions for the high-level API
def test_formula_likes():
    # Plain array-like, rhs only
    t([[1, 2, 3], [4, 5, 6]], {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t(np.asarray([[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    t(dm, {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
    t((None, dm), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])

    # Plain array-likes, lhs and rhs
    t(([1, 2], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t(([[1], [2]], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t((np.asarray([1, 2]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t((np.asarray([[1], [2]]), np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    x_dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    y_dm = DesignMatrix([1, 2], default_column_prefix="bar")
    t((y_dm, x_dm), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"],
      [[1], [2]], ["bar0"])
    # number of rows must match
    t_invalid(([1, 2, 3], [[1, 2, 3], [4, 5, 6]]), {}, 0)

    # tuples must have the right size
    t_invalid(([[1, 2, 3]],), {}, 0)
    t_invalid(([[1, 2, 3]], [[1, 2, 3]], [[1, 2, 3]]), {}, 0)

    # plain Series and DataFrames
    if have_pandas:
        # Names are extracted
        t(pandas.DataFrame({"x": [1, 2, 3]}), {}, 0,
          False,
          [[1], [2], [3]], ["x"])
        t(pandas.Series([1, 2, 3], name="asdf"), {}, 0,
          False,
          [[1], [2], [3]], ["asdf"])
        t((pandas.DataFrame({"y": [4, 5, 6]}),
           pandas.DataFrame({"x": [1, 2, 3]})), {}, 0,
          False,
          [[1], [2], [3]], ["x"],
          [[4], [5], [6]], ["y"])
        t((pandas.Series([4, 5, 6], name="y"),
           pandas.Series([1, 2, 3], name="x")), {}, 0,
          False,
          [[1], [2], [3]], ["x"],
          [[4], [5], [6]], ["y"])
        # Or invented
        t((pandas.DataFrame([[4, 5, 6]]),
           pandas.DataFrame([[1, 2, 3]], columns=[7, 8, 9])), {}, 0,
          False,
          [[1, 2, 3]], ["x7", "x8", "x9"],
          [[4, 5, 6]], ["y0", "y1", "y2"])
        t(pandas.Series([1, 2, 3]), {}, 0,
          False,
          [[1], [2], [3]], ["x0"])
        # indices must match
        t_invalid((pandas.DataFrame([[1]], index=[1]),
                   pandas.DataFrame([[1]], index=[2])),
                  {}, 0)

    # Foreign ModelDesc factories
    class ForeignModelSource(object):
        def __patsy_get_model_desc__(self, data):
            return ModelDesc([Term([LookupFactor("Y")])],
                             [Term([LookupFactor("X")])])
    foreign_model = ForeignModelSource()
    t(foreign_model,
      {"Y": [1, 2],
       "X": [[1, 2], [3, 4]]},
      0,
      True,
      [[1, 2], [3, 4]], ["X[0]", "X[1]"],
      [[1], [2]], ["Y"])
    class BadForeignModelSource(object):
        def __patsy_get_model_desc__(self, data):
            return data
    t_invalid(BadForeignModelSource(), {}, 0)

    # string formulas
    t("y ~ x", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3], [1, 4]], ["Intercept", "x"],
      [[1], [2]], ["y"])
    t("~ x", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3], [1, 4]], ["Intercept", "x"])
    t("x + y", {"y": [1, 2], "x": [3, 4]}, 0,
      True,
      [[1, 3, 1], [1, 4, 2]], ["Intercept", "x", "y"])

    # unicode objects on py2 (must be ascii only)
    if not six.PY3:
        # ascii is fine
        t(unicode("y ~ x"),
          {"y": [1, 2], "x": [3, 4]}, 0,
          True,
          [[1, 3], [1, 4]], ["Intercept", "x"],
          [[1], [2]], ["y"])
        # non-ascii is not (even if this would be valid on py3 with its less
        # restrict variable naming rules)
        eacute = "\xc3\xa9".decode("utf-8")
        assert isinstance(eacute, unicode)
        pytest.raises(PatsyError, dmatrix, eacute, data={eacute: [1, 2]})

    # ModelDesc
    desc = ModelDesc([], [Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
      True,
      [[1.5], [2.5], [3.5]], ["x"])
    desc = ModelDesc([], [Term([]), Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5]}, 0,
      True,
      [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"])
    desc = ModelDesc([Term([LookupFactor("y")])],
                     [Term([]), Term([LookupFactor("x")])])
    t(desc, {"x": [1.5, 2.5, 3.5], "y": [10, 20, 30]}, 0,
      True,
      [[1, 1.5], [1, 2.5], [1, 3.5]], ["Intercept", "x"],
      [[10], [20], [30]], ["y"])

    # builders
    termlists = ([],
                 [Term([LookupFactor("x")])],
                 [Term([]), Term([LookupFactor("x")])],
                 )
    builders = design_matrix_builders(termlists,
                                      lambda: iter([{"x": [1, 2, 3]}]),
                                      eval_env=0)
    # twople but with no LHS
    t((builders[0], builders[2]), {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
    # single DesignInfo
    t(builders[2], {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"])
    # twople with LHS
    t((builders[1], builders[2]), {"x": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x"],
      [[10], [20], [30]], ["x"])

    # check depth arguments
    x_in_env = [1, 2, 3]
    t("~ x_in_env", {}, 0,
      True,
      [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"])
    t("~ x_in_env", {"x_in_env": [10, 20, 30]}, 0,
      True,
      [[1, 10], [1, 20], [1, 30]], ["Intercept", "x_in_env"])
    # Trying to pull x_in_env out of our *caller* shouldn't work.
t_invalid("~ x_in_env", {}, 1, exc=(NameError, PatsyError)) # But then again it should, if called from one down on the stack: def check_nested_call(): x_in_env = "asdf" t("~ x_in_env", {}, 1, True, [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"]) check_nested_call() # passing in an explicit EvalEnvironment also works: e = EvalEnvironment.capture(1) t_invalid("~ x_in_env", {}, e, exc=(NameError, PatsyError)) e = EvalEnvironment.capture(0) def check_nested_call_2(): x_in_env = "asdf" t("~ x_in_env", {}, e, True, [[1, 1], [1, 2], [1, 3]], ["Intercept", "x_in_env"]) check_nested_call_2() def test_return_pandas(): if not have_pandas: return # basic check of pulling a Series out of the environment s1 = pandas.Series([1, 2, 3], name="AA", index=[10, 20, 30]) s2 = pandas.Series([4, 5, 6], name="BB", index=[10, 20, 30]) df1 = dmatrix("s1", return_type="dataframe") assert np.allclose(df1, [[1, 1], [1, 2], [1, 3]]) assert np.array_equal(df1.columns, ["Intercept", "s1"]) assert df1.design_info.column_names == ["Intercept", "s1"] assert np.array_equal(df1.index, [10, 20, 30]) df2, df3 = dmatrices("s2 ~ s1", return_type="dataframe") assert np.allclose(df2, [[4], [5], [6]]) assert np.array_equal(df2.columns, ["s2"]) assert df2.design_info.column_names == ["s2"] assert np.array_equal(df2.index, [10, 20, 30]) assert np.allclose(df3, [[1, 1], [1, 2], [1, 3]]) assert np.array_equal(df3.columns, ["Intercept", "s1"]) assert df3.design_info.column_names == ["Intercept", "s1"] assert np.array_equal(df3.index, [10, 20, 30]) # indices are preserved if pandas is passed in directly df4 = dmatrix(s1, return_type="dataframe") assert np.allclose(df4, [[1], [2], [3]]) assert np.array_equal(df4.columns, ["AA"]) assert df4.design_info.column_names == ["AA"] assert np.array_equal(df4.index, [10, 20, 30]) df5, df6 = dmatrices((s2, s1), return_type="dataframe") assert np.allclose(df5, [[4], [5], [6]]) assert np.array_equal(df5.columns, ["BB"]) assert df5.design_info.column_names == ["BB"] assert 
np.array_equal(df5.index, [10, 20, 30]) assert np.allclose(df6, [[1], [2], [3]]) assert np.array_equal(df6.columns, ["AA"]) assert df6.design_info.column_names == ["AA"] assert np.array_equal(df6.index, [10, 20, 30]) # Both combinations of with-index and without-index df7, df8 = dmatrices((s1, [10, 11, 12]), return_type="dataframe") assert np.array_equal(df7.index, s1.index) assert np.array_equal(df8.index, s1.index) df9, df10 = dmatrices(([10, 11, 12], s1), return_type="dataframe") assert np.array_equal(df9.index, s1.index) assert np.array_equal(df10.index, s1.index) # pandas must be available import patsy.highlevel had_pandas = patsy.highlevel.have_pandas try: patsy.highlevel.have_pandas = False pytest.raises(PatsyError, dmatrix, "x", {"x": [1]}, 0, return_type="dataframe") pytest.raises(PatsyError, dmatrices, "y ~ x", {"x": [1], "y": [2]}, 0, return_type="dataframe") finally: patsy.highlevel.have_pandas = had_pandas def test_term_info(): data = balanced(a=2, b=2) rhs = dmatrix("a:b", data) assert rhs.design_info.column_names == ["Intercept", "b[T.b2]", "a[T.a2]:b[b1]", "a[T.a2]:b[b2]"] assert rhs.design_info.term_names == ["Intercept", "a:b"] assert len(rhs.design_info.terms) == 2 assert rhs.design_info.terms[0] == INTERCEPT def test_data_types(): data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": np.asarray([1, 2, 3], dtype=np.float32), "d": [True, False, True], "e": ["foo", "bar", "baz"], "f": C([1, 2, 3]), "g": C(["foo", "bar", "baz"]), "h": np.array(["foo", 1, (1, "hi")], dtype=object), } t("~ 0 + a", data, 0, True, [[1], [2], [3]], ["a"]) t("~ 0 + b", data, 0, True, [[1], [2], [3]], ["b"]) t("~ 0 + c", data, 0, True, [[1], [2], [3]], ["c"]) t("~ 0 + d", data, 0, True, [[0, 1], [1, 0], [0, 1]], ["d[False]", "d[True]"]) t("~ 0 + e", data, 0, True, [[0, 0, 1], [1, 0, 0], [0, 1, 0]], ["e[bar]", "e[baz]", "e[foo]"]) t("~ 0 + f", data, 0, True, [[1, 0, 0], [0, 1, 0], [0, 0, 1]], ["f[1]", "f[2]", "f[3]"]) t("~ 0 + g", data, 0, True, [[0, 0, 1], [1, 0, 0], [0, 1, 
0]], ["g[bar]", "g[baz]", "g[foo]"]) # This depends on Python's sorting behavior: t("~ 0 + h", data, 0, True, [[0, 1, 0], [1, 0, 0], [0, 0, 1]], ["h[1]", "h[foo]", "h[(1, 'hi')]"]) def test_categorical(): data = balanced(a=2, b=2) # There are more exhaustive tests for all the different coding options in # test_build; let's just make sure that C() and stuff works. t("~ C(a)", data, 0, True, [[1, 0], [1, 0], [1, 1], [1, 1]], ["Intercept", "C(a)[T.a2]"]) t("~ C(a, levels=['a2', 'a1'])", data, 0, True, [[1, 1], [1, 1], [1, 0], [1, 0]], ["Intercept", "C(a, levels=['a2', 'a1'])[T.a1]"]) t("~ C(a, Treatment(reference=-1))", data, 0, True, [[1, 1], [1, 1], [1, 0], [1, 0]], ["Intercept", "C(a, Treatment(reference=-1))[T.a1]"]) # Different interactions t("a*b", data, 0, True, [[1, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 1, 1]], ["Intercept", "a[T.a2]", "b[T.b2]", "a[T.a2]:b[T.b2]"]) t("0 + a:b", data, 0, True, [[1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]], ["a[a1]:b[b1]", "a[a2]:b[b1]", "a[a1]:b[b2]", "a[a2]:b[b2]"]) t("1 + a + a:b", data, 0, True, [[1, 0, 0, 0], [1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 0, 1]], ["Intercept", "a[T.a2]", "a[a1]:b[T.b2]", "a[a2]:b[T.b2]"]) # Changing contrast with C() data["a"] = C(data["a"], Helmert) t("a", data, 0, True, [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"]) t("C(a, Treatment)", data, 0, True, [[1, 0], [1, 0], [1, 1], [1, 1]], ["Intercept", "C(a, Treatment)[T.a2]"]) # That didn't affect the original object t("a", data, 0, True, [[1, -1], [1, -1], [1, 1], [1, 1]], ["Intercept", "a[H.a2]"]) def test_builtins(): data = {"x": [1, 2, 3], "y": [4, 5, 6], "a b c": [10, 20, 30]} t("0 + I(x + y)", data, 0, True, [[1], [2], [3], [4], [5], [6]], ["I(x + y)"]) t("Q('a b c')", data, 0, True, [[1, 10], [1, 20], [1, 30]], ["Intercept", "Q('a b c')"]) t("center(x)", data, 0, True, [[1, -1], [1, 0], [1, 1]], ["Intercept", "center(x)"]) def test_incremental(): # incr_dbuilder(s) # stateful transformations datas = [ {"a": 
["a2", "a2", "a2"], "x": [1, 2, 3]}, {"a": ["a2", "a2", "a1"], "x": [4, 5, 6]}, ] x = np.asarray([1, 2, 3, 4, 5, 6]) sin_center_x = np.sin(x - np.mean(x)) x_col = sin_center_x - np.mean(sin_center_x) def data_iter_maker(): return iter(datas) builders = incr_dbuilders("1 ~ a + center(np.sin(center(x)))", data_iter_maker) lhs, rhs = build_design_matrices(builders, datas[1]) assert lhs.design_info.column_names == ["Intercept"] assert rhs.design_info.column_names == ["Intercept", "a[T.a2]", "center(np.sin(center(x)))"] assert np.allclose(lhs, [[1], [1], [1]]) assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], x_col[3:]))) builder = incr_dbuilder("~ a + center(np.sin(center(x)))", data_iter_maker) (rhs,) = build_design_matrices([builder], datas[1]) assert rhs.design_info.column_names == ["Intercept", "a[T.a2]", "center(np.sin(center(x)))"] assert np.allclose(lhs, [[1], [1], [1]]) assert np.allclose(rhs, np.column_stack(([1, 1, 1], [1, 1, 0], x_col[3:]))) pytest.raises(PatsyError, incr_dbuilder, "x ~ x", data_iter_maker) pytest.raises(PatsyError, incr_dbuilders, "x", data_iter_maker) def test_env_transform(): t("~ np.sin(x)", {"x": [1, 2, 3]}, 0, True, [[1, np.sin(1)], [1, np.sin(2)], [1, np.sin(3)]], ["Intercept", "np.sin(x)"]) # Term ordering: # 1) all 0-order no-numeric # 2) all 1st-order no-numeric # 3) all 2nd-order no-numeric # 4) ... # 5) all 0-order with the first numeric interaction encountered # 6) all 1st-order with the first numeric interaction encountered # 7) ... # 8) all 0-order with the second numeric interaction encountered # 9) ... 
def test_term_order(): data = balanced(a=2, b=2) data["x1"] = np.linspace(0, 1, 4) data["x2"] = data["x1"] ** 2 def t_terms(formula, order): m = dmatrix(formula, data) assert m.design_info.term_names == order t_terms("a + b + x1 + x2", ["Intercept", "a", "b", "x1", "x2"]) t_terms("b + a + x2 + x1", ["Intercept", "b", "a", "x2", "x1"]) t_terms("0 + x1 + a + x2 + b + 1", ["Intercept", "a", "b", "x1", "x2"]) t_terms("0 + a:b + a + b + 1", ["Intercept", "a", "b", "a:b"]) t_terms("a + a:x1 + x2 + x1 + b", ["Intercept", "a", "b", "x1", "a:x1", "x2"]) t_terms("0 + a:x1:x2 + a + x2:x1:b + x2 + x1 + a:x1 + x1:x2 + x1:a:x2:a:b", ["a", "x1:x2", "a:x1:x2", "x2:x1:b", "x1:a:x2:b", "x2", "x1", "a:x1"]) def _check_division(expect_true_division): # pragma: no cover # We evaluate the formula "I(x / y)" in our *caller's* scope, so the # result depends on whether our caller has done 'from __future__ import # division'. data = {"x": 5, "y": 2} m = dmatrix("0 + I(x / y)", data, 1) if expect_true_division: assert np.allclose(m, [[2.5]]) else: assert np.allclose(m, [[2]]) def test_future(): if __future__.division.getMandatoryRelease() < sys.version_info: # This is Python 3, where division is already default return # no __future__.division in this module's scope _check_division(False) # create an execution context where __future__.division is in effect exec ("from __future__ import division\n" "_check_division(True)\n") def test_multicolumn(): data = { "a": ["a1", "a2"], "X": [[1, 2], [3, 4]], "Y": [[1, 3], [2, 4]], } t("X*Y", data, 0, True, [[1, 1, 2, 1, 3, 1 * 1, 2 * 1, 1 * 3, 2 * 3], [1, 3, 4, 2, 4, 3 * 2, 4 * 2, 3 * 4, 4 * 4]], ["Intercept", "X[0]", "X[1]", "Y[0]", "Y[1]", "X[0]:Y[0]", "X[1]:Y[0]", "X[0]:Y[1]", "X[1]:Y[1]"]) t("a:X + Y", data, 0, True, [[1, 1, 0, 2, 0, 1, 3], [1, 0, 3, 0, 4, 2, 4]], ["Intercept", "a[a1]:X[0]", "a[a2]:X[0]", "a[a1]:X[1]", "a[a2]:X[1]", "Y[0]", "Y[1]"]) def test_dmatrix_dmatrices_no_data(): x = [1, 2, 3] y = [4, 5, 6] assert np.allclose(dmatrix("x"), 
[[1, 1], [1, 2], [1, 3]]) lhs, rhs = dmatrices("y ~ x") assert np.allclose(lhs, [[4], [5], [6]]) assert np.allclose(rhs, [[1, 1], [1, 2], [1, 3]]) def test_designinfo_describe(): lhs, rhs = dmatrices("y ~ x + a", {"y": [1, 2, 3], "x": [4, 5, 6], "a": ["a1", "a2", "a3"]}) assert lhs.design_info.describe() == "y" assert rhs.design_info.describe() == "1 + a + x" def test_evalfactor_reraise(): # This will produce a PatsyError, but buried inside the factor evaluation, # so the original code has no way to give it an appropriate origin= # attribute. EvalFactor should notice this, and add a useful origin: def raise_patsy_error(x): raise PatsyError("WHEEEEEE") formula = "raise_patsy_error(X) + Y" try: dmatrix(formula, {"X": [1, 2, 3], "Y": [4, 5, 6]}) except PatsyError as e: assert e.origin == Origin(formula, 0, formula.index(" ")) else: assert False # This will produce a KeyError, which on Python 3 we can do wrap without # destroying the traceback, so we do so. On Python 2 we let the original # exception escape. 
try: dmatrix("1 + x[1]", {"x": {}}) except Exception as e: if sys.version_info[0] >= 3: assert isinstance(e, PatsyError) assert e.origin == Origin("1 + x[1]", 4, 8) else: assert isinstance(e, KeyError) else: assert False def test_dmatrix_NA_action(): data = {"x": [1, 2, 3, np.nan], "y": [np.nan, 20, 30, 40]} return_types = ["matrix"] if have_pandas: return_types.append("dataframe") for return_type in return_types: mat = dmatrix("x + y", data=data, return_type=return_type) assert np.array_equal(mat, [[1, 2, 20], [1, 3, 30]]) if return_type == "dataframe": assert mat.index.equals(pandas.Index([1, 2])) pytest.raises(PatsyError, dmatrix, "x + y", data=data, return_type=return_type, NA_action="raise") lmat, rmat = dmatrices("y ~ x", data=data, return_type=return_type) assert np.array_equal(lmat, [[20], [30]]) assert np.array_equal(rmat, [[1, 2], [1, 3]]) if return_type == "dataframe": assert lmat.index.equals(pandas.Index([1, 2])) assert rmat.index.equals(pandas.Index([1, 2])) pytest.raises(PatsyError, dmatrices, "y ~ x", data=data, return_type=return_type, NA_action="raise") # Initial release for the NA handling code had problems with # non-data-dependent matrices like "~ 1". 
lmat, rmat = dmatrices("y ~ 1", data=data, return_type=return_type) assert np.array_equal(lmat, [[20], [30], [40]]) assert np.array_equal(rmat, [[1], [1], [1]]) if return_type == "dataframe": assert lmat.index.equals(pandas.Index([1, 2, 3])) assert rmat.index.equals(pandas.Index([1, 2, 3])) pytest.raises(PatsyError, dmatrices, "y ~ 1", data=data, return_type=return_type, NA_action="raise") def test_0d_data(): # Use case from statsmodels/statsmodels#1881 data_0d = {"x1": 1.1, "x2": 1.2, "a": "a1"} for formula, expected in [ ("x1 + x2", [[1, 1.1, 1.2]]), ("C(a, levels=('a1', 'a2')) + x1", [[1, 0, 1.1]]), ]: mat = dmatrix(formula, data_0d) assert np.allclose(mat, expected) assert np.allclose(build_design_matrices([mat.design_info], data_0d)[0], expected) if have_pandas: data_series = pandas.Series(data_0d) assert np.allclose(dmatrix(formula, data_series), expected) assert np.allclose(build_design_matrices([mat.design_info], data_series)[0], expected) def test_env_not_saved_in_builder(): x_in_env = [1, 2, 3] design_matrix = dmatrix("x_in_env", {}) x_in_env = [10, 20, 30] design_matrix2 = dmatrix(design_matrix.design_info, {}) assert np.allclose(design_matrix, design_matrix2) def test_C_and_pandas_categorical(): if not have_pandas_categorical: return objs = [pandas_Categorical_from_codes([1, 0, 1], ["b", "a"])] if have_pandas_categorical_dtype: objs.append(pandas.Series(objs[0])) for obj in objs: d = {"obj": obj} assert np.allclose(dmatrix("obj", d), [[1, 1], [1, 0], [1, 1]]) assert np.allclose(dmatrix("C(obj)", d), [[1, 1], [1, 0], [1, 1]]) assert np.allclose(dmatrix("C(obj, levels=['b', 'a'])", d), [[1, 1], [1, 0], [1, 1]]) assert np.allclose(dmatrix("C(obj, levels=['a', 'b'])", d), [[1, 0], [1, 1], [1, 0]]) patsy-0.5.2/patsy/test_regressions.py000066400000000000000000000015271412400214200177650ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2013 Nathaniel Smith # See file LICENSE.txt for license information. 
# Regression tests for fixed bugs (when not otherwise better covered somewhere # else) from patsy import (EvalEnvironment, dmatrix, build_design_matrices, PatsyError, Origin) def test_issue_11(): # Give a sensible error message for level mismatches # (At some points we've failed to put an origin= on these errors) env = EvalEnvironment.capture() data = {"X" : [0,1,2,3], "Y" : [1,2,3,4]} formula = "C(X) + Y" new_data = {"X" : [0,0,1,2,3,3,4], "Y" : [1,2,3,4,5,6,7]} info = dmatrix(formula, data) try: build_design_matrices([info.design_info], new_data) except PatsyError as e: assert e.origin == Origin(formula, 0, 4) else: assert False patsy-0.5.2/patsy/test_splines_bs_data.py000066400000000000000000004312061412400214200205550ustar00rootroot00000000000000# This file auto-generated by tools/get-R-bs-test-vectors.R # Using: R version 2.15.1 (2012-06-22) import numpy as np R_bs_test_x = np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.893890380859375, 656.8408355712890625, 985.26125335693359375, 1477.8918800354003906, 2216.8378200531005859, ]) R_bs_test_data = """ --BEGIN TEST CASE-- degree=1 df=3 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.98937395581474985029, 0.97343488953687462573, 0.94952629012006184439, 0.91366339099484261688, 0.85986904230701377561, 0.77917751927527056921, 0.65814023472765570411, 0.47658430790623346196, 0.20425041767410004323, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.010626044185250137566, 0.026565110463125343049, 0.050473709879938155609, 0.086336609005157369245, 0.14013095769298619664, 0.22082248072472943079, 0.34185976527234429589, 0.52341569209376659355, 0.79574958232589998453, 0.99556855753085560234, 0.98227423012342263142, 0.96233273901227300851, 0.93242050234554862964, 0.88755214734546206135, 0.82024961484533231992, 0.71929581609513748575, 0.56786511796984540101, 0.34071907078190727391, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0044314424691443508181, 0.017725769876577403272, 0.037667260987726984556, 0.067579497654451342603, 0.11244785265453789702, 0.17975038515466773559, 0.28070418390486245874, 0.43213488203015459899, 0.6592809292180927816, 1, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=5 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.9161205766710354137, 0.79030144167758842322, 0.60157273918741804852, 0.31847968545216254199, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.083879423328964614059, 0.20969855832241152127, 0.39842726081258189597, 0.68152031454783745801, 0.9846005774783446185, 0.89220404234841199642, 0.7536092396535130078, 0.54571703561116458037, 0.23387872954764196698, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015399422521655438748, 0.10779595765158807297, 0.24639076034648701996, 0.45428296438883541963, 0.76612127045235811629, 0.96572040707016604255, 0.86288162828066417021, 0.7086234600964114172, 0.47723620782003217666, 0.13015532940546331586, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.034279592929833957449, 0.13711837171933582979, 0.29137653990358863831, 0.52276379217996793436, 0.86984467059453673965, 0.94202898550724645244, 0.82608695652173913526, 0.65217391304347827052, 0.39130434782608697342, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.057971014492753623892, 0.17391304347826089249, 0.34782608695652178499, 0.60869565217391308209, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=12 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.52173913043478281626, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.47826086956521718374, 0.90243902439024414885, 0.36585365853658557977, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.097560975609755906657, 0.63414634146341442023, 0.77777777777777779011, 0.16666666666666651864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.22222222222222218213, 
0.83333333333333348136, 0.625, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.37499999999999994449, 0.96992481203007530066, 0.47368421052631592971, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.030075187969924695869, 0.52631578947368407029, 0.86440677966101697738, 0.30508474576271171763, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.13559322033898302262, 0.69491525423728828237, 0.72815533980582491935, 0.087378640776698143777, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.27184466019417513616, 0.91262135922330189786, 0.57446808510638269762, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42553191489361730238, 0.9375, 0.4218750000000004996, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.062499999999999958367, 0.57812499999999944489, 0.82300884955752262595, 0.23893805309734547637, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.17699115044247745732, 0.76106194690265460689, 0.67346938775510212238, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.32653061224489787762, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=None knots=np.array([]) output=np.array([1, 0.99977435171677508929, 0.9994358792919375567, 0.99892817065468131332, 0.99816660769879694826, 0.99702426326497040066, 0.99531074661423057925, 0.99274047163812084715, 0.98888505917395630451, 0.98310194047770937953, 0.97442726243333910308, 0.96141524536678357737, 0.94189721976695039984, 0.91262018136720068906, 0.86870462376757595635, 0.8028312873681389128, 0.70402128276898334747, 0.55580627587024999947, 0.33348376552215003299, 0, 0, 0.00022564828322499611425, 0.00056412070806249024497, 0.001071829345318731563, 0.0018333923012030933775, 0.0029757367350296362075, 0.0046892533857694502358, 0.0072595283618791719288, 0.011114940826043754468, 0.01689805952229062741, 0.025572737566660935088, 0.038584754633216401809, 0.058102780233049600156, 0.087379818632799394207, 
0.13129537623242407141, 0.19716871263186111496, 0.29597871723101670804, 0.44419372412975000053, 0.66651623447785002252, 1, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=None knots=np.array([100, ]) output=np.array([1, 0.994949494949495028, 0.98737373737373745897, 0.97601010101010110542, 0.9589646464646465196, 0.93339646464646475188, 0.89504419191919204479, 0.83751578282828287314, 0.75122316919191922668, 0.62178424873737381251, 0.42762586805555558023, 0.13638829703282828731, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0050505050505050509344, 0.01262626262626262777, 0.023989898989898991721, 0.041035353535353535914, 0.066603535353535359143, 0.10495580808080809398, 0.16248421717171718237, 0.24877683080808082883, 0.37821575126262629851, 0.5723741319444445308, 0.86361170296717182371, 0.98594774828339049044, 0.95530148510216805757, 0.90933209033033446378, 0.84037799817258396207, 0.7369468599359582095, 0.58180015258101969167, 0.3490800915486118039, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.014052251716609454046, 0.044698514897831886916, 0.090667909669665536221, 0.15962200182741601018, 0.26305314006404173499, 0.41819984741898030833, 0.65091990845138814059, 1, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=None knots=np.array([1000, ]) output=np.array([1, 0.99949949949949945527, 0.99874874874874874919, 0.99762262262262257906, 0.99593343343343343488, 0.9933996496496496631, 0.98959897397397400542, 0.98389796046046040789, 0.97534644019019023364, 0.96251915978478475022, 0.94327823917667663611, 0.91441685826451446495, 0.87112478689627126371, 0.80618667984390635084, 0.70877951926535909255, 0.5626687783975381496, 0.34350266709580673519, 0.014753500143209615295, 0, 0, 0, 0.00050050050050050049616, 0.0012512512512512512404, 0.0023773773773773775736, 0.0040665665665665668566, 0.0066003503503503499136, 0.0104010260260260258, 0.016102039539539540064, 
0.02465355980980980799, 0.037480840215215215083, 0.056721760823323322254, 0.08558314173548547954, 0.12887521310372873629, 0.19381332015609359365, 0.29122048073464090745, 0.4373312216024618504, 0.6564973329041932093, 0.98524649985679035868, 0.60726740066762052717, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.39273259933237941732, 1, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([1, 0.94444444444444441977, 0.86111111111111104943, 0.73611111111111104943, 0.54861111111111104943, 0.26736111111111110494, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.055555555555555552472, 0.13888888888888889506, 0.26388888888888889506, 0.45138888888888883955, 0.73263888888888883955, 0.98454861111111113825, 0.92126736111111118266, 0.82634548611111113825, 0.68396267361111118266, 0.47038845486111113825, 0.15002712673611112715, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015451388888888889506, 0.07873263888888888673, 0.17365451388888888951, 0.31603732638888892836, 0.52961154513888886175, 0.84997287326388892836, 0.96694851345486110272, 0.89486721462673612937, 0.78674526638454855831, 0.62456234402126731275, 0.38128796047634549993, 0.016376385158962673133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.033051486545138890338, 0.10513278537326388451, 0.21325473361545138618, 0.37543765597873263173, 0.61871203952365450007, 0.98362361484103733034, 0.60726740066762052717, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.39273259933237941732, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=3 intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.97919016410100090386, 0.9687852461515014113, 0.95317786922725200593, 0.92976680384087795339, 0.89465020576131693009, 0.84197530864197533962, 0.76296296296296306494, 0.64444444444444448639, 0.46666666666666667407, 0.2000000000000000111, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.020809835898999137771, 0.031214753848498706656, 0.046822130772748063454, 0.070233196159122088242, 0.1053497942386831393, 0.15802469135802471589, 0.23703703703703704608, 0.35555555555555556912, 0.53333333333333332593, 0.80000000000000004441, 0.99674423566950098863, 0.98697694267800384349, 0.9723260031907582368, 0.95034959395988971576, 0.9173849801135870452, 0.86793805934413292835, 0.7937676781899518641, 0.68251210645868021221, 0.51562874886177267886, 0.26530371246641143435, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0032557643304990334897, 0.013023057321996133959, 0.027673996809241787481, 0.049650406040110263428, 0.082615019886412982553, 0.13206194065586704389, 0.20623232181004816366, 0.3174878935413198433, 0.48437125113822732114, 0.73469628753358862117, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=5 intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.85634118967452310667, 0.78451178451178460449, 0.67676767676767679571, 0.51515151515151524908, 0.2727272727272727626, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.14365881032547700435, 0.21548821548821550653, 0.3232323232323232598, 0.48484848484848486194, 0.72727272727272729291, 0.9846005774783446185, 0.89220404234841199642, 0.7536092396535130078, 0.54571703561116458037, 0.23387872954764196698, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015399422521655438748, 0.10779595765158807297, 0.24639076034648701996, 0.45428296438883541963, 0.76612127045235811629, 0.96572040707016604255, 0.86288162828066417021, 0.7086234600964114172, 0.47723620782003217666, 0.13015532940546331586, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.034279592929833957449, 0.13711837171933582979, 0.29137653990358863831, 0.52276379217996793436, 0.86984467059453673965, 0.9590229415870602514, 0.8770688247611807542, 0.7541376495223615084, 0.56974088666413258419, 0.29314574237678914237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.040977058412939762477, 0.12293117523881928743, 
0.24586235047763857486, 0.43025911333586747132, 0.70685425762321085763, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=12 intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.51111111111111118266, 0.2666666666666668295, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.48888888888888881734, 0.7333333333333331705, 0.90243902439024414885, 0.36585365853658557977, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.097560975609755906657, 0.63414634146341442023, 0.77777777777777779011, 0.16666666666666651864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.22222222222222218213, 0.83333333333333348136, 0.625, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.37499999999999994449, 0.96992481203007530066, 0.47368421052631592971, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.030075187969924695869, 0.52631578947368407029, 0.86440677966101697738, 0.30508474576271171763, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.13559322033898302262, 0.69491525423728828237, 0.72815533980582491935, 0.087378640776698143777, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.27184466019417513616, 0.91262135922330189786, 0.57446808510638269762, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42553191489361730238, 0.9375, 0.4218750000000004996, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.062499999999999958367, 0.57812499999999944489, 0.82300884955752262595, 0.23893805309734547637, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.17699115044247745732, 0.76106194690265460689, 0.80946623645948467818, 0.41649034915717175753, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.19053376354051526631, 0.58350965084282824247, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([]) output=np.array([0.99966666666666659236, 0.99949999999999994404, 
0.99924999999999997158, 0.99887499999999995737, 0.99831249999999993605, 0.9974687500000000151, 0.99620312499999996714, 0.99430468750000000622, 0.99145703124999995381, 0.98718554687499993072, 0.98077832031250000711, 0.97116748046875001066, 0.95675122070312501599, 0.93512683105468752398, 0.90269024658203123046, 0.85403536987304684569, 0.78105305480957032405, 0.67157958221435543056, 0.50736937332153320135, 0.26105405998229980202, 0.0003333333333333333222, 0.00050000000000000001041, 0.00075000000000000001561, 0.001124999999999999915, 0.0016874999999999999809, 0.0025312500000000000798, 0.0037968749999999999029, 0.0056953124999999998543, 0.0085429687499999993477, 0.012814453124999999889, 0.019221679687499999833, 0.02883251953124999975, 0.043248779296874997891, 0.064873168945312503775, 0.097309753417968741784, 0.14596463012695312655, 0.21894694519042967595, 0.32842041778564451393, 0.49263062667846679865, 0.73894594001770019798, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) output=np.array([0.98999999999999999112, 0.98499999999999998668, 0.97750000000000003553, 0.96625000000000005329, 0.94937499999999996891, 0.92406250000000000888, 0.88609375000000001332, 0.82914062499999996447, 0.74371093750000005773, 0.61556640625000003109, 0.42334960937499999112, 0.13502441406250001443, 0, 0, 0, 0, 0, 0, 0, 0, 0.010000000000000000208, 0.014999999999999999445, 0.022499999999999999167, 0.03375000000000000222, 0.050625000000000003331, 0.075937500000000004996, 0.11390625000000000056, 0.17085937500000000777, 0.25628906249999999778, 0.38443359375000002442, 0.57665039062500000888, 0.86497558593750001332, 0.98974264210668094766, 0.96737258384967661495, 0.93381749646417022692, 0.88348486538591053385, 0.80798591876852099425, 0.69473749884243662933, 0.52486486895331019298, 0.27005592411962048294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.010257357893318963873, 0.032627416150323274024, 
0.066182503535829731445, 0.11651513461408942451, 0.192014081231478978, 0.30526250115756325965, 0.47513513104668975151, 0.72994407588037946155, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.99899999999999999911, 0.99850000000000005418, 0.99775000000000002576, 0.99662499999999998312, 0.9949375000000000302, 0.9924062500000000453, 0.98860937500000001243, 0.98291406250000001865, 0.97437109374999997247, 0.96155664062500001421, 0.94233496093750002132, 0.91350244140625003197, 0.87025366210937504796, 0.80538049316406257194, 0.7080707397460938024, 0.56210610961914064809, 0.34315916442871097214, 0.014738746643066406167, 0, 0, 0.0010000000000000000208, 0.0015000000000000000312, 0.0022500000000000002637, 0.0033749999999999999618, 0.0050625000000000001596, 0.0075937499999999998057, 0.011390624999999999709, 0.017085937499999998695, 0.025628906249999999778, 0.038443359374999999667, 0.0576650390624999995, 0.086497558593749995781, 0.12974633789062500755, 0.19461950683593751132, 0.29192926025390625311, 0.43789389038085940742, 0.65684083557128902786, 0.9852612533569335973, 0.76105405998229980202, 0.39158108997344970303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.23894594001770019798, 0.60841891002655035248, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.9000000000000000222, 0.85000000000000008882, 0.7750000000000000222, 0.66250000000000008882, 0.4937500000000000222, 0.24062500000000000555, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10000000000000000555, 0.1500000000000000222, 0.22500000000000000555, 0.3375000000000000222, 0.5062499999999999778, 0.7593750000000000222, 0.98454861111111113825, 0.92126736111111118266, 0.82634548611111113825, 0.68396267361111118266, 0.47038845486111113825, 
0.15002712673611112715, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015451388888888889506, 0.07873263888888888673, 0.17365451388888888951, 0.31603732638888892836, 0.52961154513888886175, 0.84997287326388892836, 0.96694851345486110272, 0.89486721462673612937, 0.78674526638454855831, 0.62456234402126731275, 0.38128796047634549993, 0.016376385158962673133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.033051486545138890338, 0.10513278537326388451, 0.21325473361545138618, 0.37543765597873263173, 0.61871203952365450007, 0.98362361484103733034, 0.76105405998229980202, 0.39158108997344970303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.23894594001770019798, 0.60841891002655035248, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=3 intercept=FALSE Boundary.knots=None knots=None output=np.array([0, 0.040686586141131603211, 0.10171646535282900803, 0.19326128417037510832, 0.33057851239669427956, 0.53655435473617296704, 0.84551811824539113704, 0.97622585438335818253, 0.92273402674591387118, 0.84249628528974740416, 0.72213967310549775913, 0.54160475482912329159, 0.27080237741456153477, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023774145616641918083, 0.077265973254086212085, 0.1575037147102526236, 0.27786032689450229638, 0.45839524517087676392, 0.72919762258543852074, 0.98941973879980160689, 0.9418085633989089489, 0.87039180029756990642, 0.76326665564556128718, 0.60257893866754841383, 0.3615473632005290483, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.010580261200198394847, 0.058191436601091106606, 0.12960819970243014909, 0.23673334435443876833, 0.39742106133245164168, 0.63845263679947106272, 1, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=5 intercept=FALSE Boundary.knots=None knots=None output=np.array([0, 0.1342281879194630323, 0.33557046979865756686, 0.63758389261744941034, 0.98069963811821481148, 0.83594692400482528694, 0.61881785283474100012, 0.2931242460796145699, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.019300361881785188523, 0.16405307599517471306, 0.38118214716525899988, 0.70687575392038548561, 0.9581151832460734763, 0.80104712041884851281, 0.56544502617801106759, 0.21204188481675489975, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.041884816753926495947, 0.19895287958115145943, 0.4345549738219888769, 0.78795811518324510025, 0.93133047210300479168, 0.75965665236051571618, 0.50214592274678215844, 0.11587982832618169693, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.068669527896995374849, 0.2403433476394844226, 0.49785407725321800809, 0.88412017167381840022, 0.89905362776025266047, 0.70977917981072580211, 0.42586750788643545906, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10094637223974732565, 0.29022082018927419789, 0.57413249211356454094, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=12 intercept=FALSE Boundary.knots=None knots=None output=np.array([0, 0.53333333333333343695, 0.81818181818181801024, 0.16363636363636296922, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.18181818181818207303, 0.83636363636363708629, 0.57446808510638280865, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42553191489361724686, 0.9000000000000000222, 0.29999999999999982236, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10000000000000003331, 0.70000000000000017764, 0.67346938775510178932, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.32653061224489815517, 0.96923076923076900702, 0.41538461538461529665, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.030769230769230916656, 0.58461538461538464784, 0.7714285714285711304, 0.085714285714284479956, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.22857142857142889736, 0.91428571428571558943, 0.5217391304347820391, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.47826086956521790539, 0.86086956521739110837, 0.23478260869565212299, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.13913043478260883612, 0.76521739130434784926, 0.62499999999999966693, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.37500000000000027756, 0.93599999999999949907, 0.35999999999999854339, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.064000000000000500933, 0.64000000000000145661, 0.7199999999999991962, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2800000000000008038, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=None knots=np.array([]) output=np.array([0, 0.00022564828322499611425, 0.00056412070806249024497, 0.001071829345318731563, 0.0018333923012030933775, 0.0029757367350296362075, 0.0046892533857694502358, 0.0072595283618791719288, 0.011114940826043754468, 0.01689805952229062741, 0.025572737566660935088, 0.038584754633216401809, 0.058102780233049600156, 0.087379818632799394207, 0.13129537623242407141, 0.19716871263186111496, 0.29597871723101670804, 0.44419372412975000053, 0.66651623447785002252, 1, ]).reshape((20, 1, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=None knots=np.array([100, ]) output=np.array([0, 0.0050505050505050509344, 0.01262626262626262777, 0.023989898989898991721, 0.041035353535353535914, 0.066603535353535359143, 0.10495580808080809398, 0.16248421717171718237, 0.24877683080808082883, 0.37821575126262629851, 0.5723741319444445308, 0.86361170296717182371, 0.98594774828339049044, 0.95530148510216805757, 0.90933209033033446378, 0.84037799817258396207, 0.7369468599359582095, 0.58180015258101969167, 0.3490800915486118039, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.014052251716609454046, 0.044698514897831886916, 0.090667909669665536221, 0.15962200182741601018, 0.26305314006404173499, 0.41819984741898030833, 0.65091990845138814059, 1, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=None knots=np.array([1000, ]) 
output=np.array([0, 0.00050050050050050049616, 0.0012512512512512512404, 0.0023773773773773775736, 0.0040665665665665668566, 0.0066003503503503499136, 0.0104010260260260258, 0.016102039539539540064, 0.02465355980980980799, 0.037480840215215215083, 0.056721760823323322254, 0.08558314173548547954, 0.12887521310372873629, 0.19381332015609359365, 0.29122048073464090745, 0.4373312216024618504, 0.6564973329041932093, 0.98524649985679035868, 0.60726740066762052717, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.39273259933237941732, 1, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([0, 0.055555555555555552472, 0.13888888888888889506, 0.26388888888888889506, 0.45138888888888883955, 0.73263888888888883955, 0.98454861111111113825, 0.92126736111111118266, 0.82634548611111113825, 0.68396267361111118266, 0.47038845486111113825, 0.15002712673611112715, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015451388888888889506, 0.07873263888888888673, 0.17365451388888888951, 0.31603732638888892836, 0.52961154513888886175, 0.84997287326388892836, 0.96694851345486110272, 0.89486721462673612937, 0.78674526638454855831, 0.62456234402126731275, 0.38128796047634549993, 0.016376385158962673133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.033051486545138890338, 0.10513278537326388451, 0.21325473361545138618, 0.37543765597873263173, 0.61871203952365450007, 0.98362361484103733034, 0.60726740066762052717, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.39273259933237941732, 1, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=3 intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.075249853027630819735, 0.1128747795414462296, 0.16931216931216935828, 0.25396825396825400967, 0.38095238095238104226, 0.57142857142857150787, 0.85714285714285731732, 0.97622585438335818253, 0.92273402674591387118, 
0.84249628528974740416, 0.72213967310549775913, 0.54160475482912329159, 0.27080237741456153477, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.023774145616641918083, 0.077265973254086212085, 0.1575037147102526236, 0.27786032689450229638, 0.45839524517087676392, 0.72919762258543852074, 0.99235077739698696053, 0.95792927568342844946, 0.90629702311309068286, 0.82884864425758408846, 0.71267607597432414135, 0.53841722354943410966, 0.27702894491209917316, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0076492226030130125794, 0.042070724316571522783, 0.093702976886909289389, 0.1711513557424159393, 0.28732392402567591416, 0.46158277645056589034, 0.72297105508790082684, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=5 intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.21164021164021157295, 0.31746031746031733167, 0.47619047619047605302, 0.71428571428571407953, 0.98069963811821481148, 0.83594692400482528694, 0.61881785283474100012, 0.2931242460796145699, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.019300361881785188523, 0.16405307599517471306, 0.38118214716525899988, 0.70687575392038548561, 0.9581151832460734763, 0.80104712041884851281, 0.56544502617801106759, 0.21204188481675489975, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.041884816753926495947, 0.19895287958115145943, 0.4345549738219888769, 0.78795811518324510025, 0.93133047210300479168, 0.75965665236051571618, 0.50214592274678215844, 0.11587982832618169693, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.068669527896995374849, 0.2403433476394844226, 0.49785407725321800809, 0.88412017167381840022, 0.93044657380826634174, 0.80003389969876526067, 0.60441488853451352803, 0.31098637178813609561, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.069553426191733672135, 0.19996610030123476709, 0.39558511146548641646, 0.68901362821186384888, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=12 intercept=FALSE 
Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.51612903225806461283, 0.77419354838709697475, 0.81818181818181801024, 0.16363636363636296922, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.18181818181818207303, 0.83636363636363708629, 0.57446808510638280865, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42553191489361724686, 0.9000000000000000222, 0.29999999999999982236, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10000000000000003331, 0.70000000000000017764, 0.67346938775510178932, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.32653061224489815517, 0.96923076923076900702, 0.41538461538461529665, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.030769230769230916656, 0.58461538461538464784, 0.7714285714285711304, 0.085714285714284479956, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.22857142857142889736, 0.91428571428571558943, 0.5217391304347820391, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.47826086956521790539, 0.86086956521739110837, 0.23478260869565212299, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.13913043478260883612, 0.76521739130434784926, 0.62499999999999966693, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.37500000000000027756, 0.93599999999999949907, 0.35999999999999854339, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.064000000000000500933, 0.64000000000000145661, 0.84118724544512846197, 0.43281159087546033915, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.15881275455487159354, 0.56718840912453971637, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([]) output=np.array([0.0003333333333333333222, 0.00050000000000000001041, 0.00075000000000000001561, 0.001124999999999999915, 0.0016874999999999999809, 0.0025312500000000000798, 0.0037968749999999999029, 0.0056953124999999998543, 0.0085429687499999993477, 
0.012814453124999999889, 0.019221679687499999833, 0.02883251953124999975, 0.043248779296874997891, 0.064873168945312503775, 0.097309753417968741784, 0.14596463012695312655, 0.21894694519042967595, 0.32842041778564451393, 0.49263062667846679865, 0.73894594001770019798, ]).reshape((20, 1, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) output=np.array([0.010000000000000000208, 0.014999999999999999445, 0.022499999999999999167, 0.03375000000000000222, 0.050625000000000003331, 0.075937500000000004996, 0.11390625000000000056, 0.17085937500000000777, 0.25628906249999999778, 0.38443359375000002442, 0.57665039062500000888, 0.86497558593750001332, 0.98974264210668094766, 0.96737258384967661495, 0.93381749646417022692, 0.88348486538591053385, 0.80798591876852099425, 0.69473749884243662933, 0.52486486895331019298, 0.27005592411962048294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.010257357893318963873, 0.032627416150323274024, 0.066182503535829731445, 0.11651513461408942451, 0.192014081231478978, 0.30526250115756325965, 0.47513513104668975151, 0.72994407588037946155, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.0010000000000000000208, 0.0015000000000000000312, 0.0022500000000000002637, 0.0033749999999999999618, 0.0050625000000000001596, 0.0075937499999999998057, 0.011390624999999999709, 0.017085937499999998695, 0.025628906249999999778, 0.038443359374999999667, 0.0576650390624999995, 0.086497558593749995781, 0.12974633789062500755, 0.19461950683593751132, 0.29192926025390625311, 0.43789389038085940742, 0.65684083557128902786, 0.9852612533569335973, 0.76105405998229980202, 0.39158108997344970303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.23894594001770019798, 0.60841891002655035248, ]).reshape((20, 2, ), order="F") --END TEST CASE-- 
--BEGIN TEST CASE-- degree=1 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.10000000000000000555, 0.1500000000000000222, 0.22500000000000000555, 0.3375000000000000222, 0.5062499999999999778, 0.7593750000000000222, 0.98454861111111113825, 0.92126736111111118266, 0.82634548611111113825, 0.68396267361111118266, 0.47038845486111113825, 0.15002712673611112715, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015451388888888889506, 0.07873263888888888673, 0.17365451388888888951, 0.31603732638888892836, 0.52961154513888886175, 0.84997287326388892836, 0.96694851345486110272, 0.89486721462673612937, 0.78674526638454855831, 0.62456234402126731275, 0.38128796047634549993, 0.016376385158962673133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.033051486545138890338, 0.10513278537326388451, 0.21325473361545138618, 0.37543765597873263173, 0.61871203952365450007, 0.98362361484103733034, 0.76105405998229980202, 0.39158108997344970303, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.23894594001770019798, 0.60841891002655035248, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=5 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.96845940607276881362, 0.92240303675860391142, 0.85609306993676004272, 0.76270864919645953162, 0.63576547531534310931, 0.47305239057526615731, 0.28507250058887945166, 0.10824783418564158655, 0.0085209665393945269174, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.031533426700978403612, 0.077552412011353655252, 0.14374742102560256196, 0.23683044051100490823, 0.36304322063581223601, 0.52407464745263665495, 0.70834870097260460575, 0.8774087197542927985, 0.96206652916722557034, 0.94530079752693674244, 0.90793500327984399956, 0.85375309463923021447, 0.77659028109030259213, 0.66978920735324132263, 0.52868210857343611586, 0.35651607003382906891, 0.175425242216674937, 0.037891852318801787225, 0, 0, 7.1666852050360488124e-06, 4.4542776169713856592e-05, 
0.00015945105252372870864, 0.00046062008693220732759, 0.0011900631879368495612, 0.002868106285078954841, 0.0065607821717948554968, 0.014278782285967288324, 0.029185282443915855355, 0.053915675965905400513, 0.089616579303442189808, 0.13947317683937743293, 0.20621746127156848072, 0.28916251068209719577, 0.37804948423631462573, 0.44191546230800060613, 0.4167872147195200716, 0.22637517283088642861, 0, 0, 5.4104786148500394417e-10, 8.4538728357031864466e-09, 5.7985113780088147304e-08, 2.9020560343812349123e-07, 1.2408609078339073411e-06, 4.8556870182056249453e-06, 1.8016266721020254751e-05, 6.4663774098311141273e-05, 0.00022722184946421446292, 0.0007834394838982231728, 0.0024428479280998137424, 0.0067202853620942429314, 0.016883622849287124174, 0.039626432891546964354, 0.087460636122680179838, 0.179450426719582945, 0.32709043481102628714, 0.44917563313558994675, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8.7023259593866311309e-08, 5.5694886140074439238e-06, 5.3443159298083143627e-05, 0.00030863478884180254799, 0.001421849073114332283, 0.0058077710675691540318, 0.022118040938587192612, 0.080697108252778662618, 0.28655734171472191374, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=12 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.25770097670924113631, 0.00075131480090157780165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.64290814257309680801, 0.55191727963673464785, 0.16384783952351522629, 0.0025601224925549254108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.097791408043809202599, 0.4223396450334132024, 0.68523715148898289851, 0.53751029745031897455, 0.17971670556949506659, 0.0066561742803516550301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0015994726738528394164, 0.024991760528950617698, 0.14994115610379393777, 0.44102623014868380658, 0.69378612010084461659, 0.57911832988268308053, 0.21512754628498662046, 0.013768162962239142988, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00097385288370810028057, 
0.018903349908442362848, 0.12592791140580222864, 0.39992732893636862013, 0.67912381051853998315, 0.61416869876328705757, 0.2513505357806551932, 0.023605422218318485722, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00056926292385829722204, 0.014298166900596829057, 0.10543398177170558438, 0.36126140155236396989, 0.66048745076429016265, 0.64290852334776726895, 0.28801011480255123143, 0.036001264350318869234, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00031466142476788003723, 0.010801736722109872915, 0.088001307706519454888, 0.32534582615335716493, 0.63869155767138829916, 0.66560703209160065885, 0.32480624491998255632, 0.050709395542809954094, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00016070574853527342459, 0.0081402282805572035579, 0.073225347399450108066, 0.29228033065100789134, 0.61441387376474365656, 0.68269380491744569017, 0.36150149884261828515, 0.067452174711159121334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7.2980126610408391891e-05, 0.006111372907072773128, 0.06075244688059205922, 0.26203239546958334572, 0.58821589703491272694, 0.69467005250274826977, 0.39790980589205254825, 0.08594851807268334698, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.7434434681658694525e-05, 0.004564404070160951038, 0.05026345997459336773, 0.22896647107718723357, 0.49280704180866574671, 0.39171522957479332216, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.9144147875867501007e-05, 0.0089113017089054759323, 0.10925932166656721067, 0.44968558969337457665, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.383063271446895644e-05, 0.072650662659148546041, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=None knots=np.array([]) output=np.array([1, 0.9993232078902789528, 0.99830859239281100059, 0.99678795718714330309, 0.99450990091574942298, 0.99109932847208381812, 0.98599810402219045802, 0.97837913458786918142, 0.96702443008955374371, 0.95015762953345339614, 0.92522695834704149487, 0.88865464163271656872, 
0.83562330741598667139, 0.7600990769655836532, 0.65556596659430688145, 0.51745533329454529436, 0.34894530919915062173, 0.17170001728344266856, 0.037087203733223626789, 0, 0, 0.00067663938125675493745, 0.0016904532697119580269, 0.0032085988210942785123, 0.0054800274275235618532, 0.0088741592009194299184, 0.013936134910759763808, 0.021463528321228261819, 0.032607690503775640933, 0.048995387512752772152, 0.072844594278869884141, 0.10699389717286836299, 0.15464119556617272888, 0.21832955541935578081, 0.29724527028257624606, 0.38124822804791375086, 0.44010197217760271826, 0.41166179703994132399, 0.22237265440082615298, 0, 0, 1.5271697506625337419e-07, 9.5415795571804482003e-07, 3.4427604254568415078e-06, 1.0065494095385778498e-05, 2.6485976820866183047e-05, 6.5657954600734260006e-05, 0.00015695450829847096968, 0.00036650624565625982386, 0.00084215780726247437687, 0.0019117237011529813161, 0.0042940168564642817658, 0.0095393459205458924072, 0.020904202366139650049, 0.044925430954654600735, 0.093631406124953353576, 0.18502397635429304601, 0.32899518168053926148, 0.4444443765651269751, 0, 0, 1.1489366970270387462e-11, 1.7952135891047479178e-10, 1.2313370007669466487e-09, 6.1626316488486427104e-09, 2.6350175916113057325e-08, 1.0311244903883365944e-07, 3.825826041044410135e-07, 1.3731610143675871737e-06, 4.8251465313854754769e-06, 1.6723672935651049261e-05, 5.7444337950753896285e-05, 0.0001961510972947611009, 0.00066716524892103249485, 0.0022633321684622557078, 0.0076650325325876428328, 0.025928742268953787475, 0.087643003996076676576, 0.29609576530082332146, 1, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=None knots=np.array([100, ]) output=np.array([1, 0.98492487882601165161, 0.96259746673448087773, 0.92974304223814019377, 0.88187654067159670923, 0.81320203136184721071, 0.71702357673990413378, 0.58746094552406258327, 0.42394246616286163087, 0.24039152271518871018, 0.078197326716827678106, 
0.0025370634003338671768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.015071708273510188089, 0.037381259008501491192, 0.070180462942842899987, 0.11790098280764790828, 0.18621717511273339074, 0.28155389351842002865, 0.40920060085307918829, 0.56848075164647782209, 0.74296035718046105067, 0.88664340493727211712, 0.92755941161894106539, 0.87470363121335126255, 0.79564729322794469635, 0.68622539174158070363, 0.5416556180357140482, 0.36526473871966497198, 0.17973006169687555378, 0.038821693326525116841, 0, 0, 3.4126433211387775282e-06, 2.1270238934697356366e-05, 7.6467258985853174243e-05, 0.00022233858750032365649, 0.00058020375050652503793, 0.0014202218581983020763, 0.003329890582419185209, 0.0075460478259266952628, 0.0165401227086783148, 0.03478495575013232366, 0.06861779430820802439, 0.12096540781627201921, 0.191329633412285538, 0.27905349779077742722, 0.37374631578125083742, 0.44360195032526716918, 0.42250875072786614473, 0.23095694223535656597, 0, 0, 2.5715731172923263021e-10, 4.0180829957692598212e-09, 2.7560031267981358852e-08, 1.3793325533914163604e-07, 5.8977491272702679876e-07, 2.3078834777630869564e-06, 8.5630404390810414166e-06, 3.0734364733920997454e-05, 0.00010799739567214114531, 0.00037431259576781883034, 0.0012857306725170922646, 0.0043281861315545133023, 0.012933767638577380737, 0.033975759516814023342, 0.080531027913042049771, 0.17293083479161802662, 0.32462175106248042367, 0.45442872954598834134, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.7748388221855075202e-06, 8.9305721192384863918e-05, 0.00074535095082784331034, 0.0040670382699930507364, 0.018202476163449766294, 0.07313943651277800273, 0.27579263489213001748, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=None knots=np.array([1000, ]) output=np.array([1, 0.9984992498753757495, 0.99625094117633150592, 0.99288481020069696559, 0.98784984394255781481, 0.98032935528140474624, 0.96912034075214636974, 0.95246753354617186282, 0.92784773021983435459, 
0.89171926591812056273, 0.83930429776320658597, 0.76459714573223325207, 0.66106035705829924165, 0.52397052211349615103, 0.35606843795797349372, 0.17813877131971986301, 0.040531281972235988498, 3.2113319168708665608e-06, 0, 0, 0, 0.0015004113953972979083, 0.0037469425348356701772, 0.0071075541598605747329, 0.012127833209738946019, 0.019611910086607075437, 0.030734076107229148234, 0.04718451762439003494, 0.071340002591639736784, 0.10641511473995685089, 0.15646348082190494888, 0.22590626848315542574, 0.31787571113275914225, 0.42998547021895328069, 0.54537912943555955092, 0.61788880868397033641, 0.56161589860385985329, 0.31265651834613988891, 0.067535071081621433908, 0, 0, 3.3870374289331948997e-07, 2.1158906445184196164e-06, 7.6329082682329570748e-06, 2.2309178641915861703e-05, 5.8676185825556269023e-05, 0.00014535443145101360611, 0.00034710023984489360048, 0.00080922144066932952776, 0.0018549168973065219106, 0.0041951273738663188637, 0.0093691710332807192491, 0.020628857715017115404, 0.044564197866965707395, 0.093532235391030102423, 0.18697094954821646962, 0.34034142027880176506, 0.49294319032437394767, 0.3494912754584255099, 0, 0, 2.5484057919113140944e-11, 3.9818840498614283815e-10, 2.7311742697999542126e-09, 1.3669061339914938246e-08, 5.844616252249814789e-08, 2.2870917347201761224e-07, 8.4858959306208503663e-07, 3.0457478565146930827e-06, 1.0702444616158128632e-05, 3.7094041022036097191e-05, 0.00012741475133052262828, 0.00043507409392457165433, 0.0014798098005849495782, 0.0050201972154368997708, 0.017001470448093195659, 0.057511399145102344577, 0.1943970799975693331, 0.52239901147403788872, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.060574641985915035625, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([1, 0.84242112482853226396, 0.63852451989026048906, 0.39886884859396426473, 0.16511776352451987271, 
0.019111497248478225702, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1567541293972270211, 0.35648024152864221659, 0.58396161300859317222, 0.78843465486178421209, 0.87349958752470602263, 0.78872593441389438063, 0.64620704007646878608, 0.46633671277914778841, 0.26443067498670752569, 0.086017059388510438978, 0.0027907697403672538511, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00082460534200152601353, 0.0049930443273597038822, 0.017154488011057174995, 0.046372256997114502663, 0.10706684228609344989, 0.21001709629214043717, 0.349560411044427366, 0.52164005645501687614, 0.7052885939808604121, 0.84461817796425076033, 0.85332055763972392004, 0.74044769630139117833, 0.58689461845094414993, 0.39882902037092649028, 0.19953161831184262898, 0.045398720470901786361, 3.5969836861605402089e-06, 0, 0, 0, 1.4043223919767127475e-07, 2.1942537374636142735e-06, 1.5050386385262928333e-05, 7.5324616581368114944e-05, 0.0003220729407222703883, 0.001256955617244540557, 0.0042307394405898370374, 0.012003815755055342179, 0.030163701749655900952, 0.068814016157702301291, 0.14161203135855129909, 0.25097331049689530769, 0.38604331096967514636, 0.52843933204950088722, 0.62702364005086641541, 0.58518234575989880319, 0.32861311390225123041, 0.070981847410833104339, 0, 0, 0, 0, 0, 0, 0, 1.3676720629513371727e-08, 1.8094385139768081762e-06, 1.9415010780026244647e-05, 0.00011702928277625300924, 0.00055074648953661010323, 0.0022766412613576141911, 0.0085724666641985424603, 0.026852019770306118779, 0.070978550821878275134, 0.16387889672004127273, 0.32660594492319316995, 0.49935626095003893266, 0.36224174733681202554, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.5265375148376491226e-06, 0.00021005080907461989208, 0.0017530967576943525671, 0.0095658449172495805396, 0.042812988846006136412, 0.17202702816402348773, 0.50620176326643973042, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.060574641985915035625, 1, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=5 intercept=TRUE 
Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.93886062842918460714, 0.90924840659363448392, 0.86600789475617090396, 0.80375207763469636024, 0.71607712169296244831, 0.5968951736881422665, 0.4441302646954225497, 0.26764334705075454313, 0.1016296296296296392, 0.0080000000000000019013, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.061118708396877156896, 0.090705266259710190524, 0.13388842602098646739, 0.1960165235067286571, 0.28340857382601336578, 0.40196904868840105385, 0.55338648582541094534, 0.72701318595705810566, 0.88717054427173769326, 0.96957793066711428498, 0.95879430886579075644, 0.93088336665495841071, 0.89004087164963097134, 0.83104478735029430059, 0.74753085865141255528, 0.63305354842556382788, 0.48423310979820943789, 0.30782602005074449769, 0.13273505055352541326, 0.018080241660177655966, 2.0660861734307157164e-05, 4.6319342966912629609e-05, 0.00010365288539398855667, 0.00023130996968619285063, 0.00051400448102423420497, 0.0011347651234567903995, 0.0024798322916666664696, 0.005331933984375000421, 0.011160902197265624297, 0.022290701165771482917, 0.040764443570473873901, 0.067755947834115426431, 0.10621083986053703185, 0.15942942065206799906, 0.22958192424619683347, 0.31428339352859285816, 0.39934193575081916583, 0.44659448090264364239, 0.38447678365411958046, 0.15324078379324712618, 2.3122039887776819585e-09, 7.8036884621246768168e-09, 2.6337448559670789633e-08, 8.888888888888889516e-08, 3.0000000000000003936e-07, 1.0125000000000002586e-06, 3.4171875000000000522e-06, 1.1533007812500000494e-05, 3.892390136718749976e-05, 0.0001313681671142578227, 0.00044121305262908775248, 0.0013584768001177710377, 0.0037270943566249025922, 0.0094033956626723494421, 0.022323349639243747489, 0.050359850764330853223, 0.10765352865786298464, 0.21357717575211032646, 0.36914715819588089785, 0.43210561538410807714, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.451110638240695161e-08, 2.208710808474044903e-06, 2.1194133207095667231e-05, 0.00012239633496539237658, 
0.00056386746314699115034, 0.0023032072815124867565, 0.0087714257931084897019, 0.032002323294501686113, 0.11364100759647420558, 0.3965733591624671095, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=12 intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.19405161102201490264, 0.050007289692374980172, 0.00014579384749963551142, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.62621171786550611227, 0.59601302425037649968, 0.37359381834086596852, 0.11076923076923075873, 0.0017307692307692306051, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.17410393357590786545, 0.33496919687132054033, 0.56209998680912809377, 0.6845598495992195609, 0.49785783114523274318, 0.16619304988878053075, 0.0061552981440288964676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0056327375365711959687, 0.019010489185927782058, 0.064160401002506278756, 0.20369706674784149314, 0.48150804971555583034, 0.70730977578155906915, 0.57961920601900573935, 0.21512754628498662046, 0.013768162962239142988, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00097385288370810028057, 0.018903349908442362848, 0.12592791140580222864, 0.39992732893636862013, 0.67912381051853998315, 0.61416869876328705757, 0.2513505357806551932, 0.023605422218318485722, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00056926292385829722204, 0.014298166900596829057, 0.10543398177170558438, 0.36126140155236396989, 0.66048745076429016265, 0.64290852334776726895, 0.28801011480255123143, 0.036001264350318869234, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00031466142476788003723, 0.010801736722109872915, 0.088001307706519454888, 0.32534582615335716493, 0.63869155767138829916, 0.66560703209160065885, 0.32480624491998255632, 0.050709395542809954094, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00016070574853527342459, 0.0081402282805572035579, 0.073225347399450108066, 0.29228033065100789134, 0.61441387376474365656, 0.68269380491744569017, 0.36150149884261828515, 0.067452174711159121334, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7.2980126610408391891e-05, 0.006111372907072773128, 0.06076004701308617556, 0.2632968675132925096, 0.60214193117458769677, 0.75884556552586945877, 0.54356876952344934661, 0.23438768460872402843, 0.031926653602217504313, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.9834302187538887846e-05, 0.0032999320264517715412, 0.03634723886267338111, 0.16935876914548009253, 0.40315927025443520915, 0.49317292913076199445, 0.23502476599459576345, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.3311201208237671141e-06, 0.004343490617491435786, 0.053266330072947364049, 0.25527517275326377932, 0.49679177449105649256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.6301491679855540114e-06, 0.017164213507250107582, 0.23625680591213024662, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([]) output=np.array([0.99900033329629622791, 0.99850074987500003765, 0.9977516870781251157, 0.9966287954511718894, 0.99494603816333004875, 0.99242545546139537826, 0.98865256904256049175, 0.98301118751693283837, 0.97458941720955349908, 0.96204716698765402327, 0.94343627795644391387, 0.91597241507140558792, 0.87578413786327058421, 0.81773305675994945041, 0.73555685971681306068, 0.62291325464817093316, 0.47647663168935677769, 0.30289524241999998821, 0.13060889169932207721, 0.017790631148623885227, 0.00099933344444444439057, 0.0014985003750000000684, 0.0022466262656250001253, 0.003367410521484375148, 0.0050454284787597655781, 0.0075553552955017096171, 0.011304291651615143086, 0.016891872202619076515, 0.02519288281652980882, 0.037464410913716578166, 0.055469506915694993809, 0.081581580145842852447, 0.11876628136094780075, 0.1701874001952980997, 0.23787846467254591953, 0.31938947503806314199, 0.40070172821401484065, 0.44437167848162473227, 0.38044436785608970464, 0.1510757732538548781, 3.3322222222222224585e-07, 7.4962500000000004377e-07, 
1.6862343750000001263e-06, 3.7926035156249994121e-06, 8.5285524902343742213e-06, 1.9173024810791016186e-05, 4.3084569087982178912e-05, 9.675554396295546727e-05, 0.00021707648827975990682, 0.00048631783460495621712, 0.0010871132367784521085, 0.0024220359003474063384, 0.0053686857976677918383, 0.011806522493618208658, 0.025643231250562635581, 0.054587395598501557703, 0.11232581293261123534, 0.217309662419816918, 0.36939270915445387988, 0.42763873999331791786, 3.7037037037037035514e-11, 1.2500000000000000779e-10, 4.2187500000000000366e-10, 1.4238281249999998508e-09, 4.8054199218749994447e-09, 1.6218292236328128606e-08, 5.4736736297607425773e-08, 1.8473648500442501235e-07, 6.234856368899344564e-07, 2.1042640245035291742e-06, 7.1018910826994105393e-06, 2.3968882404110509299e-05, 8.0894978113872978626e-05, 0.00027302055113432129778, 0.00092144436007833422416, 0.003109874715264377993, 0.010495827164017276431, 0.035423416678558306003, 0.11955403129013431052, 0.40349485560420322861, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) output=np.array([0.97029899999999991156, 0.95567162499999991354, 0.93400735937500012351, 0.9021287441406251606, 0.8556839255371094799, 0.78904911782836917311, 0.69572725948715208322, 0.57001276798105227073, 0.41135095097535856468, 0.23325165409902481883, 0.075874787916011168787, 0.0024617100802805510704, 0, 0, 0, 0, 0, 0, 0, 0, 0.029691034444444444618, 0.044305991250000002768, 0.06594240796875000532, 0.097758673769531262421, 0.1440642544409179715, 0.21038931479278566439, 0.30302618229869843214, 0.4272397443474625911, 0.58266048231123623857, 0.75392639264340943761, 0.89747740349010274308, 0.94501107412874996161, 0.90598359089303848179, 0.84593074837236126307, 0.76092088936222046502, 0.64439302204983184286, 0.49290686036830000383, 0.31333990595172406257, 0.13511264658550559137, 0.018404101188231606484, 9.9644444444444446617e-06, 
2.2379999999999999178e-05, 5.0219999999999996902e-05, 0.00011253937500000000703, 0.00025167585937500001371, 0.0005610808300781249848, 0.0012449161120605468141, 0.002741945576934814565, 0.0059698621442985533997, 0.012758825336830616898, 0.02643475186140507513, 0.051808149318846051512, 0.091620856894323976505, 0.14688590543057178373, 0.21984251899359169569, 0.30818245617558825966, 0.39752224089834975462, 0.44889001546541440479, 0.38890408238266155339, 0.15565065849749706861, 1.1111111111111110654e-09, 3.7499999999999996649e-09, 1.2656249999999997938e-08, 4.2714843750000004622e-08, 1.4416259765625001146e-07, 4.8654876708984372583e-07, 1.6421020889282224026e-06, 5.5420945501327518529e-06, 1.8704569106698035598e-05, 6.3127920735105882001e-05, 0.00021305673248098232295, 0.00071906647212331535352, 0.002394473001231371169, 0.0071486127371715351211, 0.018946704087009914874, 0.045842738337222695144, 0.10249145334758576198, 0.20932413300445151805, 0.3687199031810673433, 0.43701763935524273741, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0792114061605856464e-06, 3.473345989527011488e-05, 0.00028988755717800383316, 0.0015817834373569547059, 0.0070794453857644188549, 0.028445945578409920912, 0.10726336785076537317, 0.38892760095902845219, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.99700299900000011188, 0.99550674662500016066, 0.99326517610937503644, 0.98990913343164044225, 0.9848892569724120305, 0.97739130722329725653, 0.96621588612179176714, 0.94961298739566668559, 0.92506696964451773368, 0.88904678238644452293, 0.83678890194350608045, 0.76230564732187677812, 0.65907915850713516548, 0.52240018193475146191, 0.35500130049334505111, 0.17760488924393591503, 0.040409809679633923452, 3.2017075519046728555e-06, 0, 0, 0.0029960014444444446613, 0.0044910048749999993689, 0.0067297664531250009357, 0.010079493029296874088, 0.015085171786376951053, 
0.02255122235714721729, 0.033655024381153107738, 0.050097300181899548366, 0.074283671347553537068, 0.10950057690181402847, 0.15997106401940666687, 0.23050015162429349225, 0.3250574690342031281, 0.44299931223779698275, 0.57083333883520204211, 0.6679625481063525827, 0.65410023301458430911, 0.45433806106867219432, 0.19591333754898310193, 0.026685946722935827841, 9.9944444444444452216e-07, 2.2481249999999997573e-06, 5.0561718750000005462e-06, 1.1369267578125000521e-05, 2.5556824951171875081e-05, 5.7421764678955089446e-05, 0.00012892528684616086799, 0.00028915821297883983077, 0.00064748855101794003473, 0.0014463279196678473747, 0.0032187283638391594488, 0.0071222944066175568334, 0.015620687524320137074, 0.033781444174048706752, 0.071401027591217913759, 0.14510293850391842163, 0.2740024758137300509, 0.43938848718810108451, 0.4727098830096430615, 0.21327068651931446741, 1.1111111111111111947e-10, 3.750000000000000492e-10, 1.2656250000000001661e-09, 4.2714843749999999659e-09, 1.4416259765625000816e-08, 4.8654876708984385818e-08, 1.6421020889282225085e-07, 5.5420945501327514294e-07, 1.8704569106698034751e-06, 6.3127920735105875225e-06, 2.1305673248098235006e-05, 7.190664721233152451e-05, 0.00024268493434161889522, 0.00081906165340296394756, 0.0027643330802350029977, 0.0093296241457931361474, 0.031487481492051827558, 0.10627025003567493189, 0.31773412222685926132, 0.53482276673031980962, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.013642657214514538819, 0.22522060002743005125, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.72899999999999987033, 0.61412500000000003197, 0.46548437500000000577, 0.29077539062500001865, 0.12037084960937501077, 0.013932281494140625472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.26810999999999995946, 0.37949624999999997943, 0.52058109375000005681, 0.6792815039062500837, 
0.81701452880859382066, 0.86124092926025386241, 0.77303028831905795659, 0.63334751997894722653, 0.45705661219484272628, 0.2591685045544720456, 0.084305319906679110353, 0.002735233422533945441, 0, 0, 0, 0, 0, 0, 0, 0, 0.0028890000000000000749, 0.0063753749999999990983, 0.013923140625000001921, 0.029904662109375004103, 0.062484875244140626604, 0.12438889535522459906, 0.22549483803134068305, 0.36206390866050391919, 0.53039385361573909705, 0.70984283151236515774, 0.8454844390975239099, 0.85252274549222006872, 0.73970724860508996201, 0.58630772383249318835, 0.39843019135055551816, 0.19933208669353075226, 0.045353321750430879156, 3.5933867024743799741e-06, 0, 0, 9.9999999999999995475e-07, 3.3749999999999998737e-06, 1.1390625000000002433e-05, 3.8443359375000005034e-05, 0.00012974633789062501275, 0.00043789389038085933673, 0.0014748635551852823811, 0.00458723586310897085, 0.012535204497088376849, 0.030902287796947387061, 0.069803749964903766267, 0.14306169398214990673, 0.2539595816136605011, 0.39367833655080086697, 0.54755337589389629915, 0.67082414394774525501, 0.67380308481828354861, 0.47157640817504958841, 0.20334679251999171479, 0.027698480049178006435, 0, 0, 0, 0, 0, 0, 1.009441616706038712e-08, 1.3354974400350171813e-06, 1.4329692329799520979e-05, 8.6376136215493001325e-05, 0.00040649103089326774291, 0.0016803271030961133663, 0.0063296923222741757059, 0.019902020690376800299, 0.053082350626863387955, 0.12474691161612924684, 0.25803204718826688868, 0.43676084046337160238, 0.48134381599312991984, 0.21958058358824134038, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.4774589754063323979e-06, 0.00011191892632920368619, 0.00093408212868467904808, 0.0050968577425946322637, 0.022811546243018676616, 0.091659157974876420694, 0.30166673427236384564, 0.52750033633515069909, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.013642657214514538819, 0.22522060002743005125, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=5 intercept=FALSE 
Boundary.knots=None knots=None output=np.array([0, 0.11681123728381428983, 0.2730259078882545376, 0.46749900909245512004, 0.67935641673089897097, 0.85057627482867270707, 0.88934886597560480759, 0.80216081429413377268, 0.67739211125208298458, 0.51560190205519351725, 0.32469390916595280983, 0.13698024292938629221, 0.017122530366173265709, 0, 0, 0, 0, 0, 0, 0, 0, 0.0003496520800200754106, 0.0021367360720303471555, 0.0074505054668944550172, 0.02064467788236522966, 0.049822794327671247883, 0.10672495197833482827, 0.1969631195446764349, 0.31985257188474064405, 0.47686971233712693863, 0.65666375567302859295, 0.82030294962505467815, 0.89243231477003071017, 0.82865930832017831165, 0.7146974084023058893, 0.56412932414843675044, 0.38041985226043306678, 0.187187199501233692, 0.04043243509226646798, 0, 0, 2.6689299275350298406e-08, 4.1702030117734841467e-07, 2.86034224577543261e-06, 1.4315525026353663709e-05, 6.1210311482175452555e-05, 0.00023952573005394856335, 0.00087599557622739540694, 0.0027528938137588820738, 0.0075078612376215788857, 0.018529647319498028513, 0.042210845625950982329, 0.088408426246782453872, 0.16366728062036642322, 0.25992689327612594763, 0.36501727743515482993, 0.44566378087513086603, 0.43199704724253940036, 0.23880246889964723556, 0, 0, 0, 0, 0, 0, 0, 0, 7.058496255109760934e-08, 2.4230494175743949962e-06, 2.0524370058046372483e-05, 0.00011268784152075793045, 0.00050596181960801099326, 0.0020367286170134709343, 0.0076722266846277449928, 0.025178647959627011715, 0.068676202883725015469, 0.1606491966704423624, 0.31804567985939574681, 0.46051790249468016469, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.1843748275951593331e-06, 0.00019705036194114395882, 0.0021771955326834735445, 0.013267170193993682234, 0.062770073396831202461, 0.26024719351340613871, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=12 intercept=FALSE Boundary.knots=None knots=None output=np.array([0, 0.67103518571234044288, 0.42848228882111855098, 
0.069535818471346141911, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.13629558602559715963, 0.52330141887680614587, 0.67103578567914412556, 0.33835295499291540011, 0.045464904980911415022, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0030869584374568800715, 0.048215269973619155619, 0.25524093849303608472, 0.60346220790852211913, 0.66476640009174547963, 0.3023720875414114273, 0.032137228723967029009, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0223284561703245952e-06, 0.0041874573564736911752, 0.058175656367661768287, 0.28413057856289342107, 0.62749346465948352414, 0.64458417746071206, 0.26642650281928137446, 0.020972348618427394396, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.1807309007930243297e-06, 0.0056381163644495775958, 0.070099504751495456123, 0.31573089553161143295, 0.64918292160807222757, 0.61885031117979483195, 0.23077208437307619726, 0.012169621636861570682, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.494304760976545528e-05, 0.0075476982837094025794, 0.084296696676371143941, 0.3501161628650154567, 0.66789290688121760731, 0.58719180485958433202, 0.19572881144127046715, 0.0058504368174458563218, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.3878896275372805593e-05, 0.010061177336762315224, 0.10112604997332759471, 0.38726521207131708868, 0.68285613364861541541, 0.54940228374944388712, 0.16169301614535666611, 0.001987124003607085819, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00020895877237870428959, 0.013373361432237074534, 0.12100109812882933746, 0.42699888243551975542, 0.69315383369930994029, 0.50558220978145163027, 0.12915009496447266146, 0.00028659920802505926699, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00041395678128490346389, 0.017748396997590374508, 0.14439460468269080251, 0.46888258402048205165, 0.69769409523153691488, 0.45636616933565676835, 0.098685730485585029803, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00075854547264270536476, 0.023548082194459314664, 0.16992724106945886198, 0.46662458342302287617, 0.42010939261246582621, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0032285687345316718341, 0.076722648033295162695, 0.42847050190194924113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.052734374999999840405, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=None knots=np.array([]) output=np.array([0, 0.00067663938125675493745, 0.0016904532697119580269, 0.0032085988210942785123, 0.0054800274275235618532, 0.0088741592009194299184, 0.013936134910759763808, 0.021463528321228261819, 0.032607690503775640933, 0.048995387512752772152, 0.072844594278869884141, 0.10699389717286836299, 0.15464119556617272888, 0.21832955541935578081, 0.29724527028257624606, 0.38124822804791375086, 0.44010197217760271826, 0.41166179703994132399, 0.22237265440082615298, 0, 0, 1.5271697506625337419e-07, 9.5415795571804482003e-07, 3.4427604254568415078e-06, 1.0065494095385778498e-05, 2.6485976820866183047e-05, 6.5657954600734260006e-05, 0.00015695450829847096968, 0.00036650624565625982386, 0.00084215780726247437687, 0.0019117237011529813161, 0.0042940168564642817658, 0.0095393459205458924072, 0.020904202366139650049, 0.044925430954654600735, 0.093631406124953353576, 0.18502397635429304601, 0.32899518168053926148, 0.4444443765651269751, 0, 0, 1.1489366970270387462e-11, 1.7952135891047479178e-10, 1.2313370007669466487e-09, 6.1626316488486427104e-09, 2.6350175916113057325e-08, 1.0311244903883365944e-07, 3.825826041044410135e-07, 1.3731610143675871737e-06, 4.8251465313854754769e-06, 1.6723672935651049261e-05, 5.7444337950753896285e-05, 0.0001961510972947611009, 0.00066716524892103249485, 0.0022633321684622557078, 0.0076650325325876428328, 0.025928742268953787475, 0.087643003996076676576, 0.29609576530082332146, 1, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=None knots=np.array([100, ]) output=np.array([0, 0.015071708273510188089, 0.037381259008501491192, 
0.070180462942842899987, 0.11790098280764790828, 0.18621717511273339074, 0.28155389351842002865, 0.40920060085307918829, 0.56848075164647782209, 0.74296035718046105067, 0.88664340493727211712, 0.92755941161894106539, 0.87470363121335126255, 0.79564729322794469635, 0.68622539174158070363, 0.5416556180357140482, 0.36526473871966497198, 0.17973006169687555378, 0.038821693326525116841, 0, 0, 3.4126433211387775282e-06, 2.1270238934697356366e-05, 7.6467258985853174243e-05, 0.00022233858750032365649, 0.00058020375050652503793, 0.0014202218581983020763, 0.003329890582419185209, 0.0075460478259266952628, 0.0165401227086783148, 0.03478495575013232366, 0.06861779430820802439, 0.12096540781627201921, 0.191329633412285538, 0.27905349779077742722, 0.37374631578125083742, 0.44360195032526716918, 0.42250875072786614473, 0.23095694223535656597, 0, 0, 2.5715731172923263021e-10, 4.0180829957692598212e-09, 2.7560031267981358852e-08, 1.3793325533914163604e-07, 5.8977491272702679876e-07, 2.3078834777630869564e-06, 8.5630404390810414166e-06, 3.0734364733920997454e-05, 0.00010799739567214114531, 0.00037431259576781883034, 0.0012857306725170922646, 0.0043281861315545133023, 0.012933767638577380737, 0.033975759516814023342, 0.080531027913042049771, 0.17293083479161802662, 0.32462175106248042367, 0.45442872954598834134, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.7748388221855075202e-06, 8.9305721192384863918e-05, 0.00074535095082784331034, 0.0040670382699930507364, 0.018202476163449766294, 0.07313943651277800273, 0.27579263489213001748, 1, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=None knots=np.array([1000, ]) output=np.array([0, 0.0015004113953972979083, 0.0037469425348356701772, 0.0071075541598605747329, 0.012127833209738946019, 0.019611910086607075437, 0.030734076107229148234, 0.04718451762439003494, 0.071340002591639736784, 0.10641511473995685089, 0.15646348082190494888, 0.22590626848315542574, 
0.31787571113275914225, 0.42998547021895328069, 0.54537912943555955092, 0.61788880868397033641, 0.56161589860385985329, 0.31265651834613988891, 0.067535071081621433908, 0, 0, 3.3870374289331948997e-07, 2.1158906445184196164e-06, 7.6329082682329570748e-06, 2.2309178641915861703e-05, 5.8676185825556269023e-05, 0.00014535443145101360611, 0.00034710023984489360048, 0.00080922144066932952776, 0.0018549168973065219106, 0.0041951273738663188637, 0.0093691710332807192491, 0.020628857715017115404, 0.044564197866965707395, 0.093532235391030102423, 0.18697094954821646962, 0.34034142027880176506, 0.49294319032437394767, 0.3494912754584255099, 0, 0, 2.5484057919113140944e-11, 3.9818840498614283815e-10, 2.7311742697999542126e-09, 1.3669061339914938246e-08, 5.844616252249814789e-08, 2.2870917347201761224e-07, 8.4858959306208503663e-07, 3.0457478565146930827e-06, 1.0702444616158128632e-05, 3.7094041022036097191e-05, 0.00012741475133052262828, 0.00043507409392457165433, 0.0014798098005849495782, 0.0050201972154368997708, 0.017001470448093195659, 0.057511399145102344577, 0.1943970799975693331, 0.52239901147403788872, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.060574641985915035625, 1, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([0, 0.1567541293972270211, 0.35648024152864221659, 0.58396161300859317222, 0.78843465486178421209, 0.87349958752470602263, 0.78872593441389438063, 0.64620704007646878608, 0.46633671277914778841, 0.26443067498670752569, 0.086017059388510438978, 0.0027907697403672538511, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00082460534200152601353, 0.0049930443273597038822, 0.017154488011057174995, 0.046372256997114502663, 0.10706684228609344989, 0.21001709629214043717, 0.349560411044427366, 0.52164005645501687614, 0.7052885939808604121, 0.84461817796425076033, 0.85332055763972392004, 0.74044769630139117833, 0.58689461845094414993, 
0.39882902037092649028, 0.19953161831184262898, 0.045398720470901786361, 3.5969836861605402089e-06, 0, 0, 0, 1.4043223919767127475e-07, 2.1942537374636142735e-06, 1.5050386385262928333e-05, 7.5324616581368114944e-05, 0.0003220729407222703883, 0.001256955617244540557, 0.0042307394405898370374, 0.012003815755055342179, 0.030163701749655900952, 0.068814016157702301291, 0.14161203135855129909, 0.25097331049689530769, 0.38604331096967514636, 0.52843933204950088722, 0.62702364005086641541, 0.58518234575989880319, 0.32861311390225123041, 0.070981847410833104339, 0, 0, 0, 0, 0, 0, 0, 1.3676720629513371727e-08, 1.8094385139768081762e-06, 1.9415010780026244647e-05, 0.00011702928277625300924, 0.00055074648953661010323, 0.0022766412613576141911, 0.0085724666641985424603, 0.026852019770306118779, 0.070978550821878275134, 0.16387889672004127273, 0.32660594492319316995, 0.49935626095003893266, 0.36224174733681202554, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.5265375148376491226e-06, 0.00021005080907461989208, 0.0017530967576943525671, 0.0095658449172495805396, 0.042812988846006136412, 0.17202702816402348773, 0.50620176326643973042, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.060574641985915035625, 1, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=5 intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.20791834248678603414, 0.29902312242189532654, 0.42058546782444744538, 0.57127689681172544311, 0.73389736255977222612, 0.86146730066325027941, 0.87986332643537079612, 0.79291382076038952054, 0.66958340212426392668, 0.50965824665676062732, 0.32095096582466564605, 0.13540118870728076739, 0.016925148588410078576, 0, 0, 0, 0, 0, 0, 0, 0.0012695557913002517882, 0.0028166847710175994465, 0.0062031625467877079053, 0.013503589345765960872, 0.028852424480264381862, 0.059752006107076843788, 0.11700693595529969293, 0.20637160010579103098, 0.32827217823508614281, 0.48461478028531612683, 0.66503213888472001436, 
0.83266225254172265835, 0.91555098377957688793, 0.87163430177910428132, 0.78404142346700400612, 0.66397286412208100792, 0.507883804798330174, 0.32286071959112722096, 0.1392180359854287286, 0.018963308663199267973, 1.4499417527118528165e-07, 4.893553415402503421e-07, 1.6515742776983449575e-06, 5.5740631872319126435e-06, 1.8812463256907710572e-05, 6.3492063492063516403e-05, 0.000214285714285714356, 0.0007145407126011744112, 0.0021431007122621795712, 0.0057158011131347528228, 0.013955556387630695808, 0.03166115066306447734, 0.066415223634956249699, 0.12418853438999710725, 0.20209136094044644061, 0.29685688827833944803, 0.39377505733963352741, 0.45246903544196670488, 0.39638281649040951748, 0.1597891894713379668, 0, 0, 0, 0, 0, 0, 0, 3.8421218399486777976e-08, 1.3189283878698809637e-06, 1.117194478839918966e-05, 6.133890298378388427e-05, 0.00027540808793216300541, 0.0011086439970568826713, 0.0041767162702452207551, 0.013792752688869525796, 0.038347512235655537016, 0.093327637726685397368, 0.20095020769675242533, 0.36605493991801690834, 0.4433598240831516657, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4.4756065322375969297e-07, 7.4462903680102773569e-05, 0.0008227353639237797861, 0.0050135001353509671407, 0.023720037270153770254, 0.09834420760614477619, 0.37788767778231108219, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=12 intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.61574942641905272556, 0.5337910278271761344, 0.26630652260904380535, 0.04321728691476599965, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21709357831448763965, 0.40428360185244682778, 0.62558529044677246844, 0.63046184489143586305, 0.30534794185536534572, 0.041029980550522253402, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0094918061936686368846, 0.032034845903631647968, 0.10810716461572758562, 0.32213341083732455195, 0.63646722104607222903, 0.66920132452213465513, 0.3023720875414114273, 0.032137228723967029009, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 1.0223284561703245952e-06, 0.0041874573564736911752, 0.058175656367661768287, 0.28413057856289342107, 0.62749346465948352414, 0.64458417746071206, 0.26642650281928137446, 0.020972348618427394396, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.1807309007930243297e-06, 0.0056381163644495775958, 0.070099504751495456123, 0.31573089553161143295, 0.64918292160807222757, 0.61885031117979483195, 0.23077208437307619726, 0.012169621636861570682, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.494304760976545528e-05, 0.0075476982837094025794, 0.084296696676371143941, 0.3501161628650154567, 0.66789290688121760731, 0.58719180485958433202, 0.19572881144127046715, 0.0058504368174458563218, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9.3878896275372805593e-05, 0.010061177336762315224, 0.10112604997332759471, 0.38726521207131708868, 0.68285613364861541541, 0.54940228374944388712, 0.16169301614535666611, 0.001987124003607085819, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00020895877237870428959, 0.013373361432237074534, 0.12100109812882933746, 0.42699888243551975542, 0.69315383369930994029, 0.50558220978145163027, 0.12915009496447266146, 0.00028659920802505926699, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00041395678128490346389, 0.017748396997590374508, 0.14460859026960704021, 0.4755254951851585199, 0.74591378938143138022, 0.59473172632230031365, 0.2566313326020329133, 0.03495652799821041129, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00054455988572647427617, 0.016905171029782832537, 0.12340338564379622899, 0.3685583814319431939, 0.50219016290765505772, 0.24887848966988834754, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0015327300102998579149, 0.03642329303773143151, 0.22969990963832651043, 0.49849681328683803638, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.011478594851985721506, 0.21766816904506322561, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) 
knots=np.array([]) output=np.array([0.00099933344444444439057, 0.0014985003750000000684, 0.0022466262656250001253, 0.003367410521484375148, 0.0050454284787597655781, 0.0075553552955017096171, 0.011304291651615143086, 0.016891872202619076515, 0.02519288281652980882, 0.037464410913716578166, 0.055469506915694993809, 0.081581580145842852447, 0.11876628136094780075, 0.1701874001952980997, 0.23787846467254591953, 0.31938947503806314199, 0.40070172821401484065, 0.44437167848162473227, 0.38044436785608970464, 0.1510757732538548781, 3.3322222222222224585e-07, 7.4962500000000004377e-07, 1.6862343750000001263e-06, 3.7926035156249994121e-06, 8.5285524902343742213e-06, 1.9173024810791016186e-05, 4.3084569087982178912e-05, 9.675554396295546727e-05, 0.00021707648827975990682, 0.00048631783460495621712, 0.0010871132367784521085, 0.0024220359003474063384, 0.0053686857976677918383, 0.011806522493618208658, 0.025643231250562635581, 0.054587395598501557703, 0.11232581293261123534, 0.217309662419816918, 0.36939270915445387988, 0.42763873999331791786, 3.7037037037037035514e-11, 1.2500000000000000779e-10, 4.2187500000000000366e-10, 1.4238281249999998508e-09, 4.8054199218749994447e-09, 1.6218292236328128606e-08, 5.4736736297607425773e-08, 1.8473648500442501235e-07, 6.234856368899344564e-07, 2.1042640245035291742e-06, 7.1018910826994105393e-06, 2.3968882404110509299e-05, 8.0894978113872978626e-05, 0.00027302055113432129778, 0.00092144436007833422416, 0.003109874715264377993, 0.010495827164017276431, 0.035423416678558306003, 0.11955403129013431052, 0.40349485560420322861, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) output=np.array([0.029691034444444444618, 0.044305991250000002768, 0.06594240796875000532, 0.097758673769531262421, 0.1440642544409179715, 0.21038931479278566439, 0.30302618229869843214, 0.4272397443474625911, 0.58266048231123623857, 0.75392639264340943761, 
0.89747740349010274308, 0.94501107412874996161, 0.90598359089303848179, 0.84593074837236126307, 0.76092088936222046502, 0.64439302204983184286, 0.49290686036830000383, 0.31333990595172406257, 0.13511264658550559137, 0.018404101188231606484, 9.9644444444444446617e-06, 2.2379999999999999178e-05, 5.0219999999999996902e-05, 0.00011253937500000000703, 0.00025167585937500001371, 0.0005610808300781249848, 0.0012449161120605468141, 0.002741945576934814565, 0.0059698621442985533997, 0.012758825336830616898, 0.02643475186140507513, 0.051808149318846051512, 0.091620856894323976505, 0.14688590543057178373, 0.21984251899359169569, 0.30818245617558825966, 0.39752224089834975462, 0.44889001546541440479, 0.38890408238266155339, 0.15565065849749706861, 1.1111111111111110654e-09, 3.7499999999999996649e-09, 1.2656249999999997938e-08, 4.2714843750000004622e-08, 1.4416259765625001146e-07, 4.8654876708984372583e-07, 1.6421020889282224026e-06, 5.5420945501327518529e-06, 1.8704569106698035598e-05, 6.3127920735105882001e-05, 0.00021305673248098232295, 0.00071906647212331535352, 0.002394473001231371169, 0.0071486127371715351211, 0.018946704087009914874, 0.045842738337222695144, 0.10249145334758576198, 0.20932413300445151805, 0.3687199031810673433, 0.43701763935524273741, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0792114061605856464e-06, 3.473345989527011488e-05, 0.00028988755717800383316, 0.0015817834373569547059, 0.0070794453857644188549, 0.028445945578409920912, 0.10726336785076537317, 0.38892760095902845219, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.0029960014444444446613, 0.0044910048749999993689, 0.0067297664531250009357, 0.010079493029296874088, 0.015085171786376951053, 0.02255122235714721729, 0.033655024381153107738, 0.050097300181899548366, 0.074283671347553537068, 0.10950057690181402847, 0.15997106401940666687, 0.23050015162429349225, 
0.3250574690342031281, 0.44299931223779698275, 0.57083333883520204211, 0.6679625481063525827, 0.65410023301458430911, 0.45433806106867219432, 0.19591333754898310193, 0.026685946722935827841, 9.9944444444444452216e-07, 2.2481249999999997573e-06, 5.0561718750000005462e-06, 1.1369267578125000521e-05, 2.5556824951171875081e-05, 5.7421764678955089446e-05, 0.00012892528684616086799, 0.00028915821297883983077, 0.00064748855101794003473, 0.0014463279196678473747, 0.0032187283638391594488, 0.0071222944066175568334, 0.015620687524320137074, 0.033781444174048706752, 0.071401027591217913759, 0.14510293850391842163, 0.2740024758137300509, 0.43938848718810108451, 0.4727098830096430615, 0.21327068651931446741, 1.1111111111111111947e-10, 3.750000000000000492e-10, 1.2656250000000001661e-09, 4.2714843749999999659e-09, 1.4416259765625000816e-08, 4.8654876708984385818e-08, 1.6421020889282225085e-07, 5.5420945501327514294e-07, 1.8704569106698034751e-06, 6.3127920735105875225e-06, 2.1305673248098235006e-05, 7.190664721233152451e-05, 0.00024268493434161889522, 0.00081906165340296394756, 0.0027643330802350029977, 0.0093296241457931361474, 0.031487481492051827558, 0.10627025003567493189, 0.31773412222685926132, 0.53482276673031980962, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.013642657214514538819, 0.22522060002743005125, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=3 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.26810999999999995946, 0.37949624999999997943, 0.52058109375000005681, 0.6792815039062500837, 0.81701452880859382066, 0.86124092926025386241, 0.77303028831905795659, 0.63334751997894722653, 0.45705661219484272628, 0.2591685045544720456, 0.084305319906679110353, 0.002735233422533945441, 0, 0, 0, 0, 0, 0, 0, 0, 0.0028890000000000000749, 0.0063753749999999990983, 0.013923140625000001921, 0.029904662109375004103, 0.062484875244140626604, 0.12438889535522459906, 
0.22549483803134068305, 0.36206390866050391919, 0.53039385361573909705, 0.70984283151236515774, 0.8454844390975239099, 0.85252274549222006872, 0.73970724860508996201, 0.58630772383249318835, 0.39843019135055551816, 0.19933208669353075226, 0.045353321750430879156, 3.5933867024743799741e-06, 0, 0, 9.9999999999999995475e-07, 3.3749999999999998737e-06, 1.1390625000000002433e-05, 3.8443359375000005034e-05, 0.00012974633789062501275, 0.00043789389038085933673, 0.0014748635551852823811, 0.00458723586310897085, 0.012535204497088376849, 0.030902287796947387061, 0.069803749964903766267, 0.14306169398214990673, 0.2539595816136605011, 0.39367833655080086697, 0.54755337589389629915, 0.67082414394774525501, 0.67380308481828354861, 0.47157640817504958841, 0.20334679251999171479, 0.027698480049178006435, 0, 0, 0, 0, 0, 0, 1.009441616706038712e-08, 1.3354974400350171813e-06, 1.4329692329799520979e-05, 8.6376136215493001325e-05, 0.00040649103089326774291, 0.0016803271030961133663, 0.0063296923222741757059, 0.019902020690376800299, 0.053082350626863387955, 0.12474691161612924684, 0.25803204718826688868, 0.43676084046337160238, 0.48134381599312991984, 0.21958058358824134038, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.4774589754063323979e-06, 0.00011191892632920368619, 0.00093408212868467904808, 0.0050968577425946322637, 0.022811546243018676616, 0.091659157974876420694, 0.30166673427236384564, 0.52750033633515069909, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.013642657214514538819, 0.22522060002743005125, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=12 intercept=TRUE Boundary.knots=None knots=None output=np.array([1, 0.24780328615799376846, 0.0091743090758214639741, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.64369896942547122354, 0.57211971408849160436, 0.24237606578683060232, 0.044144491052264721309, 0.00040499520490831021667, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10612816657504857421, 0.39035389287158045457, 
0.63115822592338399755, 0.61942792972003923868, 0.3746409686363950664, 0.1299270153679348283, 0.01456850160548600788, 2.4933115730197022697e-06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0023615475345642161081, 0.028071577965966144214, 0.12345862383415923125, 0.3183634697319876472, 0.55248624494736087165, 0.66157907323456133231, 0.54063561857561015511, 0.27886806629206489783, 0.077562585629719432712, 0.0043678218067639239253, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8.026857613403984366e-06, 0.00028016915158263086755, 0.0029987443674941152358, 0.017947370921348515527, 0.071410333320899577192, 0.20176799613356366514, 0.41383987857855386583, 0.6154467549253210823, 0.65231295225138286042, 0.47044608242977714596, 0.21195012900662815736, 0.045727490940629646199, 0.00095767216067723433693, 0, 0, 0, 0, 0, 0, 0, 3.4493087511455702552e-09, 3.3684655772905958113e-07, 8.3400865772486489595e-06, 0.00011672374464387017944, 0.001056583561129178974, 0.0067076933166381781659, 0.030729882600253097258, 0.10383387100350169319, 0.2594160581947105304, 0.48008955662192837055, 0.64645432100283450882, 0.61990700734803594329, 0.39505102004440167951, 0.15152949559297854143, 0.022327498493473717928, 5.4255821339140527268e-05, 0, 0, 0, 0, 0, 0, 1.5550084505986725427e-12, 1.4829716211306275165e-08, 8.7432930709587782537e-07, 1.8221855962693547294e-05, 0.00022606146610042746276, 0.0018465551075490084856, 0.010668371345813800616, 0.044660809207147045274, 0.13839012473302086947, 0.31749140013695392737, 0.53873603574199457888, 0.65962064818889976081, 0.56521993555148342114, 0.310506638922921685, 0.095251921793756827439, 0.0074067894386825334357, 0, 0, 0, 0, 0, 0, 0, 9.1339471742565674011e-11, 5.7173996413972389293e-08, 2.2593599905185296879e-06, 4.0028808856421427135e-05, 0.00043513707847336695594, 0.0031885139595084076476, 0.016618589891805510966, 0.062812605113451408512, 0.1727378974109962384, 0.33696595086343833492, 0.43637044859132123609, 0.31354081400102012944, 0.068636818178064884499, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 3.7695168114557225724e-09, 5.9285591015796308443e-07, 1.6911296921410870622e-05, 0.00025529307410796154469, 0.0024263360322963763681, 0.015741731604329466804, 0.070644470707028478307, 0.21311252564833091383, 0.3762972750744253414, 0.24384934178453185338, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0868704401501409189e-12, 2.1860846716771802376e-07, 1.6330907178590211063e-05, 0.00037021035039809161273, 0.0048110469704648085865, 0.038471273482950811562, 0.18655513337806783891, 0.39552144655799137407, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.6852398034939565224e-08, 3.1097414111410212796e-05, 0.0014848575331362907741, 0.028249998152729726558, 0.25456576340175340878, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00010485760000000057303, 0.030019840638976030833, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=None knots=np.array([]) output=np.array([1, 0.99887226764047176708, 0.99718257698671308731, 0.99465232914808876519, 0.99086659019744105503, 0.98520960330607720845, 0.9767725953351565904, 0.96422555375161622671, 0.94564705949867622348, 0.91831730137969458383, 0.87851085191767241955, 0.82140061321171964348, 0.74134024099742168445, 0.63306797132011838336, 0.49472136419952228437, 0.33351966492579832035, 0.17295333506564730675, 0.053041675157364839843, 0.0041245206602366912829, 0, 0, 0.0011272234177996938269, 0.0028142442804627715303, 0.0053362072774053633667, 0.0090999196124958610377, 0.014702372431044495246, 0.023009568696928141274, 0.035255048800207078319, 0.053144756365466998271, 0.078922539871719557536, 0.11527760116980134697, 0.16482753559964863355, 0.22865514515079901625, 0.30306893078627178406, 0.37385911082806172478, 0.40954833229285447782, 0.36355794566479110452, 0.2119513600790149388, 0.041217298467029310494, 0, 0, 5.0882687398560041373e-07, 3.1769391294621280289e-06, 1.1451280924188917779e-05, 3.3428733100138665235e-05, 8.7761935886847988286e-05, 
0.00021681208262686383053, 0.00051561316170019592855, 0.0011946804469067555735, 0.0027131220507094727483, 0.0060506596144855023106, 0.013230141811374926397, 0.028210083582453096551, 0.058035333309565624582, 0.11300958063559926603, 0.20116335451605230067, 0.30568795867586873172, 0.33877798094471850421, 0.16475763686175207146, 0, 0, 1.1484182443411814437e-10, 1.7931887260774516498e-09, 1.2286988490864389756e-08, 6.1400553206305291637e-08, 2.6193586874264870438e-07, 1.0214767558696939837e-06, 3.7704802796487805225e-06, 1.3428054504627923681e-05, 4.6634530984498030184e-05, 0.00015879269432593164958, 0.00053096908751411363639, 0.00174019442073816092, 0.0055566565395111476577, 0.017080184681265514479, 0.049404053208565550104, 0.12851476524402274948, 0.27074730808567093465, 0.32929231067852438031, 0, 0, 1.2959854631941128047e-14, 5.06072933143619642e-13, 6.5918427455453850986e-12, 5.6389033989814727996e-11, 3.9088927700060016774e-10, 2.406265255469648607e-09, 1.378603437152614992e-08, 7.5464802427343835863e-08, 4.0078909822273000137e-07, 2.0836670196190539855e-06, 1.0654767572225721294e-05, 5.3673655611779830157e-05, 0.00026601408260545672708, 0.0012907432586925205419, 0.0060666130749972404027, 0.027014549341864656923, 0.10818900424286860551, 0.32906949849914446382, 0, 0, 5.8500579526198340832e-19, 5.7129472193553077444e-17, 1.4145822896898457119e-15, 2.0714622186680568112e-14, 2.3333104795078233906e-13, 2.2673496764008742317e-12, 2.0162390952550372537e-11, 1.6964293385718098169e-10, 1.3777936465799164788e-09, 1.0936695210263550545e-08, 8.5522170452256458058e-08, 6.6219297623306537514e-07, 5.0939619277349284707e-06, 3.9016396858695978102e-05, 0.00029798198173219142288, 0.00227144600780565184, 0.017292671490362138825, 0.13153873483331313121, 1, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=None knots=np.array([100, ]) output=np.array([1, 0.97500126574733914087, 0.93844290960000364965, 
0.88566924020111725824, 0.81098530154789516544, 0.70848516022771668155, 0.57441053105822748037, 0.41206430922482584212, 0.2392461015407435776, 0.092939125284492898893, 0.014299466806025581608, 4.7193863677059842883e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.024987397851780435848, 0.061486796626462968118, 0.1140799960900419352, 0.1882939537142028219, 0.28966625638367565765, 0.42117968175620157378, 0.57798465087862005429, 0.73943782746930653005, 0.86397935690336380432, 0.90462871246453147034, 0.85976637084822715718, 0.77601114642224255924, 0.66267521310641597232, 0.51785842958629058064, 0.34911766989097126057, 0.18104199448456753663, 0.055522321426452851678, 0.0043174157141209739547, 0, 0, 1.1333836374491041766e-05, 7.0253868335539153452e-05, 0.00025049169236306427153, 0.00071939598049462782393, 0.0018428967293926601032, 0.0043879998552115674279, 0.0098727402942039785283, 0.021048289930652556295, 0.042207103219050953746, 0.078361329513138483494, 0.1323267251748408424, 0.20305646979998087653, 0.28625091024051202426, 0.36712456913477936604, 0.41237454577086030127, 0.37209382828918063923, 0.21926721332006066101, 0.042943027456719351509, 0, 0, 2.5642163564224008165e-09, 3.9893896948733327568e-08, 2.7186958324120860093e-07, 1.3475047468249140318e-06, 5.6780166980285093331e-06, 2.1734507210374597486e-05, 7.8000238621073664019e-05, 0.00026616938183067457386, 0.00086607259863206911138, 0.0026688434677071591963, 0.0076602480569788727188, 0.020032890189762748295, 0.047362176442216685768, 0.10112516340575794516, 0.19128545657973258787, 0.30258229747242648688, 0.34436724521558481626, 0.17045464690987927048, 0, 0, 2.8979008849349871451e-13, 1.1299684644739179399e-11, 1.4686295227238142049e-10, 1.2521970298985838129e-09, 8.6372943219355255736e-09, 5.2772400925513216102e-08, 2.9891245041530529781e-07, 1.607880484564848822e-06, 8.3111564072833673416e-06, 4.1402961295506068027e-05, 0.00019754788193526286806, 0.00088468396831814228969, 0.0036015013400145573161, 0.013149579882492583999, 
0.042768561910798960635, 0.12037399719492403172, 0.26730426029673903798, 0.33672081019045274619, 0, 0, 1.309371682920953332e-17, 1.2786832841024935257e-15, 3.1661464010829005262e-14, 4.6363882090260461786e-13, 5.2224622287067364928e-12, 5.0748274386406234902e-11, 4.5127867086219768747e-10, 3.7969821085404869e-09, 3.0838053235562167103e-08, 2.44787301750460405e-07, 1.9141743408195724709e-06, 1.4809071760067401229e-05, 0.00011002044182894339316, 0.0007361307065949982114, 0.0043501410333869746858, 0.022648326644076191561, 0.10074752233050462968, 0.32871166293410319925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4793570769921117731e-10, 1.7842901170031562027e-07, 6.1272840845677704696e-06, 0.00010362481425005589904, 0.0012595559148250148798, 0.012791437410658173385, 0.11685243679472454015, 1, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=None knots=np.array([1000, ]) output=np.array([1, 0.9975000012515636838, 0.99375938046297329631, 0.9881694981371595965, 0.97983186568317970355, 0.96743102855471574397, 0.94906548971852167096, 0.92204114544049331492, 0.88266217520230549898, 0.82612719043156179755, 0.74679100501527517775, 0.63932416597329111418, 0.50165120112244221406, 0.34054780971703357828, 0.17887753393501265586, 0.056398049910775530091, 0.0047824514208348522723, 6.9899702309353164834e-10, 0, 0, 0, 0.0024988701975066367374, 0.0062335737743966241481, 0.011805108206123089004, 0.020094017057672219906, 0.032374436158623724757, 0.050454096263104174225, 0.076816980711751164934, 0.11469423978741608017, 0.16787638509201657788, 0.23985925947403505254, 0.33155764168655321722, 0.4364690436594241274, 0.53267348082579712987, 0.57514542430970139186, 0.50463302930351572329, 0.30623588293439429897, 0.096587849561305616497, 0.0075106712808647091081, 0, 0, 1.1282962103271197581e-06, 7.0417854820895302466e-06, 2.536640667453833115e-05, 7.3981096505374880712e-05, 0.00019395448667024030114, 0.00047814948330325205252, 
0.0011335173229715391711, 0.0026138368581788955919, 0.0058931764589364287604, 0.012998501527337540801, 0.027945386445582046098, 0.058044007669012950834, 0.1145680954987259581, 0.2086069927114643785, 0.33148565972609811414, 0.41061823564260202524, 0.3066625410662744966, 0.068889778730265194273, 0, 0, 2.5469057156714312774e-10, 3.976025713192789869e-09, 2.7235422387831816846e-08, 1.3603757883811768205e-07, 5.7993308882550318022e-07, 2.2591989366454093128e-06, 8.3259563410340815473e-06, 2.9580849000601624262e-05, 0.00010235971125482330811, 0.00034661758311907198181, 0.001149214388416119545, 0.0037170166582407206807, 0.011623043605661884797, 0.034525979049628140183, 0.094171296281739258482, 0.21954225794050497012, 0.36514412761590397949, 0.24346326935859208263, 0, 0, 2.8744101198050118059e-14, 1.1223436953991581798e-12, 1.4617253735361878953e-11, 1.2501806315302350017e-10, 8.6638386346496238246e-10, 5.3311050540562872207e-09, 3.0523721489433262118e-08, 1.6692682142232341016e-07, 8.8525020719146049419e-07, 4.5921420319874677743e-06, 2.3401813204582281116e-05, 0.0001172621098580812533, 0.00057627166059595505247, 0.0027575294458836020588, 0.012651024092369157759, 0.053782977897194939043, 0.19324936944781076487, 0.3997562713358867037, 0, 0, 1.2975755416333770318e-18, 1.2671636148763449243e-16, 3.1376225596317031531e-15, 4.5946189458816680778e-14, 5.1754130194391066624e-13, 5.0291082725267428713e-12, 4.4721309725082660758e-11, 3.762775062517599803e-10, 3.0560232936142684245e-09, 2.4258201074369941878e-08, 1.8969295269383146249e-07, 1.4687810219326831175e-06, 1.1298692185571222767e-05, 8.6540548310009559815e-05, 0.00066094068550206401882, 0.0050381941644688631871, 0.038356111609708157251, 0.27103704347500323646, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0093429658193878890871, 1, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([1, 
0.75141884282545001739, 0.47347381451739223301, 0.21613090194838843749, 0.049696178730542467372, 0.0013661273532290147976, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.24594066521407775827, 0.51146600459087232515, 0.73649217207800155016, 0.8374180350990879651, 0.77783093616193677011, 0.63185158416405007298, 0.45327074014730833751, 0.26317071169481792703, 0.10223303781294215686, 0.015729413486628138208, 5.1913250044765826493e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0026391300640233733565, 0.015039867450995366219, 0.047247102051768179176, 0.1123025082166889399, 0.21868817428784292911, 0.36168436874353226962, 0.52963938325215309533, 0.69654238826126158024, 0.81185888378284087885, 0.81901416673831617388, 0.71604356256126133751, 0.5618949499117826818, 0.38144450577487010179, 0.20035909960120826256, 0.06317094299551503922, 0.0053567803596460189172, 7.8294021104867289565e-10, 0, 0, 0, 1.3617399508643758347e-06, 2.0307441416267721758e-05, 0.00012974796415461092966, 0.00058265627417901637731, 0.0021107453507099383297, 0.0064420295146436082692, 0.01698600253976614155, 0.039859183304507203593, 0.084333549165298365979, 0.1600060050847842974, 0.26809274258076365438, 0.39566435336691235802, 0.51703688602850017553, 0.58200587206136744634, 0.52329606670761874554, 0.32126381870611603331, 0.10151738768556310688, 0.0078939921772932797328, 0, 0, 1.5649080494945798507e-10, 5.9986253804949622248e-09, 7.5940397022685528773e-08, 6.2142631098330448954e-07, 4.0139943226430085122e-06, 2.1989865743143280786e-05, 0.00010363219420155694861, 0.00042588199885955281988, 0.0015624557228769457226, 0.0051797373818038370402, 0.015437236122329209276, 0.040650688803865277221, 0.093964606272201794956, 0.18957613958735397564, 0.32176934741298385267, 0.41523761089925581569, 0.31715186300322989466, 0.072004316292680670131, 0, 0, 7.1504090682135632783e-15, 6.9828213556773098054e-13, 1.7290156975971226543e-11, 2.531906923450678459e-10, 2.8519588261786051741e-09, 2.7712025453436415246e-08, 
2.4184791568371559048e-07, 1.8337667890099757592e-06, 1.2054075181315261422e-05, 7.0420380839424076883e-05, 0.00037180990877366712185, 0.0017643270358445378374, 0.0073398980933030329166, 0.026537861233464601213, 0.082516974679995302999, 0.20957251000715282352, 0.36763462897045218192, 0.25222621956325630421, 0, 0, 0, 0, 0, 0, 0, 5.430767431219823664e-15, 1.8655102027007631066e-11, 9.7376457669355941836e-10, 1.9440860511832082848e-08, 2.5692762827202482626e-07, 2.7355768273926624709e-06, 2.5679592827130744701e-05, 0.00021368415859150435886, 0.0015066158869563137275, 0.0090029382862195743431, 0.045606751586747915073, 0.18361012124256628764, 0.40566571262016642985, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2887678100169456754e-09, 4.196725335132502196e-07, 1.441162964935832764e-05, 0.00024372991766721753461, 0.0029625284410813028553, 0.030085998315248142776, 0.25286679352721525005, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0093429658193878890871, 1, ]).reshape((20, 9, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=12 intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.13756522087537645382, 0.03408911379396852015, 0.0012620658543943507734, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.63650820114425754603, 0.56442389527296787932, 0.36731014666049194295, 0.15313548110822144954, 0.027890905208072903215, 0.00025587978478329849409, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.21582542819545830204, 0.37161327360106727324, 0.54958413949399687048, 0.64871936385648787393, 0.56949455728677522703, 0.3353237146654387546, 0.11624293381549648252, 0.013034128145880629485, 2.2307127685738320293e-06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.010030255096273907323, 0.029534067226298671427, 0.080270381333806692381, 0.19129075916133608803, 0.37605173703394079165, 0.57813180094222393901, 0.65912921049582962052, 0.52910502202296083585, 0.27213902033160508598, 0.075690939564343556745, 0.0042624228385304534922, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 7.0831944077479075004e-05, 0.00033917363922361448356, 0.0015696484900234396707, 0.006826920796842389158, 0.026357789549328776435, 0.084918297819850163677, 0.21706326994359459448, 0.42522472470932526356, 0.61970296224817356112, 0.65157393079529135616, 0.46867117785961504017, 0.21110309109651023696, 0.045544745506395555024, 0.00095384491778118137267, 0, 0, 0, 0, 0, 0, 6.2744556242273010119e-08, 4.764664739647606222e-07, 3.6181672866699010542e-06, 2.7475075557388156146e-05, 0.00020499609216608624608, 0.0013694324583970120431, 0.0075463637977772560911, 0.032410006481736260142, 0.10630697223991346367, 0.26202672571617779962, 0.48196986016032394851, 0.64730135891295237371, 0.6200897527822700761, 0.39505484728729772792, 0.15152949559297854143, 0.022327498493473717928, 5.4255821339140527268e-05, 0, 0, 0, 0, 0, 0, 1.5550084505986725427e-12, 1.4829716211306275165e-08, 8.7432930709587782537e-07, 1.8221879880749471527e-05, 0.00022607643762169348975, 0.0018471467411283852555, 0.010678853506751016453, 0.044774794636566098149, 0.13922623229659780719, 0.32186074315609991547, 0.55535177436803673245, 0.70594649934249964485, 0.65842055649705455433, 0.44019721934864292079, 0.20692042038665983683, 0.050925719374514837046, 0.0018364038370179928007, 0, 0, 0, 0, 0, 0, 6.7421415818675005971e-11, 4.2202475147942958715e-08, 1.6677264111419149931e-06, 2.9548372537568077557e-05, 0.0003214228907650466577, 0.0023601436034748866921, 0.012366085974335782813, 0.047309807190604125093, 0.13367877749039860924, 0.27692812785680914756, 0.41099272250632723491, 0.40841144311061738925, 0.22284961609469711163, 0.025059128845635693372, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.0448984495923750749e-09, 3.2161419941923913368e-07, 9.1740900392921949651e-06, 0.00013858698681402402734, 0.0013233320235020520645, 0.0087002635234140028586, 0.040418548886275447451, 0.13266384308046400009, 0.2919610966000151242, 0.36977070091947494834, 0.13378226109816304668, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4.2555387649176344133e-13, 
8.5594084815015888324e-08, 6.3942127780347038856e-06, 0.00014495964162350503772, 0.001897132263692037548, 0.015703476623777332805, 0.085263653984348167225, 0.27486427411742569982, 0.34006034727733425171, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4.4090854418790210764e-09, 8.1360026955471610933e-06, 0.00038848261944930331932, 0.0074310477767932352905, 0.07805738431434718072, 0.38159617170452009294, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2338141566056994185e-05, 0.0035323051795402063933, 0.11766568723732878654, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([]) output=np.array([0.9983344440741356296, 0.99750249875031249402, 0.99625562078283180778, 0.99438764201972595913, 0.99159092854883290613, 0.98740766028786153274, 0.98115924126864673127, 0.97184596123454425332, 0.95800877113085403103, 0.93754892830310510021, 0.90751599227918267054, 0.86391429083558635149, 0.801669064575886825, 0.71507663977921398502, 0.59936827269939108032, 0.45433823528182060159, 0.29067165046987419874, 0.13661155030676189193, 0.033621821670569602969, 0.0012124176758172098417, 0.0016644455553086624083, 0.002495003748750156472, 0.0037387626499230611904, 0.0055997301828166279672, 0.0083806908754831561242, 0.012528591196985617434, 0.018697687754151010031, 0.027833351892417868001, 0.041273795716988054272, 0.060850651795732749183, 0.088929278684076601413, 0.12824165844093327049, 0.18119239198615591513, 0.2480374111869024234, 0.32305865186741777872, 0.38825858275226748928, 0.40740939128236214328, 0.33403355026188807919, 0.1632256493056772062, 0.017159494075893101661, 1.1100003703292180217e-06, 2.4962518746875000594e-06, 5.6123532398144536609e-06, 1.2613583192428893271e-05, 2.8332643039885458129e-05, 6.3586947395334136954e-05, 0.00014252672253270057788, 0.00031885525421459126778, 0.00071127791904317637373, 0.0015797796625579053637, 0.0034857420363066235133, 
0.0076146085944587566657, 0.016381164929213501424, 0.034414525059745272595, 0.069651262704549549154, 0.13271586265443335861, 0.22841224706537738287, 0.32670272008482859061, 0.31696810306481593145, 0.097144158424494309045, 3.7012349794238681896e-10, 1.2487503125000001559e-09, 4.2124242480468747114e-09, 1.4206263137512205132e-08, 4.7892153138227467571e-08, 1.6136291046154529314e-07, 5.4321868305356652586e-07, 1.8263821320052253243e-06, 6.1287830369107454558e-06, 2.0506795807293100125e-05, 6.8314944883560930534e-05, 0.00022606641535874693596, 0.00074049070575379688161, 0.0023874614910316710632, 0.0075083864312185822493, 0.022682692646808663012, 0.064029150684570476648, 0.15976638757864897178, 0.30776038811264622153, 0.27497860584540806395, 6.1707818930041155992e-14, 3.1234375000000000385e-13, 1.5808447265625000092e-12, 8.0000230407714837838e-12, 4.0477309670448300476e-11, 2.0474318975195293786e-10, 1.0351972331039468262e-09, 5.2306989582536832363e-09, 2.6404574434177521861e-08, 1.3309725534797475592e-07, 6.6943159388078163953e-07, 3.3557879908853077946e-06, 1.6736492419104248054e-05, 8.2813468459374991711e-05, 0.00040470096744437702128, 0.0019383686901439161829, 0.0089744140037595811904, 0.039065023078631577746, 0.14941007561236896439, 0.3891805482645412928, 4.1152263374485595055e-18, 3.1249999999999996265e-17, 2.3730468749999997491e-16, 1.802032470703124905e-15, 1.3684184074401855573e-14, 1.0391427281498912374e-13, 7.8909900918882334723e-13, 5.9922206010276265389e-12, 4.550342518905353542e-11, 3.4554163502937537769e-10, 2.6239567910043193503e-09, 1.9925671881689045749e-08, 1.5131057085157624606e-07, 1.1490146474041569126e-06, 8.7253299787253137094e-06, 6.625797452594535961e-05, 0.00050314649405639756945, 0.0038207686892407699206, 0.02901396223392209775, 0.22032477571384589954, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) 
output=np.array([0.95099004990000002291, 0.92721650236562502823, 0.89244986942880866199, 0.84226263493375552738, 0.77123895238719930578, 0.67376233787836592448, 0.54625869959414907751, 0.39186905799172611076, 0.22752066204261220395, 0.088384183391962217735, 0.013598650651405662371, 4.4880894773220947292e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0.048976959490485305615, 0.072709651432435309926, 0.10738526002140330595, 0.15737069698548675212, 0.22795032016720712109, 0.32446067835465053353, 0.44989711207706667428, 0.59997610680291524332, 0.75567735422921566979, 0.87844628783911327119, 0.92474207754597603781, 0.89365801028359981295, 0.82931282542333117913, 0.73973445494401424138, 0.62003614417178387619, 0.47000507098119359561, 0.30069481083090426887, 0.14132229342078814205, 0.034781194831623722663, 0.0012542251818798723622, 3.2979557543950620822e-05, 7.3809001036874996929e-05, 0.00016474549918236330078, 0.00036624856893145208737, 0.00080932434818232947688, 0.0017723123294799305975, 0.003828742087843573557, 0.008104291378262778317, 0.016639190251049165714, 0.032657698828719619599, 0.060108147688838667322, 0.10184799113601375464, 0.15884341152280503917, 0.23108234071251923525, 0.31281804868450863166, 0.38543973833058026157, 0.4110892044013776947, 0.34067876601502938838, 0.16765476842547211156, 0.017707951623962527032, 1.1050122962962964101e-08, 3.7191558750000007188e-08, 1.250033797265625178e-07, 4.1927333935913091575e-07, 1.401894586697731242e-06, 4.665382495865281394e-06, 1.5415847866808392633e-05, 5.0391939592239989756e-05, 0.00016203956276710770662, 0.00050812727751784608765, 0.0015332452896675867124, 0.004365181610267205789, 0.011468673667365519159, 0.027632876244132381638, 0.061266201119033726619, 0.12400124625180757032, 0.22211304163999795458, 0.32622078746654581405, 0.32211683874203467237, 0.099883337969340235674, 1.8476543209876543964e-12, 9.3431250000000011463e-12, 4.7218886718750000078e-11, 2.3843292297363282786e-10, 1.2024140499687195818e-09, 6.0518902752095459008e-09, 
3.0369400855124124217e-08, 1.5170773682471650721e-07, 7.5254925311084902029e-07, 3.6922964379636862066e-06, 1.7800105408249686863e-05, 8.3338305189489776193e-05, 0.00037055336224994447902, 0.0015169299478213021069, 0.0056546686833628886926, 0.019188949419050076173, 0.058577982030935032975, 0.15402658068596292162, 0.30726533809094308536, 0.28101637370320353693, 1.2345679012345678054e-16, 9.3749999999999994959e-16, 7.1191406249999982613e-15, 5.4060974121093752673e-14, 4.1052552223205572084e-13, 3.1174281844496722735e-12, 2.3672970275664694964e-11, 1.7976661803082883009e-10, 1.3651027556716062823e-09, 1.0366249050881264123e-08, 7.8718703730129575545e-08, 5.9777015645067147174e-07, 4.5359107007994105463e-06, 3.3361176067584402466e-05, 0.00022366759792994563949, 0.0013435210788023243395, 0.0072639461407535297829, 0.035100831436999477275, 0.14396679069931464512, 0.39291034738734598175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.1354749159583518215e-10, 3.6975445151343504115e-08, 1.2697433810604620809e-06, 2.1473938565867883887e-05, 0.00026101495603127997146, 0.0026507409746741200131, 0.024215069210611606804, 0.20722776413426768904, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.9950099900049993451, 0.99252246627530493761, 0.98880051122183698631, 0.98323852246469989336, 0.9749424948799998436, 0.96260353805275378214, 0.94432964343891578896, 0.91744015090894337483, 0.87825768212583654737, 0.82200480749416604542, 0.74306451043607213425, 0.63613393199503898146, 0.4991479566148370739, 0.33884847274276985729, 0.17798493325279582389, 0.056116623078022197235, 0.0047585869404442801556, 6.9550902096181903072e-10, 0, 0, 0.0049866811037051986949, 0.0074700487125113432946, 0.011182664341492391802, 0.016723679332538900905, 0.024972650503249212151, 0.037206183352661716113, 0.055244396744596628579, 0.081608715488401706306, 0.11962663350752600344, 
0.17331618121340841565, 0.2466772227646655824, 0.34167053826082144363, 0.45378166194157465441, 0.56434225055466613608, 0.6320750091698927875, 0.59733241830569760999, 0.42886959529414492298, 0.2049173244168793484, 0.050432732505854400984, 0.0018186265137258147626, 3.3277811103950621097e-06, 7.4812668695624991679e-06, 1.6811804138396487754e-05, 3.7755607955489316522e-05, 8.4711061600122608767e-05, 0.00018979511914756760698, 0.00042433325892820140772, 0.00094567009442594711366, 0.002097376821719070146, 0.0046178870868948916628, 0.010055306643782090104, 0.021527218530989256778, 0.04489775700844654549, 0.089884991503020608694, 0.16855047321618027434, 0.28372166497555245668, 0.39667928927647067017, 0.39859166318439254173, 0.21962210770558857065, 0.024829927856976751616, 1.1100002962962965174e-09, 3.7443772500000006212e-09, 1.2627790523437500951e-08, 4.2570810898681638324e-08, 1.4343375976686856963e-07, 4.8286151921739802212e-07, 1.6234543349501860428e-06, 5.4478341089133651746e-06, 1.8228467705229433379e-05, 6.0725950389411787269e-05, 0.000200959732568890164, 0.00065830362619350942834, 0.0021228688895969845957, 0.0066792918381076149537, 0.020201657448734186562, 0.057212961493873837338, 0.14427872595983071147, 0.29075824853504678158, 0.36564110074442957021, 0.13330127370825312072, 1.8509876543209877828e-13, 9.3684375000000001865e-13, 4.7411103515625013439e-12, 2.3989256927490236273e-11, 1.2134982390689849712e-10, 6.1360608361896876588e-10, 3.1008571052567070342e-09, 1.5656143551154886378e-08, 7.8940702751398238619e-08, 3.9721851623374797589e-07, 1.9925510408963188817e-06, 9.9478099413657914188e-06, 4.930161383220328208e-05, 0.00024154631749370003959, 0.0011617509224607792256, 0.0054175582232760775836, 0.023904363046940359239, 0.094270457100450164023, 0.27882003179675446392, 0.34581727191398570209, 1.2345679012345680828e-17, 9.3750000000000019611e-17, 7.1191406250000002335e-16, 5.4060974121093743207e-15, 4.1052552223205574609e-14, 3.1174281844496730813e-13, 
2.3672970275664701427e-12, 1.7976661803082880424e-11, 1.3651027556716061272e-10, 1.0366249050881260814e-09, 7.8718703730129572237e-09, 5.9777015645067147174e-08, 4.5393171255472860584e-07, 3.4470439422124707377e-06, 2.6175989936175949598e-05, 0.00019877392357783613304, 0.0015094394821691928168, 0.011462306067722308894, 0.084705097520176186876, 0.41086218643981919918, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00077892972719669731786, 0.083370713567239546071, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=TRUE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.59048999999999984833, 0.44370531250000000423, 0.27958155273437496069, 0.12762313629150390248, 0.029345096578598028197, 0.00080668454080820085113, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.40055561099999992258, 0.53723465540624992798, 0.68096479632714856933, 0.79404388738027953387, 0.82432650645400162848, 0.74772850370839760714, 0.60695411066016569102, 0.43541006443525137604, 0.25280073560290239332, 0.098204648213291390046, 0.015109611834895182725, 4.9867660859134372328e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0.0089449327889999999397, 0.01902900664771874778, 0.039353322248692389207, 0.078015642723590794549, 0.14535820079903546964, 0.24864829948443537, 0.38546024005089984943, 0.54588573189455047441, 0.70480915137930777448, 0.81344804238272472308, 0.81717844525199323513, 0.71389962178539645432, 0.56021095018500233209, 0.38030131620961826755, 0.19975862317934431345, 0.062981619616186521049, 0.0053407260835513798575, 7.8059373845321992395e-10, 0, 0, 9.4545814444444459148e-06, 3.1017290343749996553e-05, 0.00010028811495410157556, 0.00031713361145809936415, 0.00096922483761113851879, 0.0028119072707210087531, 0.0075647083349296134064, 0.01861553313384527869, 0.042046394339120052308, 0.087124325583113268467, 0.16371060089498334911, 0.27411331174625735985, 0.40781324509205862938, 0.54286075051770132927, 0.63352683070691229172, 
0.61289306201898841042, 0.44453962951650222157, 0.21269241367644950436, 0.052346279846925344859, 0.0018876298723944570646, 1.6294444444444447802e-09, 8.1548437499999982349e-09, 4.0568422851562507446e-08, 1.9994451278686527012e-07, 9.7096128099918361425e-07, 4.602189952521174856e-06, 2.0919648984453888767e-05, 8.8510989072612499115e-05, 0.0003426071500521665281, 0.0012159913347531664096, 0.0039613824662355322237, 0.011728070771626651625, 0.030981021316488138728, 0.072628957837475555115, 0.15091202659488145432, 0.27128094978421118944, 0.39491276471324787689, 0.40566968711467132902, 0.22597501897826155481, 0.025700635234072819607, 1.1111111111111111028e-13, 8.4374999999999991361e-13, 6.4072265625000016033e-12, 4.8654876708984379097e-11, 3.694729700088501736e-10, 2.8056853660047051398e-09, 2.1305018189503776537e-08, 1.5953977978988917004e-07, 1.1111370996233737328e-06, 6.9846696024166045466e-06, 3.9856249948076781855e-05, 0.00020802815256623063945, 0.00098445759075916142167, 0.0041227618062229087065, 0.015186511992247679614, 0.04905127538388008579, 0.1347633037922978394, 0.28642046903795781443, 0.37096041450757122337, 0.1373866644432741313, 0, 0, 0, 0, 0, 0, 2.1835286481805527197e-15, 7.5005881261891668233e-12, 3.9151793493690686266e-10, 7.8165151445884371071e-09, 1.0330194469679949402e-07, 1.0998832941312460716e-06, 1.0325449816590390154e-05, 8.6094485881001237893e-05, 0.00061191613127527461416, 0.0037238993946882535668, 0.01960252770274439893, 0.08667615291637777164, 0.27442290749348607903, 0.35255941616703945218, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.6587525069769108936e-10, 1.1914310104321792976e-07, 4.09139533897260065e-06, 6.9193802045574309841e-05, 0.00084104819165634655664, 0.0085412764739499439509, 0.075516449446559150149, 0.39909494071597967357, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00077892972719669731786, 0.083370713567239546071, ]).reshape((20, 9, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=12 intercept=FALSE 
Boundary.knots=None knots=None output=np.array([0, 0.66169375447141842717, 0.42704385876993139481, 0.10949465799552411671, 0.0045536628406788173459, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.16320829536765205092, 0.50699791406824767925, 0.63821768315482607647, 0.43263056231744068114, 0.15013545271065373288, 0.016177354520514077713, 1.5876171140511428811e-06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0058674236731072158813, 0.063738396787633516682, 0.24043272488013067711, 0.50054000504763473955, 0.64199088129854386953, 0.51113227122624471654, 0.23070045750246975791, 0.045936684143309251815, 0.00066416458897031876996, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.5532476724270158346e-05, 0.0011936972661650952464, 0.011779052740953399256, 0.061287867695344819263, 0.20002288232936923928, 0.43156920172035251326, 0.62168249332663905182, 0.58254996098081013312, 0.32664310035477628347, 0.092349295039142950681, 0.0054390297948912662632, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.1496907292528123154e-08, 3.0758698527859495562e-06, 7.5879789673663857776e-05, 0.00098724833216487398006, 0.0078266015648352053879, 0.040722255850196634186, 0.14383543945950985621, 0.34844598770889789741, 0.57762867947116447453, 0.6345732251821071257, 0.43704729558185051452, 0.16650826117204259313, 0.023068804299786735412, 3.4807107343935620814e-05, 0, 0, 0, 0, 0, 0, 0, 0, 1.4388921206346279741e-09, 6.5376673591293467664e-07, 2.4182089760364024493e-05, 0.00039880176232783617072, 0.0037728522997899590664, 0.022917112728778440273, 0.09334968530661211239, 0.26084062164632215719, 0.49935247038577756928, 0.64262810933455249973, 0.53564397016856835076, 0.2604583835482605636, 0.059387695355091404958, 0.0015752311746957685552, 0, 0, 0, 0, 0, 0, 0, 0, 6.8376488468020759222e-12, 1.1492036416820246223e-07, 7.1697944771843335622e-06, 0.00015024167017123804383, 0.0017124892221411901899, 0.012184251164569406822, 0.05741986723560143363, 0.1846190190189749003, 0.40690335279758527154, 0.61033896958284106216, 0.60106063282500987732, 
0.35935886624758728303, 0.11099693368003141214, 0.0086311215629592420023, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2768033036904166236e-08, 1.8810563357318595251e-06, 5.2605966584901652059e-05, 0.00074031169779739149084, 0.0062001835283552559838, 0.03359211253079281978, 0.12126275319647328299, 0.28972724543117178708, 0.43422014732068858756, 0.34126033432252839139, 0.077899048558433395262, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.0012735173756063062e-09, 1.0253040817926208576e-06, 4.44269427274169616e-05, 0.0007905173401505205099, 0.0078092535818893079463, 0.047648780279836128182, 0.17901740350372433164, 0.37155904838956399505, 0.26547319923364387506, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.3473917264272251468e-12, 1.2428631162803442157e-06, 9.5832983192035734268e-05, 0.0021733294132454694138, 0.025265394711489429919, 0.1581848201187764924, 0.40157681487049512459, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.3166956453278957354e-06, 0.00056295704181467877848, 0.017992654275869164604, 0.22701602442886983924, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.2092132305915514147e-06, 0.019403791345598601914, 1, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=None knots=np.array([]) output=np.array([0, 0.0011272234177996938269, 0.0028142442804627715303, 0.0053362072774053633667, 0.0090999196124958610377, 0.014702372431044495246, 0.023009568696928141274, 0.035255048800207078319, 0.053144756365466998271, 0.078922539871719557536, 0.11527760116980134697, 0.16482753559964863355, 0.22865514515079901625, 0.30306893078627178406, 0.37385911082806172478, 0.40954833229285447782, 0.36355794566479110452, 0.2119513600790149388, 0.041217298467029310494, 0, 0, 5.0882687398560041373e-07, 3.1769391294621280289e-06, 1.1451280924188917779e-05, 3.3428733100138665235e-05, 8.7761935886847988286e-05, 0.00021681208262686383053, 0.00051561316170019592855, 0.0011946804469067555735, 0.0027131220507094727483, 
0.0060506596144855023106, 0.013230141811374926397, 0.028210083582453096551, 0.058035333309565624582, 0.11300958063559926603, 0.20116335451605230067, 0.30568795867586873172, 0.33877798094471850421, 0.16475763686175207146, 0, 0, 1.1484182443411814437e-10, 1.7931887260774516498e-09, 1.2286988490864389756e-08, 6.1400553206305291637e-08, 2.6193586874264870438e-07, 1.0214767558696939837e-06, 3.7704802796487805225e-06, 1.3428054504627923681e-05, 4.6634530984498030184e-05, 0.00015879269432593164958, 0.00053096908751411363639, 0.00174019442073816092, 0.0055566565395111476577, 0.017080184681265514479, 0.049404053208565550104, 0.12851476524402274948, 0.27074730808567093465, 0.32929231067852438031, 0, 0, 1.2959854631941128047e-14, 5.06072933143619642e-13, 6.5918427455453850986e-12, 5.6389033989814727996e-11, 3.9088927700060016774e-10, 2.406265255469648607e-09, 1.378603437152614992e-08, 7.5464802427343835863e-08, 4.0078909822273000137e-07, 2.0836670196190539855e-06, 1.0654767572225721294e-05, 5.3673655611779830157e-05, 0.00026601408260545672708, 0.0012907432586925205419, 0.0060666130749972404027, 0.027014549341864656923, 0.10818900424286860551, 0.32906949849914446382, 0, 0, 5.8500579526198340832e-19, 5.7129472193553077444e-17, 1.4145822896898457119e-15, 2.0714622186680568112e-14, 2.3333104795078233906e-13, 2.2673496764008742317e-12, 2.0162390952550372537e-11, 1.6964293385718098169e-10, 1.3777936465799164788e-09, 1.0936695210263550545e-08, 8.5522170452256458058e-08, 6.6219297623306537514e-07, 5.0939619277349284707e-06, 3.9016396858695978102e-05, 0.00029798198173219142288, 0.00227144600780565184, 0.017292671490362138825, 0.13153873483331313121, 1, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=None knots=np.array([100, ]) output=np.array([0, 0.024987397851780435848, 0.061486796626462968118, 0.1140799960900419352, 0.1882939537142028219, 0.28966625638367565765, 0.42117968175620157378, 0.57798465087862005429, 
0.73943782746930653005, 0.86397935690336380432, 0.90462871246453147034, 0.85976637084822715718, 0.77601114642224255924, 0.66267521310641597232, 0.51785842958629058064, 0.34911766989097126057, 0.18104199448456753663, 0.055522321426452851678, 0.0043174157141209739547, 0, 0, 1.1333836374491041766e-05, 7.0253868335539153452e-05, 0.00025049169236306427153, 0.00071939598049462782393, 0.0018428967293926601032, 0.0043879998552115674279, 0.0098727402942039785283, 0.021048289930652556295, 0.042207103219050953746, 0.078361329513138483494, 0.1323267251748408424, 0.20305646979998087653, 0.28625091024051202426, 0.36712456913477936604, 0.41237454577086030127, 0.37209382828918063923, 0.21926721332006066101, 0.042943027456719351509, 0, 0, 2.5642163564224008165e-09, 3.9893896948733327568e-08, 2.7186958324120860093e-07, 1.3475047468249140318e-06, 5.6780166980285093331e-06, 2.1734507210374597486e-05, 7.8000238621073664019e-05, 0.00026616938183067457386, 0.00086607259863206911138, 0.0026688434677071591963, 0.0076602480569788727188, 0.020032890189762748295, 0.047362176442216685768, 0.10112516340575794516, 0.19128545657973258787, 0.30258229747242648688, 0.34436724521558481626, 0.17045464690987927048, 0, 0, 2.8979008849349871451e-13, 1.1299684644739179399e-11, 1.4686295227238142049e-10, 1.2521970298985838129e-09, 8.6372943219355255736e-09, 5.2772400925513216102e-08, 2.9891245041530529781e-07, 1.607880484564848822e-06, 8.3111564072833673416e-06, 4.1402961295506068027e-05, 0.00019754788193526286806, 0.00088468396831814228969, 0.0036015013400145573161, 0.013149579882492583999, 0.042768561910798960635, 0.12037399719492403172, 0.26730426029673903798, 0.33672081019045274619, 0, 0, 1.309371682920953332e-17, 1.2786832841024935257e-15, 3.1661464010829005262e-14, 4.6363882090260461786e-13, 5.2224622287067364928e-12, 5.0748274386406234902e-11, 4.5127867086219768747e-10, 3.7969821085404869e-09, 3.0838053235562167103e-08, 2.44787301750460405e-07, 1.9141743408195724709e-06, 1.4809071760067401229e-05, 
0.00011002044182894339316, 0.0007361307065949982114, 0.0043501410333869746858, 0.022648326644076191561, 0.10074752233050462968, 0.32871166293410319925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4793570769921117731e-10, 1.7842901170031562027e-07, 6.1272840845677704696e-06, 0.00010362481425005589904, 0.0012595559148250148798, 0.012791437410658173385, 0.11685243679472454015, 1, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=None knots=np.array([1000, ]) output=np.array([0, 0.0024988701975066367374, 0.0062335737743966241481, 0.011805108206123089004, 0.020094017057672219906, 0.032374436158623724757, 0.050454096263104174225, 0.076816980711751164934, 0.11469423978741608017, 0.16787638509201657788, 0.23985925947403505254, 0.33155764168655321722, 0.4364690436594241274, 0.53267348082579712987, 0.57514542430970139186, 0.50463302930351572329, 0.30623588293439429897, 0.096587849561305616497, 0.0075106712808647091081, 0, 0, 1.1282962103271197581e-06, 7.0417854820895302466e-06, 2.536640667453833115e-05, 7.3981096505374880712e-05, 0.00019395448667024030114, 0.00047814948330325205252, 0.0011335173229715391711, 0.0026138368581788955919, 0.0058931764589364287604, 0.012998501527337540801, 0.027945386445582046098, 0.058044007669012950834, 0.1145680954987259581, 0.2086069927114643785, 0.33148565972609811414, 0.41061823564260202524, 0.3066625410662744966, 0.068889778730265194273, 0, 0, 2.5469057156714312774e-10, 3.976025713192789869e-09, 2.7235422387831816846e-08, 1.3603757883811768205e-07, 5.7993308882550318022e-07, 2.2591989366454093128e-06, 8.3259563410340815473e-06, 2.9580849000601624262e-05, 0.00010235971125482330811, 0.00034661758311907198181, 0.001149214388416119545, 0.0037170166582407206807, 0.011623043605661884797, 0.034525979049628140183, 0.094171296281739258482, 0.21954225794050497012, 0.36514412761590397949, 0.24346326935859208263, 0, 0, 2.8744101198050118059e-14, 1.1223436953991581798e-12, 
1.4617253735361878953e-11, 1.2501806315302350017e-10, 8.6638386346496238246e-10, 5.3311050540562872207e-09, 3.0523721489433262118e-08, 1.6692682142232341016e-07, 8.8525020719146049419e-07, 4.5921420319874677743e-06, 2.3401813204582281116e-05, 0.0001172621098580812533, 0.00057627166059595505247, 0.0027575294458836020588, 0.012651024092369157759, 0.053782977897194939043, 0.19324936944781076487, 0.3997562713358867037, 0, 0, 1.2975755416333770318e-18, 1.2671636148763449243e-16, 3.1376225596317031531e-15, 4.5946189458816680778e-14, 5.1754130194391066624e-13, 5.0291082725267428713e-12, 4.4721309725082660758e-11, 3.762775062517599803e-10, 3.0560232936142684245e-09, 2.4258201074369941878e-08, 1.8969295269383146249e-07, 1.4687810219326831175e-06, 1.1298692185571222767e-05, 8.6540548310009559815e-05, 0.00066094068550206401882, 0.0050381941644688631871, 0.038356111609708157251, 0.27103704347500323646, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0093429658193878890871, 1, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=None knots=np.array([10, 100, 1000, ]) output=np.array([0, 0.24594066521407775827, 0.51146600459087232515, 0.73649217207800155016, 0.8374180350990879651, 0.77783093616193677011, 0.63185158416405007298, 0.45327074014730833751, 0.26317071169481792703, 0.10223303781294215686, 0.015729413486628138208, 5.1913250044765826493e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0026391300640233733565, 0.015039867450995366219, 0.047247102051768179176, 0.1123025082166889399, 0.21868817428784292911, 0.36168436874353226962, 0.52963938325215309533, 0.69654238826126158024, 0.81185888378284087885, 0.81901416673831617388, 0.71604356256126133751, 0.5618949499117826818, 0.38144450577487010179, 0.20035909960120826256, 0.06317094299551503922, 0.0053567803596460189172, 7.8294021104867289565e-10, 0, 0, 0, 1.3617399508643758347e-06, 2.0307441416267721758e-05, 0.00012974796415461092966, 0.00058265627417901637731, 
0.0021107453507099383297, 0.0064420295146436082692, 0.01698600253976614155, 0.039859183304507203593, 0.084333549165298365979, 0.1600060050847842974, 0.26809274258076365438, 0.39566435336691235802, 0.51703688602850017553, 0.58200587206136744634, 0.52329606670761874554, 0.32126381870611603331, 0.10151738768556310688, 0.0078939921772932797328, 0, 0, 1.5649080494945798507e-10, 5.9986253804949622248e-09, 7.5940397022685528773e-08, 6.2142631098330448954e-07, 4.0139943226430085122e-06, 2.1989865743143280786e-05, 0.00010363219420155694861, 0.00042588199885955281988, 0.0015624557228769457226, 0.0051797373818038370402, 0.015437236122329209276, 0.040650688803865277221, 0.093964606272201794956, 0.18957613958735397564, 0.32176934741298385267, 0.41523761089925581569, 0.31715186300322989466, 0.072004316292680670131, 0, 0, 7.1504090682135632783e-15, 6.9828213556773098054e-13, 1.7290156975971226543e-11, 2.531906923450678459e-10, 2.8519588261786051741e-09, 2.7712025453436415246e-08, 2.4184791568371559048e-07, 1.8337667890099757592e-06, 1.2054075181315261422e-05, 7.0420380839424076883e-05, 0.00037180990877366712185, 0.0017643270358445378374, 0.0073398980933030329166, 0.026537861233464601213, 0.082516974679995302999, 0.20957251000715282352, 0.36763462897045218192, 0.25222621956325630421, 0, 0, 0, 0, 0, 0, 0, 5.430767431219823664e-15, 1.8655102027007631066e-11, 9.7376457669355941836e-10, 1.9440860511832082848e-08, 2.5692762827202482626e-07, 2.7355768273926624709e-06, 2.5679592827130744701e-05, 0.00021368415859150435886, 0.0015066158869563137275, 0.0090029382862195743431, 0.045606751586747915073, 0.18361012124256628764, 0.40566571262016642985, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2887678100169456754e-09, 4.196725335132502196e-07, 1.441162964935832764e-05, 0.00024372991766721753461, 0.0029625284410813028553, 0.030085998315248142776, 0.25286679352721525005, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0093429658193878890871, 1, ]).reshape((20, 8, ), order="F") --END TEST 
CASE-- --BEGIN TEST CASE-- degree=5 df=12 intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=None output=np.array([0.59169275909036811445, 0.45594291761992611356, 0.23025182795481774489, 0.058881690084655928519, 0.00244877119161214735, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2905201731274964505, 0.4652923293619554701, 0.60774274744642686752, 0.58452788522997400911, 0.36691850516129698168, 0.12666647389296178949, 0.013648531489621140711, 1.3394428704094372146e-06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.021589057498422929704, 0.061252310080865124409, 0.15611979121414976124, 0.33262545668279458466, 0.54628842278228206819, 0.6385151816856337037, 0.49255505710881447579, 0.221198927694986891, 0.044044707273317475205, 0.00063680989274803034839, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00027107258187882916, 0.00127840807191650788, 0.0057589114783163615832, 0.023748086219301876854, 0.082775728340449605813, 0.22525005798130448564, 0.44911867244320990977, 0.62614581126197998984, 0.57974912164267822234, 0.32404333353141845375, 0.091606580575315788018, 0.0053952866770240375319, 0, 0, 0, 0, 0, 0, 0, 0, 4.9568448953450556364e-07, 3.7641040924026516272e-06, 2.8583665451682636297e-05, 0.00021688034438166792803, 0.0015679187576233750197, 0.0095441043435020560953, 0.04427882227566239115, 0.1488738995058955239, 0.35313880391702157091, 0.58025580099074469675, 0.63531593964593435775, 0.4370910386997177155, 0.16650826117204259313, 0.023068804299786735412, 3.4807107343935620814e-05, 0, 0, 0, 0, 0, 0, 0, 0, 1.4388921206346279741e-09, 6.5376673591293467664e-07, 2.4182089760364024493e-05, 0.00039880176232783617072, 0.0037728522997899590664, 0.022917112728778440273, 0.09334968530661211239, 0.26084062164632215719, 0.49935247038577756928, 0.64262810933455249973, 0.53564397016856835076, 0.2604583835482605636, 0.059387695355091404958, 0.0015752311746957685552, 0, 0, 0, 0, 0, 0, 0, 0, 6.8376488468020759222e-12, 1.1492036416820246223e-07, 7.1697944771843335622e-06, 0.00015024502361066615209, 
0.0017129832691462772674, 0.012198067843520864206, 0.057614376649993519208, 0.18625055189529041155, 0.41578120601587292837, 0.64273379913532080465, 0.68051701302793721204, 0.48636048533160558538, 0.22961899064651025704, 0.056512123157649481187, 0.0020378520142550187594, 0, 0, 0, 0, 0, 0, 0, 0, 9.4145936087959968535e-09, 1.3870093306447404973e-06, 3.87897484409095467e-05, 0.00054627415024945797078, 0.0045890968808638254312, 0.025078293403640077724, 0.092478958938429417502, 0.23258667138901650828, 0.39413899790146145197, 0.42298881551441958049, 0.23945155313138169473, 0.027468762021704565962, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4046605238618360691e-10, 5.5343723764345201347e-07, 2.3980715961026268597e-05, 0.00042724725529509507927, 0.004257128234965919765, 0.026669687498425763417, 0.10780673176995581031, 0.27416234388481963702, 0.37835275864003403701, 0.14369344182750692918, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.2897006858339270209e-12, 4.7885683674533647596e-07, 3.6923035679543052192e-05, 0.00083835438251815758755, 0.0099780154986416052382, 0.068735165020702704286, 0.25981214905110344704, 0.35274625004443893594, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.7834701086365511565e-07, 0.00014053832363986818919, 0.0044940505180239382829, 0.063888867507214189279, 0.37188043953033556033, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.344155240374716805e-07, 0.0019825485126170991519, 0.10217325456175885279, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([]) output=np.array([0.0016644455553086624083, 0.002495003748750156472, 0.0037387626499230611904, 0.0055997301828166279672, 0.0083806908754831561242, 0.012528591196985617434, 0.018697687754151010031, 0.027833351892417868001, 0.041273795716988054272, 0.060850651795732749183, 0.088929278684076601413, 0.12824165844093327049, 0.18119239198615591513, 0.2480374111869024234, 0.32305865186741777872, 
0.38825858275226748928, 0.40740939128236214328, 0.33403355026188807919, 0.1632256493056772062, 0.017159494075893101661, 1.1100003703292180217e-06, 2.4962518746875000594e-06, 5.6123532398144536609e-06, 1.2613583192428893271e-05, 2.8332643039885458129e-05, 6.3586947395334136954e-05, 0.00014252672253270057788, 0.00031885525421459126778, 0.00071127791904317637373, 0.0015797796625579053637, 0.0034857420363066235133, 0.0076146085944587566657, 0.016381164929213501424, 0.034414525059745272595, 0.069651262704549549154, 0.13271586265443335861, 0.22841224706537738287, 0.32670272008482859061, 0.31696810306481593145, 0.097144158424494309045, 3.7012349794238681896e-10, 1.2487503125000001559e-09, 4.2124242480468747114e-09, 1.4206263137512205132e-08, 4.7892153138227467571e-08, 1.6136291046154529314e-07, 5.4321868305356652586e-07, 1.8263821320052253243e-06, 6.1287830369107454558e-06, 2.0506795807293100125e-05, 6.8314944883560930534e-05, 0.00022606641535874693596, 0.00074049070575379688161, 0.0023874614910316710632, 0.0075083864312185822493, 0.022682692646808663012, 0.064029150684570476648, 0.15976638757864897178, 0.30776038811264622153, 0.27497860584540806395, 6.1707818930041155992e-14, 3.1234375000000000385e-13, 1.5808447265625000092e-12, 8.0000230407714837838e-12, 4.0477309670448300476e-11, 2.0474318975195293786e-10, 1.0351972331039468262e-09, 5.2306989582536832363e-09, 2.6404574434177521861e-08, 1.3309725534797475592e-07, 6.6943159388078163953e-07, 3.3557879908853077946e-06, 1.6736492419104248054e-05, 8.2813468459374991711e-05, 0.00040470096744437702128, 0.0019383686901439161829, 0.0089744140037595811904, 0.039065023078631577746, 0.14941007561236896439, 0.3891805482645412928, 4.1152263374485595055e-18, 3.1249999999999996265e-17, 2.3730468749999997491e-16, 1.802032470703124905e-15, 1.3684184074401855573e-14, 1.0391427281498912374e-13, 7.8909900918882334723e-13, 5.9922206010276265389e-12, 4.550342518905353542e-11, 3.4554163502937537769e-10, 2.6239567910043193503e-09, 
1.9925671881689045749e-08, 1.5131057085157624606e-07, 1.1490146474041569126e-06, 8.7253299787253137094e-06, 6.625797452594535961e-05, 0.00050314649405639756945, 0.0038207686892407699206, 0.02901396223392209775, 0.22032477571384589954, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([100, ]) output=np.array([0.048976959490485305615, 0.072709651432435309926, 0.10738526002140330595, 0.15737069698548675212, 0.22795032016720712109, 0.32446067835465053353, 0.44989711207706667428, 0.59997610680291524332, 0.75567735422921566979, 0.87844628783911327119, 0.92474207754597603781, 0.89365801028359981295, 0.82931282542333117913, 0.73973445494401424138, 0.62003614417178387619, 0.47000507098119359561, 0.30069481083090426887, 0.14132229342078814205, 0.034781194831623722663, 0.0012542251818798723622, 3.2979557543950620822e-05, 7.3809001036874996929e-05, 0.00016474549918236330078, 0.00036624856893145208737, 0.00080932434818232947688, 0.0017723123294799305975, 0.003828742087843573557, 0.008104291378262778317, 0.016639190251049165714, 0.032657698828719619599, 0.060108147688838667322, 0.10184799113601375464, 0.15884341152280503917, 0.23108234071251923525, 0.31281804868450863166, 0.38543973833058026157, 0.4110892044013776947, 0.34067876601502938838, 0.16765476842547211156, 0.017707951623962527032, 1.1050122962962964101e-08, 3.7191558750000007188e-08, 1.250033797265625178e-07, 4.1927333935913091575e-07, 1.401894586697731242e-06, 4.665382495865281394e-06, 1.5415847866808392633e-05, 5.0391939592239989756e-05, 0.00016203956276710770662, 0.00050812727751784608765, 0.0015332452896675867124, 0.004365181610267205789, 0.011468673667365519159, 0.027632876244132381638, 0.061266201119033726619, 0.12400124625180757032, 0.22211304163999795458, 0.32622078746654581405, 0.32211683874203467237, 0.099883337969340235674, 1.8476543209876543964e-12, 9.3431250000000011463e-12, 4.7218886718750000078e-11, 
2.3843292297363282786e-10, 1.2024140499687195818e-09, 6.0518902752095459008e-09, 3.0369400855124124217e-08, 1.5170773682471650721e-07, 7.5254925311084902029e-07, 3.6922964379636862066e-06, 1.7800105408249686863e-05, 8.3338305189489776193e-05, 0.00037055336224994447902, 0.0015169299478213021069, 0.0056546686833628886926, 0.019188949419050076173, 0.058577982030935032975, 0.15402658068596292162, 0.30726533809094308536, 0.28101637370320353693, 1.2345679012345678054e-16, 9.3749999999999994959e-16, 7.1191406249999982613e-15, 5.4060974121093752673e-14, 4.1052552223205572084e-13, 3.1174281844496722735e-12, 2.3672970275664694964e-11, 1.7976661803082883009e-10, 1.3651027556716062823e-09, 1.0366249050881264123e-08, 7.8718703730129575545e-08, 5.9777015645067147174e-07, 4.5359107007994105463e-06, 3.3361176067584402466e-05, 0.00022366759792994563949, 0.0013435210788023243395, 0.0072639461407535297829, 0.035100831436999477275, 0.14396679069931464512, 0.39291034738734598175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.1354749159583518215e-10, 3.6975445151343504115e-08, 1.2697433810604620809e-06, 2.1473938565867883887e-05, 0.00026101495603127997146, 0.0026507409746741200131, 0.024215069210611606804, 0.20722776413426768904, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([1000, ]) output=np.array([0.0049866811037051986949, 0.0074700487125113432946, 0.011182664341492391802, 0.016723679332538900905, 0.024972650503249212151, 0.037206183352661716113, 0.055244396744596628579, 0.081608715488401706306, 0.11962663350752600344, 0.17331618121340841565, 0.2466772227646655824, 0.34167053826082144363, 0.45378166194157465441, 0.56434225055466613608, 0.6320750091698927875, 0.59733241830569760999, 0.42886959529414492298, 0.2049173244168793484, 0.050432732505854400984, 0.0018186265137258147626, 3.3277811103950621097e-06, 7.4812668695624991679e-06, 1.6811804138396487754e-05, 
3.7755607955489316522e-05, 8.4711061600122608767e-05, 0.00018979511914756760698, 0.00042433325892820140772, 0.00094567009442594711366, 0.002097376821719070146, 0.0046178870868948916628, 0.010055306643782090104, 0.021527218530989256778, 0.04489775700844654549, 0.089884991503020608694, 0.16855047321618027434, 0.28372166497555245668, 0.39667928927647067017, 0.39859166318439254173, 0.21962210770558857065, 0.024829927856976751616, 1.1100002962962965174e-09, 3.7443772500000006212e-09, 1.2627790523437500951e-08, 4.2570810898681638324e-08, 1.4343375976686856963e-07, 4.8286151921739802212e-07, 1.6234543349501860428e-06, 5.4478341089133651746e-06, 1.8228467705229433379e-05, 6.0725950389411787269e-05, 0.000200959732568890164, 0.00065830362619350942834, 0.0021228688895969845957, 0.0066792918381076149537, 0.020201657448734186562, 0.057212961493873837338, 0.14427872595983071147, 0.29075824853504678158, 0.36564110074442957021, 0.13330127370825312072, 1.8509876543209877828e-13, 9.3684375000000001865e-13, 4.7411103515625013439e-12, 2.3989256927490236273e-11, 1.2134982390689849712e-10, 6.1360608361896876588e-10, 3.1008571052567070342e-09, 1.5656143551154886378e-08, 7.8940702751398238619e-08, 3.9721851623374797589e-07, 1.9925510408963188817e-06, 9.9478099413657914188e-06, 4.930161383220328208e-05, 0.00024154631749370003959, 0.0011617509224607792256, 0.0054175582232760775836, 0.023904363046940359239, 0.094270457100450164023, 0.27882003179675446392, 0.34581727191398570209, 1.2345679012345680828e-17, 9.3750000000000019611e-17, 7.1191406250000002335e-16, 5.4060974121093743207e-15, 4.1052552223205574609e-14, 3.1174281844496730813e-13, 2.3672970275664701427e-12, 1.7976661803082880424e-11, 1.3651027556716061272e-10, 1.0366249050881260814e-09, 7.8718703730129572237e-09, 5.9777015645067147174e-08, 4.5393171255472860584e-07, 3.4470439422124707377e-06, 2.6175989936175949598e-05, 0.00019877392357783613304, 0.0015094394821691928168, 0.011462306067722308894, 0.084705097520176186876, 
0.41086218643981919918, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00077892972719669731786, 0.083370713567239546071, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- degree=5 df=None intercept=FALSE Boundary.knots=np.array([0, 3000, ]) knots=np.array([10, 100, 1000, ]) output=np.array([0.40055561099999992258, 0.53723465540624992798, 0.68096479632714856933, 0.79404388738027953387, 0.82432650645400162848, 0.74772850370839760714, 0.60695411066016569102, 0.43541006443525137604, 0.25280073560290239332, 0.098204648213291390046, 0.015109611834895182725, 4.9867660859134372328e-05, 0, 0, 0, 0, 0, 0, 0, 0, 0.0089449327889999999397, 0.01902900664771874778, 0.039353322248692389207, 0.078015642723590794549, 0.14535820079903546964, 0.24864829948443537, 0.38546024005089984943, 0.54588573189455047441, 0.70480915137930777448, 0.81344804238272472308, 0.81717844525199323513, 0.71389962178539645432, 0.56021095018500233209, 0.38030131620961826755, 0.19975862317934431345, 0.062981619616186521049, 0.0053407260835513798575, 7.8059373845321992395e-10, 0, 0, 9.4545814444444459148e-06, 3.1017290343749996553e-05, 0.00010028811495410157556, 0.00031713361145809936415, 0.00096922483761113851879, 0.0028119072707210087531, 0.0075647083349296134064, 0.01861553313384527869, 0.042046394339120052308, 0.087124325583113268467, 0.16371060089498334911, 0.27411331174625735985, 0.40781324509205862938, 0.54286075051770132927, 0.63352683070691229172, 0.61289306201898841042, 0.44453962951650222157, 0.21269241367644950436, 0.052346279846925344859, 0.0018876298723944570646, 1.6294444444444447802e-09, 8.1548437499999982349e-09, 4.0568422851562507446e-08, 1.9994451278686527012e-07, 9.7096128099918361425e-07, 4.602189952521174856e-06, 2.0919648984453888767e-05, 8.8510989072612499115e-05, 0.0003426071500521665281, 0.0012159913347531664096, 0.0039613824662355322237, 0.011728070771626651625, 0.030981021316488138728, 0.072628957837475555115, 0.15091202659488145432, 
0.27128094978421118944, 0.39491276471324787689, 0.40566968711467132902, 0.22597501897826155481, 0.025700635234072819607, 1.1111111111111111028e-13, 8.4374999999999991361e-13, 6.4072265625000016033e-12, 4.8654876708984379097e-11, 3.694729700088501736e-10, 2.8056853660047051398e-09, 2.1305018189503776537e-08, 1.5953977978988917004e-07, 1.1111370996233737328e-06, 6.9846696024166045466e-06, 3.9856249948076781855e-05, 0.00020802815256623063945, 0.00098445759075916142167, 0.0041227618062229087065, 0.015186511992247679614, 0.04905127538388008579, 0.1347633037922978394, 0.28642046903795781443, 0.37096041450757122337, 0.1373866644432741313, 0, 0, 0, 0, 0, 0, 2.1835286481805527197e-15, 7.5005881261891668233e-12, 3.9151793493690686266e-10, 7.8165151445884371071e-09, 1.0330194469679949402e-07, 1.0998832941312460716e-06, 1.0325449816590390154e-05, 8.6094485881001237893e-05, 0.00061191613127527461416, 0.0037238993946882535668, 0.01960252770274439893, 0.08667615291637777164, 0.27442290749348607903, 0.35255941616703945218, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.6587525069769108936e-10, 1.1914310104321792976e-07, 4.09139533897260065e-06, 6.9193802045574309841e-05, 0.00084104819165634655664, 0.0085412764739499439509, 0.075516449446559150149, 0.39909494071597967357, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00077892972719669731786, 0.083370713567239546071, ]).reshape((20, 8, ), order="F") --END TEST CASE-- """ R_bs_num_tests = 72 patsy-0.5.2/patsy/test_splines_crs_data.py000066400000000000000000004041421412400214200207370ustar00rootroot00000000000000# This file auto-generated by tools/get-R-crs-test-vectors.R # Using: R version 3.0.3 (2014-03-06) and package 'mgcv' version 1.7.28 import numpy as np R_crs_test_x = np.array([1, -1.5, 2.25, -3.375, 5.0625, -7.59375, 11.390625, -17.0859375, 25.628906250000000000, -38.443359375000000000, 57.665039062500000000, -86.497558593750000000, 129.74633789062500000, -194.6195068359375, 291.92926025390625000, -437.89389038085937500, 
656.84083557128906250, -985.26125335693359375, 1477.8918800354003906, -2216.8378200531005859, ]) R_crs_test_data = """ --BEGIN TEST CASE-- spline_type=cr nb_knots=4 knots=None absorb_cons=FALSE output=np.array([-1.693557754132211208e-05, -1.9836972061171899575e-05, -1.4954358368392469422e-05, -2.088026100812221311e-05, -9.5355264085171175196e-06, -1.8699295604068575353e-05, 4.9022718833308546933e-06, 1.7974067511175931267e-05, 3.744122291002612696e-05, 0.0002988262983956019303, 0.00010718085083579747424, 0.001935560671294481172, 0.00024699248283329116715, 0.010566342124751632037, 0.00048044994314723433668, 0.053454984128479515748, 0.00065729103443297996496, 0.25076507420610638643, 0, 1, 0.35560680685948470314, 0.46359416252769836131, 0.30168882277557279581, 0.54466866236250932598, 0.18063664369405590948, 0.72711735964503432239, -0.089776348720933471514, 1.1356863997659447652, -0.68566908660007752641, 2.0361466894716735432, -1.9628257407671605428, 3.9650378336575471394, -4.523225924226894179, 7.8266636243390204086, -8.7985820993738865781, 14.246329329102128014, -12.037110654561336887, 18.944322912930825709, 0, 0, 0.6444562685323340645, 0.53647236902629924504, 0.69836999393477805498, 0.45539628464397530205, 0.81940601333710849641, 0.27293213874197075341, 1.0897461189737138731, -0.13572091900106228457, 1.6852766572444068949, -1.0365718798973906356, 2.9606222279687766097, -2.967335129700412466, 5.5120421008950604147, -6.8380635660933348774, 9.743925990387539926, -13.301405831788731149, 12.784702352769150124, -18.197306344378148424, 0, 0, -4.6139814277377641984e-05, -4.6694581936484837246e-05, -4.3862351982477081188e-05, -4.4066745476440737986e-05, -3.3121504755945730075e-05, -3.0799091401098394798e-05, 2.5327475336172373395e-05, 1.6545167606241587368e-05, 0.00035498813276064257764, 0.00012636412732133810903, 0.0020963319475481821674, 0.00036173537157081639762, 0.010936830849001568516, 0.00083359962956235762136, 0.054175659043200374843, 0.0016215185581219158321, 
0.25175101075775591086, 0.0022183572412113072327, 1, 0, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=7 knots=None absorb_cons=FALSE output=np.array([-2.9605259404128104445e-07, 4.3004543792069181325e-07, -4.6506508782294522313e-07, 1.2225175036037286749e-06, -4.8490441759023038782e-07, 2.7062259965304665474e-06, 2.8104230735656565092e-07, -5.4449455566502238182e-06, 1.7542735683802410547e-06, -0.00011038827024139799939, 2.8120883750482203265e-06, -0.00053376130083573273665, -7.6784755538918750706e-07, 0.00061879177472234751789, -9.2078635815506674409e-06, 0.029574116402683980898, -1.6901017048743381161e-05, 0.21556638245963724576, 0, 1, 0.00010456869825724268133, -0.00015189629322606839636, 0.00016426557921581520349, -0.00043180524853199072667, 0.00017127302630398999017, -0.000955865732450367038, -9.9266916848531769006e-05, 0.0019305664182940607265, -0.00061962673904836988552, 0.042415216868389754579, -0.00099325736940550032363, 0.27732557133728741317, 0.00027121133522598855859, 1.1504608369631013076, 0.0032523082987030572759, 2.7821404636666882126, 0.0059696060348119403538, 4.1098324719022825136, 0, 0, -0.031642380707528651451, 0.046727930564370717681, -0.049706595580857972083, 0.14260551573254778845, -0.051827041873556585483, 0.44422176162356585838, 0.030038067097830360025, 1.2696071710389067455, 0.18749841492049976188, 2.6526069738107440621, 0.30055865997270758694, 3.4401400654673524038, -0.082068271523342908869, -0.89294447099362916909, -0.98414515128277502143, -10.707999025460104292, -1.8063966557448927208, -19.65451295888456329, 0, 0, 0.9404055233767310007, 1.0229997547271381109, 0.84839014691027592185, 0.9942253298077886603, 0.55270681506501917468, 0.71325376394101502875, -0.27008001161472772189, -0.36441609214674652861, -1.6858466263672904351, -2.2746949537899268101, -2.7024004611201863923, -3.6463202499480562579, 0.73789700429246496416, 0.99563659340566201816, 8.8487029815652924469, 11.939460983544531558, 
16.241778413219694954, 21.914859168852931504, 0, 0, 0.091452250263845791256, -0.069826465452058242289, 0.20181123367547218472, -0.13689085681302756714, 0.50012121362724204499, -0.15708533369949770342, 1.2380919977577322655, 0.093217876961678852732, 2.4558884227495125785, 0.58186847095202909319, 3.1248015911458146832, 0.93273112726773044212, -0.80684053529361110524, -0.25468449792074498994, -9.6754590529148973843, -3.0541220021223365322, -17.759287695723489975, -5.6058354437652431201, 0, 0, -0.00032057317842933900735, 0.00025095691201626593508, -0.0006604553857512240455, 0.00049198690620599915858, -0.0011751018630891821651, 0.00056456602826813432217, 0.0020547291059278675364, -0.00033502584436379451204, 0.043190243671805170211, -0.002091240244288253107, 0.27856793393313283858, -0.0033522436217435750035, 1.1501216068412509763, 0.00091533825638770416853, 2.7780724886333088008, 0.010976540508122837117, 4.1023657110424904815, 0.02014742036749034293, 0, 0, 9.0759971803981566744e-07, -7.1050367876085636178e-07, 1.8698667331515860834e-06, -1.392902486538716717e-06, 3.3269224981118857968e-06, -1.5983868974364642471e-06, -5.7964722216058981582e-06, 9.4851778732840647118e-07, -0.00011258250904733756745, 5.9206732932833133743e-06, -0.00053727865043814320746, 9.4907982657877548258e-06, 0.00061975219556745361413, -2.5914854994384806915e-06, 0.029585633563949290115, -3.1076539587733474873e-05, 0.21558752218843252324, -5.7040932539508893209e-05, 1, 0, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=10 knots=None absorb_cons=FALSE output=np.array([1.9320747025477182898e-08, 4.8242814281426965595e-08, -1.2568948282371909281e-08, -7.8978523641721503504e-08, -3.5770522726027550527e-08, -7.01473863118866388e-07, 2.1638270976696590321e-08, 2.2109425903379564006e-06, 8.5370349850805669926e-08, 3.744561791897189516e-05, -3.2925418782971613435e-08, -7.9813891850179092256e-05, -1.991767035296446912e-07, -0.0021868285157625511929, 
3.7614188014338205365e-08, 0.0021874060835819046644, 4.688717754303082178e-07, 0.15874967738905676473, 0, 1, -1.4544731165165144453e-06, -3.6317372379498608116e-06, 9.4619516292459995175e-07, 5.9455330204143990165e-06, 2.6928184298539113977e-06, 5.2807216744776855019e-05, -1.6289371928530509096e-06, -0.00016644059132742171966, -6.4267121060004543239e-06, -0.002818920227187673988, 2.4786379329293728427e-06, 0.0060407977628119044478, 1.4994097295422416361e-05, 0.20920617714101982787, -2.8316102474873858596e-06, 1.0967921747761912865, -3.5296843934527352639e-05, 2.0750595382021144175, 0, 0, 9.7994633378747003421e-05, 0.00024468706579685965685, -6.3749578484895432179e-05, -0.00040057827261335182721, -0.00018142772925296013898, -0.0035578683345143034648, 0.00010974909140496827962, 0.011347453981486406438, 0.00043299755045772883848, 0.2521011214081396723, -0.00016699739084748591402, 1.1986460188118959191, -0.0010102222245463542399, 2.0307158061521746184, 0.00019077878093656664284, -0.33272379892704945226, 0.0023781128997935555708, -4.1474987648643217852, 0, 0, -0.0066281488228501295526, -0.016550113319571305159, 0.0043118860597063267057, 0.027486913394933615296, 0.012271386183288513286, 0.32813715189346348566, -0.007423195392682361074, 1.293350915313034033, -0.029287034456989179121, 1.9521712704316960263, 0.011295348749219022086, -0.66799495509124628967, 0.068329285161611297283, -4.0409215143626093791, -0.012903871453880378511, 0.76312128325689243535, -0.16085050450109564246, 9.5125283792626493806, 0, 0, 0.14341751361350341121, 0.77593360542645228861, -0.090432986778547594375, 1.1259563994195789238, -0.25736721450922656063, 1.1109363559631879603, 0.15568633342939960928, -0.56801737059939128027, 0.61423561827887318554, -2.241022016113413784, -0.23689682657495314544, 0.86431165517469521475, -1.4330669354534046889, 5.2285058979714911231, 0.27063229881828543277, -0.9873946118959923135, 3.3735101868301691219, -12.308160541801825616, 0, 0, 0.88922814381914494497, 
0.26150528401135941792, 1.052718109833140403, -0.16738921270899279059, 0.90250428645253855109, -0.47638032258407536634, -0.44193283907468589033, 0.28817153685428037457, -1.7435755898887204118, 1.1369348754708179516, 0.6724577863061328431, -0.43849014288054521948, 4.0679186503944446684, -2.652571308655904847, -0.76821964733539216397, 0.50093366421426255286, -9.5760809678860194794, 6.2442835778727587837, 0, 0, -0.026500124336832579092, -0.021442252392463622551, 0.033954742484493150023, 0.014552615451508882013, 0.34654423116839672137, 0.041415928368598843579, 1.2822161222240113787, -0.025053284450302942821, 1.9082407187462107068, -0.09884374129233876316, -0.65105193196742539019, 0.038121802028613864521, -3.9384275866202003158, 0.23061133742043829487, 0.74376547607607035317, -0.043550566156846479138, 9.271252622511015673, -0.54287045269119871271, 0, 0, 0.00039179415524385585289, 0.00031701546211065640812, -0.0004962026403406965408, -0.0002151548273865228544, -0.0038300099283937543392, -0.00061231858622874310843, 0.011512077618593902564, 0.00037040318349176827237, 0.25275061773382700991, 0.0014613667327948419179, 1.1983955227256271048, -0.00056361619411026103298, 2.0292004728153583137, -0.0034095000078439544229, -0.33243763075564392029, 0.00064387838566091657681, -4.1439315955146351911, 0.0080261310368032780238, 0, 0, -5.8151558545865533576e-06, -4.7052624338916936783e-06, 7.3648257648012915326e-06, 3.1934086748705229843e-06, 5.6846444389557634026e-05, 9.088262200756952661e-06, -0.00016888399711670137003, -5.4976630258790269937e-06, -0.0028285602953466680576, -2.1690153357751540543e-05, 0.0060445157197113750028, 8.3654030236365385289e-06, 0.20922866828696309871, 5.0605078372050556115e-05, 1.0967879273608196478, -9.5566845852699449579e-06, 2.0750065929362113692, -0.00011912684827902968556, 0, 0, 7.7246635846578622837e-08, 6.2503173239414331853e-08, -9.7831946065279388105e-08, -4.2420200453005213882e-08, -7.5512964720790736305e-07, -1.2072551420034312034e-07, 
2.2433999968030079565e-06, 7.3029164546350807041e-08, 3.7573673443748081119e-05, 2.8812493074646954793e-07, -7.9863279978354456061e-05, -1.1112328839252804556e-07, -0.0021871272808178477895, -6.7222137441255028355e-07, 0.0021874625048639208952, 1.2694788454839182361e-07, 0.15875038069671990049, 1.5824422420772902814e-06, 1, 0, ]).reshape((20, 10, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=12 knots=None absorb_cons=FALSE output=np.array([-3.6802322454608713193e-09, -1.1576165417746612791e-08, 4.0903329323109361483e-09, 4.0685732923556337374e-08, 1.7703518712245050831e-09, 7.8790091688795366003e-08, -9.3409196093845724123e-09, -2.611905916838673793e-06, 3.0933985220706180644e-09, 5.5610182678566951338e-06, 1.6919855821268632348e-08, 0.00016301336905366541432, -1.4352199746873791695e-08, -0.00096630860312933643686, -2.1293612839737406843e-08, -0.0074687624866493825676, 3.8568109059678503697e-08, 0.11083787128447247783, 0, 1, 1.4460849968779115402e-07, 4.5486583496536781572e-07, -1.6072271234908852579e-07, -1.5986770411107210554e-06, -6.9562981611555798878e-08, -3.0959233519658970583e-06, 3.6703563262437939878e-07, 0.00010263042405151407538, -1.2154986136127495596e-07, -0.00021851080443170532329, -6.6483710865399255783e-07, -0.006405334542942880266, 5.6394540729725285131e-07, 0.039346935329722818042, 8.3669649095786947889e-07, 0.58363014399245827235, -1.5154685940796589346e-06, 1.5869193120902693739, 0, 0, -5.4865400297656400151e-06, -1.725790404504806206e-05, 6.09792368290666473e-06, 6.0654841172256407627e-05, 2.6392645247385444743e-06, 0.0001174613348200111455, -1.3925569348217424364e-05, -0.0038938646832389072101, 4.6116803743272812418e-06, 0.008333356501832860086, 2.5224350005560149737e-05, 0.3457041582444345118, -2.1396453586193263292e-05, 1.3943941859934374516, -3.1744806151200007029e-05, 1.0659241155872372087, 5.7497858861837635377e-05, -1.8607361322222011335, 0, 0, 0.00023256871331852414829, 0.00073154456479976239392, 
-0.00025848462913861898265, -0.0025710954976070178948, -0.00011187567233550075858, -0.0049790635547287236118, 0.00059029037024653681354, 0.20118319692573388702, -0.00019548432437836398673, 1.1583324452787904235, -0.0010692339057516754303, 1.5709796196933376589, 0.00090697336629712820011, -1.2050531219054301246, 0.0013456292455859321117, -1.7878746869280606191, -0.0024372743079464744345, 3.238292460253569427, 0, 0, -0.0096036016628270864243, -0.030208115694913360155, 0.010673763374229244122, 0.11304537553087100343, 0.0046197503419112585787, 0.84717528615215309529, -0.024375220124670693433, 1.6209052609300536041, 0.008072253382102512545, -0.43744451537192496904, 0.044152527520605257261, -2.3926752654369827233, -0.037452204143992279262, 2.0295771844456078625, -0.055565888790719414336, 3.0111781856549497682, 0.10064385386396751398, -5.4540039558593536029, 0, 0, 0.10357732090048776818, 0.85603572021517670976, -0.11294852267110891408, 1.0800087615333373581, -0.048885660842739625531, 0.2401585690492896441, 0.25793574452960393861, -1.2534276012546561319, -0.085419646489135220291, 0.4150930798449654624, -0.46721691129938069942, 2.2704203849822679473, 0.39631486857335096463, -1.9258749730998911964, 0.58799177288976445244, -2.8573206043806020915, -1.0650015567431445618, 5.1753290302415839719, 0, 0, 0.94933893888308096276, 0.20425894956750886844, 0.97636209569699528021, -0.2257137560392956932, 0.19529887869007700463, -0.097691991575737782694, -1.0167341121476407562, 0.51545291906204826482, 0.33670815416251220764, -0.17070067666802030137, 1.8416810448534708389, -0.93367563771983919096, -1.5621985497383736874, 0.79198660986818203433, -2.3177527963386914678, 1.1750293712615387243, 4.1980354999103974833, -2.1282748625244334306, 0, 0, -0.044595007867203530216, -0.03154770903003507182, 0.12905602059221518707, 0.036023951388023812969, 0.85410491166502156002, 0.015591657403950714977, 1.5843424307430495901, -0.082266367920763708299, -0.42533613529877184467, 0.027243855164596028628, 
-2.3264464741560830774, 0.14901478038204274412, 1.9733988782296159048, -0.12640118898597454966, 2.9278293524688816518, -0.18753487466867790889, -5.3030381750634045801, 0.33967300679089129645, 0, 0, 0.001079949373603220162, 0.00076398525832885994229, -0.0029588224413149436312, -0.00087238562334283919521, -0.0051468770632319233924, -0.000377580394132319148, 0.20206863248110379372, 0.0019922299995820589268, 1.1580392187922230463, -0.00065975959477697777422, 1.5693757688347129697, -0.0036086644319118939793, -1.2036926618559800062, 0.0030610351112528135301, -1.7858562430596849335, 0.0045414987038525046412, 3.234636548791644195, -0.0082258007893193495902, 0, 0, -2.5477139138140648842e-05, -1.8023214052146796125e-05, 6.9801726696616189999e-05, 2.058049242980996096e-05, 0.00012142023160711750383, 8.9075177709926672744e-06, -0.0039147530372612262053, -4.699879655023364312e-05, 0.0083402740223943259285, 1.5564421263354520723e-05, 0.34574199476944195153, 8.5132181268765064475e-05, 1.3943620913130563288, -7.2213030853402265731e-05, 1.0658764983780117941, -0.00010713872076029942911, -1.8606498854339041937, 0.00019405527365870157302, 0, 0, 6.7149986095353481289e-07, 4.7503707792034416037e-07, -1.8397611096343514486e-06, -5.4243915417817383743e-07, -3.2002678243831975437e-06, -2.3477506293900330442e-07, 0.00010318097750045069814, 1.2387452601072785849e-06, -0.00021869312922374677631, -4.1023078209430247345e-07, -0.00640633179860586148, -2.2438252417072174446e-06, 0.039347781247833550633, 1.9033157496282319598e-06, 0.58363139903719374324, 2.8238506569827991151e-06, 1.5869170388873774513, -5.1147065050188478719e-06, 0, 0, -1.7089420375974873326e-08, -1.2089516008577751082e-08, 4.6821232322022749866e-08, 1.3804873646549429766e-08, 8.1445619495631410706e-08, 5.9749375653827772664e-09, -2.6259172962527552498e-06, -3.1525603681672934994e-08, 5.5656583656398050473e-06, 1.0440220011988337053e-08, 0.00016303874883739764194, 5.7104513396781534087e-08, -0.00096633013142895208495, 
-4.8438674145699212406e-08, -0.0074687944270686538323, -7.1865943334113596723e-08, 0.11083792913663595425, 1.3016736807641509225e-07, 1, 0, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=4 knots=np.array([-2500, -150, 300, 1500.0000000000000000, ]) absorb_cons=FALSE output=np.array([-0.0051418610449036764379, -0.0051207276560822079237, -0.0051517581620670691717, -0.0051036969019558836927, -0.0051724091286762859804, -0.0050616345732373531699, -0.0052108015793779615538, -0.0049476511776847470828, -0.00525764719160080711, -0.0045887810243909702562, -0.0051780211262217532009, -0.0032118234048454639452, -0.0042344880890217376918, 0.0033309098171163710467, -0.0002048781708894489937, 0.036873131791193267115, 0.0053670164361401782871, 0.19556964857406355929, 0.00027785197169554257113, 0.82912284340298503249, 0.65949448911253716332, 0.6654093842010145865, 0.65653556733089935005, 0.66984284447353759084, 0.64987461295218140744, 0.67980900603329297294, 0.63487206047522759533, 0.70218091452751663084, 0.60105778412747068451, 0.75218902517658670082, 0.52490094418674082544, 0.86229539201840321727, 0.35569403068909899446, 1.0895778496922625678, 0.014595118281322956924, 1.4713138143490975818, -0.3821845778229204238, 1.7530984807387750557, -0.019785804601727036839, 0.46986487896086814864, 0.35950128095327349431, 0.35338118891054576265, 0.36256112212690350116, 0.34879091185540550546, 0.36944502095300485456, 0.3384628569483467686, 0.38492806794448852781, 0.31523169436694697954, 0.41971361558839714867, 0.26307215994266325287, 0.49746707294522052312, 0.14713121281897215131, 0.66691371985704051006, -0.097089653687300894735, 0.9873402560545572193, -0.53105535871660392022, 1.2102463767433073727, -0.99135819513191647534, 0.044702741392222536398, -0.3124421698702265493, -0.013853909020907106964, -0.013669845455478047552, -0.013944931295735877447, -0.013530059426987159701, -0.014147224776509979491, -0.013210228408402318115, 
-0.014589326840338213628, -0.012464957716778975183, -0.015513752524267036478, -0.010672404094858929657, -0.017189996005739616169, -0.0062147814325298833885, -0.018373262457117778973, 0.004180894177922047858, -0.0017304961649907309174, 0.022868412576313085216, 0.16657118464347281384, 0.042690065819077735454, 0.97480521123780894399, 0.013454447506373390722, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=FALSE output=np.array([-0.00093170433340244659687, -0.0012410506081968345274, -0.00078243418040293037045, -0.0014789118938009710238, -0.00046560203080319340248, -0.0020151841680895970881, 0.00010431589287147467922, -0.0030688678143442819739, 0.00055449747551044887423, -0.0028609656322684854333, -0.00014909864114985113439, 0.026923379556954812886, 0.00037005603844857983247, 0.24511712352852058072, 0.0023876748113693508323, 1.1555668732201982429, 0.0069273170504410850126, 3.402689781560706006, 0.017141512088352486159, 8.4587163253268489171, 0.084019186191167327671, 0.11446091282800011091, 0.069879045425061070418, 0.13908517998091646239, 0.040857714895860555715, 0.19963820957805075706, -0.009029120060764306635, 0.3576254481931359086, -0.047994837046959955285, 0.77395141822244861718, 0.012905315717303777676, 1.5659844614539342178, -0.032030405994604847775, 2.0431817208334850378, -0.20666651978408043244, -0.51809792415464139825, -0.59959777581040041294, -8.0018873070243543566, -1.4836931018696206674, -24.840413418481212204, 1.0495495283018867472, 1.0438716260482179266, 1.049399522569444354, 1.0346122703952109756, 1.0414030908228715244, 0.99920617535829048261, 0.98168936282733698651, 0.85586872666349445016, 0.66418975301951443946, 0.32605391435347208517, -0.11777757243608029392, -0.85263295720592457982, 0.29231857203837463555, -1.8526421434409372502, 1.8860972902309971477, 0.52133890068006216723, 5.472099406164398161, 8.051943344140203962, 13.540604167014549830, 
24.995803341925523000, -0.15192966618287376268, -0.17994115928882439825, -0.13573193509615386065, -0.19726850771611939561, -0.093692687860718673609, -0.2254589026981791422, 0.031190848164371551277, -0.24103262443025505468, 0.43031985595064925487, -0.11127445668091072439, 1.0214738883945315706, 0.29750331491431342146, -1.0287095298822455103, 0.64642959712677627859, -6.6374375162553720386, -0.1819071733449817152, -19.257075485594906894, -2.8095088468476898669, -47.65126091660886232, -8.7216126122287835898, 0.019292656023222061468, 0.02284967102080309731, 0.017235801282051283617, 0.025049969233792938189, 0.011897484172789671297, 0.028629701929927506981, -0.0039554068238157324977, 0.030607317387968895062, -0.047069269398714280728, 0.014130089737258502702, 0.083547466965394667771, -0.037778198719277893136, 1.7680513078000272831, -0.082086298047844605263, 5.9556190709970859842, 0.023099323599362758808, 15.377646538190468561, 0.35676302817113519916, 36.577208339375580692, 1.1075063634576232108, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=FALSE output=np.array([-4.3178765217391302305e-05, 6.5885993478260890537e-05, -9.6313081793478265121e-05, 0.00015008344269701090522, -0.00021248546106063178089, 0.00034663246069176721631, -0.00045706603540709121736, 0.00082099033067846731351, -0.00092573826895791448514, 0.0020071162494608137464, -0.0016063078222631245202, 0.0047785217809616390913, -0.0016924900384041397308, 0.0058508393862588175052, 0.00037271449492305578147, -0.023079518000319019372, -0.00039460853415255482939, 0.96024636175750266442, 0.0010388953913813049352, 4.2828168145345602014, 0.0006476814782608695956, -0.00098828990217391310055, 0.0014446962269021740039, -0.0022512516404551634157, 0.0031872819159094768759, -0.0051994869103765082446, 0.0068559905311063691277, -0.01231485496017700916, 0.013886074034368717928, -0.030106743741912203594, 
0.024094617333946868887, -0.071677826714424588972, 0.02538735057606209683, -0.087762590793882266915, -0.0055907174238458371557, 0.77073211937350760703, 0.0059191280122883223325, 0.065812149128730040859, -0.015583430870719573377, -5.436090696106786524, -0.0031088710956521745793, 0.0047440075304347817733, -0.0069345418891304357392, 0.010808468249184780344, -0.015298953196365492127, 0.024985562378791605076, -0.032908754549310574589, 0.059430528454937264771, -0.066653155364969851604, 0.14814853819552065151, -0.11565416320294498453, 0.38547179702354089637, -0.12185928276509808144, 0.89303994817074205947, 0.026835443634460023205, 0.3158234170040984945, -0.028411814458983948584, -0.033037325841022327499, 0.07480046817945394666, 2.7299491962930431121, 0.99996527520000000333, 0.99992195380000004068, 0.99982468157500004047, 0.99960649719062499852, 0.99911787098710935773, 0.99802618626118166922, 0.9955959649107849474, 0.99021595070231704927, 0.97840786415926050967, 0.95284186025012007626, 0.89870074544754108281, 0.78829881686806935193, 0.57842205914062527761, 0.2362299420741153233, -0.10286920059876342171, -0.080364133358400105522, 0.1089119554261051559, 0.0088355638877152739563, -0.28673512802124018206, -0.73010269203186040077, 0.0031519622956521736561, -0.0046471747304347834143, 0.0071520025891304338586, -0.010320575886684784858, 0.016392005544802985134, -0.022542075546565051858, 0.038353001709637714323, -0.04736186631031753802, 0.093197128054721647961, -0.090485094976372942854, 0.23844273869784807229, -0.13266783181011393422, 0.6114558419262043909, -0.058789413728980027818, 0.96115953962745293016, 0.020964556528278284475, -0.40723600724543668194, -0.0023049297098387665911, 1.0721400439055068787, 0.19046157183439835214, -0.00065664547826086978963, 0.00096816140217391292219, -0.0014898486644021743334, 0.002150119976392662717, -0.0034132712039954154516, 0.0046962657388677176915, -0.0079705034644407205674, 0.0098670554813161497065, -0.019191613515453308236, 
0.018851061453411026292, -0.047118889772279910766, 0.027639131627107058065, -0.098264441613631667294, 0.012247794526870834503, 0.1283332564542301879, -0.0043676159433913086874, 1.1341990151205716408, 0.00048019368954974300367, -2.1349300292885819985, -0.039679494132166315268, 4.3776365217391304853e-05, -6.4544093478260855155e-05, 9.93232442934782663e-05, -0.00014334133175951084961, 0.00022755141359969430562, -0.00031308438259118116136, 0.00053136689762938121937, -0.00065780369875441000935, 0.0012794409010302202311, -0.0012567374302274016227, 0.0031412593181519930102, -0.0018426087751404706099, 0.0065509627742421101121, -0.00081651963512472227127, -0.0082410361884569308111, 0.00029117439622608720607, 0.18701233167960798487, -3.2012912636649525446e-05, 2.2892691807041996022, 0.0026452996088110873087, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=4 knots=None absorb_cons=TRUE output=np.array([0.17237877473947216256, 0.16160585322933293528, 0.17775653635702576283, 0.15351565450517737355, 0.18982763251278575445, 0.13530235680671015563, 0.21678099966619068795, 0.094474101590757947333, 0.27612342080317897608, 0.004254035614190725817, 0.4030409383688886571, -0.19022791859524765257, 0.65608982528520065625, -0.58588986559744449245, 1.0712259966408619327, -1.27728761497709975, 1.342827694119810289, -1.9788260499202323661, -0.052195429016186831173, -0.97077694213337639706, 0.68714996318883114768, 0.60683822144440202617, 0.72724727397328958745, 0.54653827251673237075, 0.81726443453130048766, 0.41083008335212661821, 1.0183156414681064916, 0.10689042936382432691, 1.4611707976831045386, -0.56312355687140791538, 2.4093551078217010364, -1.9991227596244038889, 4.3052173449842321418, -4.8778668329561920558, 7.4441765017641099433, -9.6842706472508055526, 9.6670625693150959989, -13.322034134461750554, 0.012161980255413960134, 0.22619930950229019673, -0.010768222248548299166, -0.01771835491152162767, -0.0072960943160139245939, 
-0.022933439873231927647, 0.00050471399250212036977, -0.034662450006663660107, 0.017964357449119585514, -0.060912800432191181732, 0.056636870493901259049, -0.11877531418126709151, 0.14054140101632672799, -0.24279484616490168425, 0.3140184596633784353, -0.49144965505677373763, 0.63173409697146676312, -0.90678479147331569887, 1.0347141866437283841, -1.2221561304860752983, 0.99694564370894334093, -0.056807630788862507887, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=7 knots=None absorb_cons=TRUE output=np.array([-0.088612856492796668317, -0.10148345379387192777, -0.074820024844059834779, -0.097934158724637176974, -0.030963004348234846042, -0.058173986143542688598, 0.090101482080220576809, 0.10049075761090919823, 0.29596829876665797787, 0.41518677441788176541, 0.43034390532152527742, 0.83452635605264402496, -0.13348462782429343365, 0.98208889847295099962, -1.4362329672138189895, 0.92468785756571492129, -2.6216702734870915847, 0.6954743327776672368, -0.0022196047790754855691, -0.12327370541474826082, 0.24123862297275155964, 0.35840804363927752929, 0.18093342115646737778, 0.44250759269715284061, 0.043937153723061256327, 0.6202158021725746595, -0.24740540994207005365, 0.96645134975980595549, -0.72475992234705299833, 1.5060196314875564649, -1.0261673763863199405, 1.7262771004734287494, 0.32934388972170997034, -0.37505857126375657939, 3.4434877057227697428, -4.9947636565362447669, 6.275814996477087071, -9.1524782805583591028, 0.0068271591356862497413, 0.37917074789447108296, 0.35954055514606403365, 0.35954551446237459356, 0.35744096324407559351, 0.35584226884375003142, 0.34885947617865670223, 0.33862610062421411028, 0.32049681236018934261, 0.28089301373203690027, 0.25602145147051952323, 0.16597482970505655908, 0.12171945403305862998, 0.0018741765108557348514, -0.13785069799652086009, -0.10675519142579256715, -0.57612609827379823724, -0.22195090041542500647, -0.96232722728117126021, -0.44017455467682947701, 
-0.01453255272801050671, -0.80711739351329703229, 0.39138909874594340899, 0.2727563805116787754, 0.45531894883729062684, 0.19274617399836088683, 0.60538032158012156092, 0.036358318155975434538, 0.93314031181054180042, -0.23999550325584054211, 1.4531806967029836652, -0.67840171762003531608, 1.6665321594967388297, -0.95105942435581225514, -0.35463732068059222735, 0.31454920667339703044, -4.8088313747127671149, 3.2255742320619229524, -8.8757338424365990193, 5.937464168828833877, 0.0075040642903919140341, 0.41676510136746514057, -0.089133365834868971689, -0.10118952771068721874, -0.075725350737607874385, -0.097115177460207771643, -0.032342847263930989132, -0.056715061432944156861, 0.092352440078496370046, 0.098331113369192327256, 0.3400969886051333213, 0.37108103090272787083, 0.71036876576058238797, 0.55444750827998945386, 1.0162219858335352907, -0.16763759291903618243, 1.3370398275661443677, -1.8484727482821472488, 1.4719012302123679614, -3.3978810087925515049, -0.0022219907599747576801, -0.12340621941421109353, -0.013194863919370570376, -0.015072699772667915927, -0.011151244212216769591, -0.014503828881363881043, -0.0046275650331869689486, -0.0085121840139092009442, 0.013410603654954418931, 0.014660726583422474187, 0.044001710617309369722, 0.05545171716304433257, 0.063619527459568614436, 0.082887169332405197242, -0.019275003418586279108, -0.025046122964883565931, -0.18452246030655442, -0.27630735647963872159, -0.17524590664413686181, -0.50790642667837671009, 0.99966985744385294943, -0.01833564992966553106, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=10 knots=None absorb_cons=TRUE output=np.array([0.017741203875868801626, 0.059666517581418020144, -0.0073055449468428850998, 0.071233532376632907601, -0.061951321114274808532, 0.023459686026844089879, -0.15952965061095217281, -0.21534242305337428225, -0.20894123625115404441, -0.40851053368902323637, 0.088769309083260417026, 0.1702312769628678002, 0.44867980227835829732, 
1.1139500668975872433, -0.14345767930218020214, 0.86682083739859494553, -1.1837156997359956634, -0.21597943647408635037, -0.017918712309548733702, -0.23789999499399971938, -0.0050889918235734665428, -0.017199613011022536063, 0.002072270281447358229, -0.021223643286449593581, 0.017930549906902432111, -0.010400764195970725601, 0.046747049525783831991, 0.07425318574607586708, 0.061514150450350559007, 0.37070324197740467387, -0.026117581474140658115, 1.1506456698664466742, -0.13217536320689113327, 1.766217994670280067, 0.042129159012710096222, -0.26549271162309995287, 0.34842176083555986832, -3.4777238134306913686, 0.0052384550413758917964, 0.06954899473750200678, 0.040717410233940966313, 0.14267724274658066808, -0.01518518709909348792, 0.21755481940124130058, -0.15305036931474685913, 0.39059745188013861306, -0.43311720487012722591, 0.71916259556541839437, -0.58682103919738470843, 0.86959977149390199624, 0.24816600986825945729, -0.22985936815572685643, 1.2655729988498243266, -1.6266494012894197052, -0.39570727271363553834, 0.14945216327392790712, -3.3194533089504827394, 3.3989844591011317831, -0.047815351859148348679, -0.63482641896460045849, 0.12003828098968173654, 0.69730715192779646472, -0.080805333880490451404, 1.032100885251629574, -0.17573134714242774623, 1.0800934656851248761, 0.36589399540256600796, -0.28448323213545234633, 0.88954585755350989995, -1.7064483114238784278, -0.35386353683003346493, 0.6479603434190704947, -2.0242657690367202861, 4.0363385548625183574, 0.45966058247434787853, -0.6843648579769759932, 4.9332279555632334578, -9.2892931271418373029, 0.023611216265584832746, 0.31347722617275158852, 0.88792799224608875974, 0.25713275781065136893, 1.0532535169725301127, -0.17260864810915746426, 0.90704417033506612444, -0.47809553851771402488, -0.43024289925585679217, 0.30393926379684116368, -1.7282652053936073955, 1.1666632610962044403, 0.66595310521671158277, -0.45052173959023528171, 4.0350412627641443208, -2.7188693891544946091, -0.75770752213855696588, 
0.51778556938925135444, -9.4893428981139695821, 6.4121669845342363914, 0.0013130525053329394382, 0.01743290360653331969, 0.018394340893766202016, 0.12954185558052447313, 0.015467038664664091788, 0.1947806488224434196, 0.18978123265431517952, 0.10064263984709266586, 0.87856041959654240792, -0.56951570226238645844, 1.3795704170465219462, -1.1253701882691806713, -0.4264441361865796698, 0.45357497790875550558, -2.8031655399252684013, 2.5198958573805381178, 0.38077992198436172444, -0.6254498513067092702, 6.2761716348405380828, -6.3399143661490446888, -0.045339936718340946575, -0.60196126440255459666, -0.004758953821358206937, -0.017005411359244829855, 0.0016248940817804027534, -0.020892741105429570719, 0.014155429774586492792, -0.0074074070841472963028, 0.057823550154750644192, 0.062836648910594553041, 0.313405031875580109, 0.11923488274498127149, 1.1726262403941665191, -0.048228614408167384819, 1.8989517069621104639, -0.26605941795394616456, -0.29079224977894058135, 0.067405260889645518541, -3.8003055548041984046, 0.6731217514912637423, 0.0052018569797349914138, 0.069063096056237921472, 0.017736305328008575982, 0.059663635167637223133, -0.0072989048214304030199, 0.071228621002306605203, -0.061895289364833461221, 0.02341525749747295701, -0.15969206961095888664, -0.21517495710840772727, -0.21175703601763257167, -0.40570100515546192455, 0.094808655216413129097, 0.16419386720169948379, 0.65787987528453284458, 0.90476706770574533145, 0.95333742847376501395, -0.22997392252730572415, 0.8913620730776831147, -2.2910886491383859465, -0.017918169107386142797, -0.23789278310346156919, 0.0060750632566522310557, 0.020430784619126868273, -0.0025017991326720841194, 0.024387881029101547703, -0.021213459590422957535, 0.0080142623794636578399, -0.054619250223539139721, -0.073674968074426458453, -0.071500525070803497285, -0.13890622902065649646, 0.030313398118739860038, 0.056217781624179681554, 0.1514331690285295462, 0.30977852573253761292, -0.046930667126897918084, 
-0.078740760116175548267, -0.2465351039505715991, -0.78443722293869766471, 0.99386473410822107066, -0.081455614651688740269, ]).reshape((20, 9, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=12 knots=None absorb_cons=TRUE output=np.array([-0.10472685656120690745, -0.12767819762829035746, -0.076360123823526582232, -0.10958044510013424666, 0.0027880985603357523288, -0.00047490167664404706613, 0.06928482401745712782, 0.12032832514458154105, -0.15700287132688625635, -0.1737016309573724826, -0.34110464392275169354, -0.43703758617891158567, 0.21658232947898950482, 0.33207321984948795501, 0.31440967571649758305, 1.0125703207549268114, -0.77927839590651259982, 0.57499654812351752575, -0.03855715180341089815, -0.29753053676014623585, -0.045155067335396106842, -0.055061683622734447496, -0.032913973166337623377, -0.047180638471765484754, 0.0012046656812917998312, -8.5942102116568291834e-05, 0.029855775209106073798, 0.047937464934110683712, -0.067681928151560877271, -0.066458149628327731206, -0.14703046798660462935, 0.16005131398107033669, 0.09335066435731875778, 1.5205934362940900506, 0.13551522132064452997, 1.2508474777651827914, -0.33590194032153924963, -2.2969931440729616234, -0.016622640017795731193, -0.1282704446656750541, -0.15034574662694516478, -0.18284705677723475659, -0.11005017611978802627, -0.16012547886604538983, 0.0038970012572561371633, -0.0056574341249261998407, 0.10020869531895890137, 0.37404580513022472887, -0.22593678600553301905, 0.90889537861179725553, -0.49151450544890296079, 0.95180903084537338277, 0.31231202672612079496, -0.78416614637393700082, 0.4534081611517823962, -1.1711369929499528819, -1.1228952128467266647, 1.7833321983277987677, -0.05543814773678283625, -0.42779461349253811164, 0.021352229303511102015, 0.007531901115204664085, 0.033244696218130601051, 0.14543534382436593133, 0.0037956069961920194805, 0.8473147453077168878, -0.044854732714249959113, 1.5853682337831724869, 0.054480061579266131355, 
-0.38616534110945410774, 0.14497807394687750593, -2.2653864188032963867, -0.10147073278861845624, 1.9430514052120930746, -0.14850072666747962669, 2.8843894925937085816, 0.33098715709405579499, -5.1548937962780287947, 0.011396952652503542641, 0.087945848734329423735, -0.065952557774967432436, 0.64935218850878217989, -0.23655843353990058109, 0.90262481725902277141, -0.044372232611305732763, 0.23939481976571783117, 0.37009197410777977533, -1.0588087616813195435, -0.33957241343679639423, 0.13426223409097867711, -1.0193888969785738219, 1.5733219045695456817, 0.74691289987499476233, -1.4520157873552757, 1.0969502242749886722, -2.1629612193760392458, -2.3264786688505232348, 3.5372496286723378311, -0.062415510749811636826, -0.48163620876963536555, 0.8195140136919921714, 0.045981988292849676481, 0.88170239584467990923, -0.36155327574779005939, 0.19875523391830787756, -0.098276866020166997462, -0.93084557008965973512, 0.66449081424484612857, 0.14207954819710574834, -0.38575918922177282466, 1.418831177364036078, -1.467509312217160522, -1.2937127610296201841, 1.1548650229305499337, -1.9279955842607379157, 1.7067654635554521114, 3.232004184432778704, -3.3827058656451867158, -0.047797409383925140713, -0.36883400885657852131, -0.021262964105923369551, -0.0031022854611233788291, 0.1460681949195489493, 0.060436933823290558965, 0.85348373794725573838, 0.015696770625561979468, 1.5689066026927753761, -0.10905135306422376884, -0.39035761984884714826, 0.065894018981400556689, -2.2504521949512743539, 0.2449549909100549594, 1.9251468021185453861, -0.19161744080459083839, 2.8577824618961185088, -0.28309810920621092389, -5.1294237100882460467, 0.56511845971327800964, 0.0085901166188240788563, 0.066286587283785933233, -0.14931374206792483172, -0.18258953057237148343, -0.11261589846568037521, -0.15823359174408363481, -0.0011429154130191262681, -0.0010551192149562264085, 0.30156489541842562696, 0.17464289155956300159, 0.93257469826065064744, -0.24979099176207356092, 1.0795318316732362973, 
-0.62202008893514815213, -0.89266942187285236443, 0.42343196160757057278, -1.3343479845023715757, 0.62052301130682563368, 2.1155524024387766246, -1.4614021376163455201, -0.055370175087794709645, -0.42727009501042717554, -0.045179413406039932444, -0.055067758936182993612, -0.032853445083453579334, -0.047225270074522207597, 0.0013235626049733512782, -0.00019451554100623699175, 0.025957829199430799971, 0.051789330864583066338, -0.059352795367343585964, -0.074783156665720870726, 0.19867211632366271745, -0.18558562156573668589, 1.4877431595022336808, 0.12613921140813655342, 1.2014365403948221722, 0.18483406256944329149, -2.1966417328137768372, -0.43610504124242488455, -0.016624243563923207095, -0.1282828186071544585, -0.10472621487286955244, -0.1276780375015148794, -0.076361719159604879437, -0.10957926874689910557, 0.0027849647992298789088, -0.00047204001118300933926, 0.069387562012853995497, 0.1202268016798509892, -0.15722127080697423374, -0.17348334021941591576, -0.34750993698157911505, -0.4306340234217196361, 0.25592930937433366889, 0.29272786696227376391, 0.89803989341652912248, 0.42894253042841035128, 0.80764101265733667656, -1.0119267694495412968, -0.038557109538814519911, -0.29753021062070239555, -0.052405524507834276571, -0.063890551260129033184, -0.038210563318028127611, -0.054833362353974436643, 0.0013952838636999886825, -0.00023608614675063352537, 0.034667393120659889538, 0.060161039317539917459, -0.078558784282471219584, -0.086811135096279259948, -0.17052577031441601418, -0.21548879873646650807, 0.10741142530502149899, 0.14648050866337228304, 0.14986173697025292895, 0.21464206783358963371, -0.2791130852403586049, -0.50636714208926458802, 0.98070595851787800701, -0.14888461024604163208, ]).reshape((20, 11, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=4 knots=np.array([-2500, -150, 300, 1500.0000000000000000, ]) absorb_cons=TRUE output=np.array([0.026705483244849264474, 0.029839625165101336945, 0.025137490946490849419, 
0.032188537945938233698, 0.021607394654091297004, 0.037467894551815809911, 0.013655036980899055293, 0.049314030316796791942, -0.004274510570643489521, 0.07576300788030972122, -0.044664576703176542272, 0.1337724526334178321, -0.13422281221596993328, 0.251582066127820414, -0.31121876398729647617, 0.43612872129651875097, -0.47985225176217655152, 0.49291724756068344693, -0.081762536217311512776, -0.57008353784816034882, 0.10575886843256800118, 0.098523720643000009534, 0.10937645918695998448, 0.093097559739294566405, 0.11751580252413190708, 0.080890140328454290808, 0.13582591101899046948, 0.053438242334149517465, 0.17698108482519578355, -0.0081682818617542177903, 0.26907668054096511856, -0.14499960251139776268, 0.47046169477357585587, -0.43311804519238522593, 0.8566919676788534721, -0.94615480020651354653, 1.1710825622951557268, -1.4966789629626817693, 0.019850657594207062745, -0.7294516591807701511, -0.065495278981390098183, -0.065538150305470069257, -0.065472788689835886844, -0.065568482416232795607, -0.065419575517949940524, -0.065631137544391218719, -0.065286317209239316806, -0.065744866169428622937, -0.064914403751873434034, -0.065874953263984459273, -0.063671751390664124703, -0.065668915889071088898, -0.058354957302844213951, -0.064207225531959205567, -0.028319888642541334034, -0.061612158154530236032, 0.15860060924423530215, -0.060152247616144620401, 0.96974734313860999624, -0.071414854005294861605, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=TRUE output=np.array([0.21482323419002713472, 0.24750397604597326739, 0.19883614002139288202, 0.27258973708238182221, 0.16379621347999451242, 0.33062932586024107451, 0.090229192760086424085, 0.46740970723200020442, -0.043673571594035724697, 0.78496238070321600055, -0.14225997541844798144, 1.352022453322457185, 0.3403195391608931919, 1.6947780812699095865, 1.6168904854104195756, -0.12774151466578054226, 
4.4891751144718545774, -5.4563586958095688928, 10.951815529860082776, -17.445747353383097789, 0.78661625540292257064, 0.77643763922916775311, 0.79017886797586167624, 0.76625062041079372577, 0.79428060270008205013, 0.73589687070232667754, 0.78216715039058581915, 0.63518798334179016329, 0.65550344333732102342, 0.30392043723920803, 0.19412499069407199159, -0.42254129005404528208, -0.4561535986725677172, -1.1523051841355156366, -1.7794911913869626563, -0.26332862233794968043, -4.7570007749943510333, 2.9350973501194954629, -11.456397338110976492, 10.131555788148743247, 0.21540397689706236584, 0.19368025263260429947, 0.22641495625607274689, 0.1776489056064568528, 0.25155233513796670941, 0.14240007942952948028, 0.30993541465339075058, 0.067271686044363815382, 0.44245515455816608608, -0.080352654001592846433, 0.58572719290272645676, -0.30336068837952584465, 0.016951242449324648714, -0.3319833812262538153, -1.5163892893085060276, 0.9143206895602743467, -4.9664054857636248386, 4.3390336686705470726, -12.728941927787642996, 12.044637871668662399, -0.24478425848794690967, -0.24574753349402200797, -0.24311234670203171748, -0.24447893336440842948, -0.23629987575155153579, -0.23582487990744877004, -0.20434545124769434854, -0.19103328802457081315, -0.055793360625630179783, -0.0080996580588084746144, 0.39680666612040660368, 0.39418417396254185059, 1.016323619403686207, 0.62129681239849587904, 2.2740869262794469741, -0.76498115062552785037, 5.1040543667499091995, -4.782338947138994989, 11.471481107808449096, -13.821393989294298521, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cr nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=TRUE output=np.array([0.18906235677058036426, 0.18778525841872689695, 0.18966313099776943574, 0.18676205783172145081, 0.19092371457093507137, 0.18426214279619892999, 0.19330747373751991369, 0.17770329603139525809, 0.19648166130871616564, 0.15897921699813499785, 0.19369977803300092445, 
0.10544719310340035234, 0.15040730339948449323, 0.026126087939682636485, 0.014566423626603465513, 0.71038417591713942656, -0.0044258520659356658661, 0.37477303749040830061, 0.11191608928948779123, -3.5378245461949706652, -0.1717114269621455025, -0.1641796852631927095, -0.1753614922544594612, -0.15832977440814030579, -0.18329458477753993173, -0.14455385888762556368, -0.19975455127362412577, -0.11060689494904693553, -0.23004851008792162603, -0.021054716326888309186, -0.26742505651389464338, 0.22697177040482915955, -0.233733177660145508, 0.79112685520733139199, 0.0087978598061598800584, 0.36982567289124945731, -0.019154626403592486805, -0.30951045809435018263, -0.039292263384108856716, 1.0312889189371063914, 0.550395590305010729, 0.54949597448884435202, 0.55072323934346578689, 0.54860843215917232119, 0.55116651719216569472, 0.54595839995564021674, 0.5507105841767095944, 0.53682026836985941021, 0.54272290221675423272, 0.50167044456244525019, 0.49401178725604522057, 0.36566698512261336385, 0.28011631034896739001, -0.03551586186561946773, -0.15096545178713224877, 0.063629989613431983675, 0.133595751562454651, -0.72836523541381203994, -0.59095728594899554764, -5.2594893416580195122, -0.095090409079557833283, -0.1030766681400390461, -0.090988045895585706324, -0.10887508452680887128, -0.081496719465270400784, -0.12133034539926186579, -0.058865730473533696421, -0.14644031510302549237, -0.0020110794198277630838, -0.18907748512879404834, 0.15000794369741921042, -0.22502359947941463769, 0.5462684746707909822, -0.11817275982033918769, 0.95064928964895001329, 0.052430922211166117175, -0.40184197213040517838, -0.16340199015776293856, 1.0056597912908593617, -0.79932421729955860368, 0.03971313277885706039, 0.041414831889495302975, 0.038837883046586599289, 0.042648161751521966589, 0.036811186605924875459, 0.045290364726222026581, 0.031978640736114505305, 0.050580394866669355081, 0.019931365400903504337, 0.059364669508446515911, -0.010779243096346216579, 0.065589985310555209974, 
-0.071477633391997127954, 0.036649613012278252355, 0.13265213083724519683, -0.017297782300268411959, 1.1319824969476413035, 0.066678235503076957458, -2.1076119483122943699, 0.36704351417936742497, -0.098246537944137943277, -0.098542071754031859698, -0.098088618240313230068, -0.098745945231101361905, -0.097708943948828105852, -0.099149563571540588747, -0.096734808676916902992, -0.099784603436978008828, -0.093975228815392539139, -0.099897241328297553542, -0.085336692453571283412, -0.094243446676616243751, -0.058668216352278305947, -0.060228845196453252575, -0.018756415239227899883, 0.03177289587597233872, 0.19240899911984374326, -0.16120768980326213859, 2.2227564852817449648, -0.98762351160861328037, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=4 knots=None absorb_cons=FALSE output=np.array([-1.693557754132211208e-05, -1.9836972061171899575e-05, -1.4954358368392469422e-05, -2.088026100812221311e-05, -9.5355264085171175196e-06, -1.8699295604068575353e-05, 4.9022718833308546933e-06, 1.7974067511175931267e-05, 3.744122291002612696e-05, 0.0002988262983956019303, 0.00010718085083579747424, 0.001935560671294481172, 0.00024699248283329116715, 0.010566342124751632037, 0.00048044994314723433668, 0.053454984128479515748, 0.00065729103443297996496, 0.25076507420610638643, 0, 1, 0.35560680685948470314, 0.46359416252769836131, 0.30168882277557279581, 0.54466866236250932598, 0.18063664369405590948, 0.72711735964503432239, -0.089776348720933471514, 1.1356863997659447652, -0.68566908660007752641, 2.0361466894716735432, -1.9628257407671605428, 3.9650378336575471394, -4.523225924226894179, 7.8266636243390204086, -8.7985820993738865781, 14.246329329102128014, -12.037110654561336887, 18.944322912930825709, 0, 0, 0.6444562685323340645, 0.53647236902629924504, 0.69836999393477805498, 0.45539628464397530205, 0.81940601333710849641, 0.27293213874197075341, 1.0897461189737138731, -0.13572091900106228457, 1.6852766572444068949, 
-1.0365718798973906356, 2.9606222279687766097, -2.967335129700412466, 5.5120421008950604147, -6.8380635660933348774, 9.743925990387539926, -13.301405831788731149, 12.784702352769150124, -18.197306344378148424, 0, 0, -4.6139814277377641984e-05, -4.6694581936484837246e-05, -4.3862351982477081188e-05, -4.4066745476440737986e-05, -3.3121504755945730075e-05, -3.0799091401098394798e-05, 2.5327475336172373395e-05, 1.6545167606241587368e-05, 0.00035498813276064257764, 0.00012636412732133810903, 0.0020963319475481821674, 0.00036173537157081639762, 0.010936830849001568516, 0.00083359962956235762136, 0.054175659043200374843, 0.0016215185581219158321, 0.25175101075775591086, 0.0022183572412113072327, 1, 0, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=7 knots=None absorb_cons=FALSE output=np.array([-2.9605259404128104445e-07, 4.3004543792069181325e-07, -4.6506508782294522313e-07, 1.2225175036037286749e-06, -4.8490441759023038782e-07, 2.7062259965304665474e-06, 2.8104230735656565092e-07, -5.4449455566502238182e-06, 1.7542735683802410547e-06, -0.00011038827024139799939, 2.8120883750482203265e-06, -0.00053376130083573273665, -7.6784755538918750706e-07, 0.00061879177472234751789, -9.2078635815506674409e-06, 0.029574116402683980898, -1.6901017048743381161e-05, 0.21556638245963724576, 0, 1, 0.00010456869825724268133, -0.00015189629322606839636, 0.00016426557921581520349, -0.00043180524853199072667, 0.00017127302630398999017, -0.000955865732450367038, -9.9266916848531769006e-05, 0.0019305664182940607265, -0.00061962673904836988552, 0.042415216868389754579, -0.00099325736940550032363, 0.27732557133728741317, 0.00027121133522598855859, 1.1504608369631013076, 0.0032523082987030572759, 2.7821404636666882126, 0.0059696060348119403538, 4.1098324719022825136, 0, 0, -0.031642380707528651451, 0.046727930564370717681, -0.049706595580857972083, 0.14260551573254778845, -0.051827041873556585483, 0.44422176162356585838, 0.030038067097830360025, 
1.2696071710389067455, 0.18749841492049976188, 2.6526069738107440621, 0.30055865997270758694, 3.4401400654673524038, -0.082068271523342908869, -0.89294447099362916909, -0.98414515128277502143, -10.707999025460104292, -1.8063966557448927208, -19.65451295888456329, 0, 0, 0.9404055233767310007, 1.0229997547271381109, 0.84839014691027592185, 0.9942253298077886603, 0.55270681506501917468, 0.71325376394101502875, -0.27008001161472772189, -0.36441609214674652861, -1.6858466263672904351, -2.2746949537899268101, -2.7024004611201863923, -3.6463202499480562579, 0.73789700429246496416, 0.99563659340566201816, 8.8487029815652924469, 11.939460983544531558, 16.241778413219694954, 21.914859168852931504, 0, 0, 0.091452250263845791256, -0.069826465452058242289, 0.20181123367547218472, -0.13689085681302756714, 0.50012121362724204499, -0.15708533369949770342, 1.2380919977577322655, 0.093217876961678852732, 2.4558884227495125785, 0.58186847095202909319, 3.1248015911458146832, 0.93273112726773044212, -0.80684053529361110524, -0.25468449792074498994, -9.6754590529148973843, -3.0541220021223365322, -17.759287695723489975, -5.6058354437652431201, 0, 0, -0.00032057317842933900735, 0.00025095691201626593508, -0.0006604553857512240455, 0.00049198690620599915858, -0.0011751018630891821651, 0.00056456602826813432217, 0.0020547291059278675364, -0.00033502584436379451204, 0.043190243671805170211, -0.002091240244288253107, 0.27856793393313283858, -0.0033522436217435750035, 1.1501216068412509763, 0.00091533825638770416853, 2.7780724886333088008, 0.010976540508122837117, 4.1023657110424904815, 0.02014742036749034293, 0, 0, 9.0759971803981566744e-07, -7.1050367876085636178e-07, 1.8698667331515860834e-06, -1.392902486538716717e-06, 3.3269224981118857968e-06, -1.5983868974364642471e-06, -5.7964722216058981582e-06, 9.4851778732840647118e-07, -0.00011258250904733756745, 5.9206732932833133743e-06, -0.00053727865043814320746, 9.4907982657877548258e-06, 0.00061975219556745361413, -2.5914854994384806915e-06, 
0.029585633563949290115, -3.1076539587733474873e-05, 0.21558752218843252324, -5.7040932539508893209e-05, 1, 0, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=10 knots=None absorb_cons=FALSE output=np.array([1.9320747025477182898e-08, 4.8242814281426965595e-08, -1.2568948282371909281e-08, -7.8978523641721503504e-08, -3.5770522726027550527e-08, -7.01473863118866388e-07, 2.1638270976696590321e-08, 2.2109425903379564006e-06, 8.5370349850805669926e-08, 3.744561791897189516e-05, -3.2925418782971613435e-08, -7.9813891850179092256e-05, -1.991767035296446912e-07, -0.0021868285157625511929, 3.7614188014338205365e-08, 0.0021874060835819046644, 4.688717754303082178e-07, 0.15874967738905676473, 0, 1, -1.4544731165165144453e-06, -3.6317372379498608116e-06, 9.4619516292459995175e-07, 5.9455330204143990165e-06, 2.6928184298539113977e-06, 5.2807216744776855019e-05, -1.6289371928530509096e-06, -0.00016644059132742171966, -6.4267121060004543239e-06, -0.002818920227187673988, 2.4786379329293728427e-06, 0.0060407977628119044478, 1.4994097295422416361e-05, 0.20920617714101982787, -2.8316102474873858596e-06, 1.0967921747761912865, -3.5296843934527352639e-05, 2.0750595382021144175, 0, 0, 9.7994633378747003421e-05, 0.00024468706579685965685, -6.3749578484895432179e-05, -0.00040057827261335182721, -0.00018142772925296013898, -0.0035578683345143034648, 0.00010974909140496827962, 0.011347453981486406438, 0.00043299755045772883848, 0.2521011214081396723, -0.00016699739084748591402, 1.1986460188118959191, -0.0010102222245463542399, 2.0307158061521746184, 0.00019077878093656664284, -0.33272379892704945226, 0.0023781128997935555708, -4.1474987648643217852, 0, 0, -0.0066281488228501295526, -0.016550113319571305159, 0.0043118860597063267057, 0.027486913394933615296, 0.012271386183288513286, 0.32813715189346348566, -0.007423195392682361074, 1.293350915313034033, -0.029287034456989179121, 1.9521712704316960263, 0.011295348749219022086, 
-0.66799495509124628967, 0.068329285161611297283, -4.0409215143626093791, -0.012903871453880378511, 0.76312128325689243535, -0.16085050450109564246, 9.5125283792626493806, 0, 0, 0.14341751361350341121, 0.77593360542645228861, -0.090432986778547594375, 1.1259563994195789238, -0.25736721450922656063, 1.1109363559631879603, 0.15568633342939960928, -0.56801737059939128027, 0.61423561827887318554, -2.241022016113413784, -0.23689682657495314544, 0.86431165517469521475, -1.4330669354534046889, 5.2285058979714911231, 0.27063229881828543277, -0.9873946118959923135, 3.3735101868301691219, -12.308160541801825616, 0, 0, 0.88922814381914494497, 0.26150528401135941792, 1.052718109833140403, -0.16738921270899279059, 0.90250428645253855109, -0.47638032258407536634, -0.44193283907468589033, 0.28817153685428037457, -1.7435755898887204118, 1.1369348754708179516, 0.6724577863061328431, -0.43849014288054521948, 4.0679186503944446684, -2.652571308655904847, -0.76821964733539216397, 0.50093366421426255286, -9.5760809678860194794, 6.2442835778727587837, 0, 0, -0.026500124336832579092, -0.021442252392463622551, 0.033954742484493150023, 0.014552615451508882013, 0.34654423116839672137, 0.041415928368598843579, 1.2822161222240113787, -0.025053284450302942821, 1.9082407187462107068, -0.09884374129233876316, -0.65105193196742539019, 0.038121802028613864521, -3.9384275866202003158, 0.23061133742043829487, 0.74376547607607035317, -0.043550566156846479138, 9.271252622511015673, -0.54287045269119871271, 0, 0, 0.00039179415524385585289, 0.00031701546211065640812, -0.0004962026403406965408, -0.0002151548273865228544, -0.0038300099283937543392, -0.00061231858622874310843, 0.011512077618593902564, 0.00037040318349176827237, 0.25275061773382700991, 0.0014613667327948419179, 1.1983955227256271048, -0.00056361619411026103298, 2.0292004728153583137, -0.0034095000078439544229, -0.33243763075564392029, 0.00064387838566091657681, -4.1439315955146351911, 0.0080261310368032780238, 0, 0, 
-5.8151558545865533576e-06, -4.7052624338916936783e-06, 7.3648257648012915326e-06, 3.1934086748705229843e-06, 5.6846444389557634026e-05, 9.088262200756952661e-06, -0.00016888399711670137003, -5.4976630258790269937e-06, -0.0028285602953466680576, -2.1690153357751540543e-05, 0.0060445157197113750028, 8.3654030236365385289e-06, 0.20922866828696309871, 5.0605078372050556115e-05, 1.0967879273608196478, -9.5566845852699449579e-06, 2.0750065929362113692, -0.00011912684827902968556, 0, 0, 7.7246635846578622837e-08, 6.2503173239414331853e-08, -9.7831946065279388105e-08, -4.2420200453005213882e-08, -7.5512964720790736305e-07, -1.2072551420034312034e-07, 2.2433999968030079565e-06, 7.3029164546350807041e-08, 3.7573673443748081119e-05, 2.8812493074646954793e-07, -7.9863279978354456061e-05, -1.1112328839252804556e-07, -0.0021871272808178477895, -6.7222137441255028355e-07, 0.0021874625048639208952, 1.2694788454839182361e-07, 0.15875038069671990049, 1.5824422420772902814e-06, 1, 0, ]).reshape((20, 10, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=12 knots=None absorb_cons=FALSE output=np.array([-3.6802322454608713193e-09, -1.1576165417746612791e-08, 4.0903329323109361483e-09, 4.0685732923556337374e-08, 1.7703518712245050831e-09, 7.8790091688795366003e-08, -9.3409196093845724123e-09, -2.611905916838673793e-06, 3.0933985220706180644e-09, 5.5610182678566951338e-06, 1.6919855821268632348e-08, 0.00016301336905366541432, -1.4352199746873791695e-08, -0.00096630860312933643686, -2.1293612839737406843e-08, -0.0074687624866493825676, 3.8568109059678503697e-08, 0.11083787128447247783, 0, 1, 1.4460849968779115402e-07, 4.5486583496536781572e-07, -1.6072271234908852579e-07, -1.5986770411107210554e-06, -6.9562981611555798878e-08, -3.0959233519658970583e-06, 3.6703563262437939878e-07, 0.00010263042405151407538, -1.2154986136127495596e-07, -0.00021851080443170532329, -6.6483710865399255783e-07, -0.006405334542942880266, 5.6394540729725285131e-07, 
0.039346935329722818042, 8.3669649095786947889e-07, 0.58363014399245827235, -1.5154685940796589346e-06, 1.5869193120902693739, 0, 0, -5.4865400297656400151e-06, -1.725790404504806206e-05, 6.09792368290666473e-06, 6.0654841172256407627e-05, 2.6392645247385444743e-06, 0.0001174613348200111455, -1.3925569348217424364e-05, -0.0038938646832389072101, 4.6116803743272812418e-06, 0.008333356501832860086, 2.5224350005560149737e-05, 0.3457041582444345118, -2.1396453586193263292e-05, 1.3943941859934374516, -3.1744806151200007029e-05, 1.0659241155872372087, 5.7497858861837635377e-05, -1.8607361322222011335, 0, 0, 0.00023256871331852414829, 0.00073154456479976239392, -0.00025848462913861898265, -0.0025710954976070178948, -0.00011187567233550075858, -0.0049790635547287236118, 0.00059029037024653681354, 0.20118319692573388702, -0.00019548432437836398673, 1.1583324452787904235, -0.0010692339057516754303, 1.5709796196933376589, 0.00090697336629712820011, -1.2050531219054301246, 0.0013456292455859321117, -1.7878746869280606191, -0.0024372743079464744345, 3.238292460253569427, 0, 0, -0.0096036016628270864243, -0.030208115694913360155, 0.010673763374229244122, 0.11304537553087100343, 0.0046197503419112585787, 0.84717528615215309529, -0.024375220124670693433, 1.6209052609300536041, 0.008072253382102512545, -0.43744451537192496904, 0.044152527520605257261, -2.3926752654369827233, -0.037452204143992279262, 2.0295771844456078625, -0.055565888790719414336, 3.0111781856549497682, 0.10064385386396751398, -5.4540039558593536029, 0, 0, 0.10357732090048776818, 0.85603572021517670976, -0.11294852267110891408, 1.0800087615333373581, -0.048885660842739625531, 0.2401585690492896441, 0.25793574452960393861, -1.2534276012546561319, -0.085419646489135220291, 0.4150930798449654624, -0.46721691129938069942, 2.2704203849822679473, 0.39631486857335096463, -1.9258749730998911964, 0.58799177288976445244, -2.8573206043806020915, -1.0650015567431445618, 5.1753290302415839719, 0, 0, 0.94933893888308096276, 
0.20425894956750886844, 0.97636209569699528021, -0.2257137560392956932, 0.19529887869007700463, -0.097691991575737782694, -1.0167341121476407562, 0.51545291906204826482, 0.33670815416251220764, -0.17070067666802030137, 1.8416810448534708389, -0.93367563771983919096, -1.5621985497383736874, 0.79198660986818203433, -2.3177527963386914678, 1.1750293712615387243, 4.1980354999103974833, -2.1282748625244334306, 0, 0, -0.044595007867203530216, -0.03154770903003507182, 0.12905602059221518707, 0.036023951388023812969, 0.85410491166502156002, 0.015591657403950714977, 1.5843424307430495901, -0.082266367920763708299, -0.42533613529877184467, 0.027243855164596028628, -2.3264464741560830774, 0.14901478038204274412, 1.9733988782296159048, -0.12640118898597454966, 2.9278293524688816518, -0.18753487466867790889, -5.3030381750634045801, 0.33967300679089129645, 0, 0, 0.001079949373603220162, 0.00076398525832885994229, -0.0029588224413149436312, -0.00087238562334283919521, -0.0051468770632319233924, -0.000377580394132319148, 0.20206863248110379372, 0.0019922299995820589268, 1.1580392187922230463, -0.00065975959477697777422, 1.5693757688347129697, -0.0036086644319118939793, -1.2036926618559800062, 0.0030610351112528135301, -1.7858562430596849335, 0.0045414987038525046412, 3.234636548791644195, -0.0082258007893193495902, 0, 0, -2.5477139138140648842e-05, -1.8023214052146796125e-05, 6.9801726696616189999e-05, 2.058049242980996096e-05, 0.00012142023160711750383, 8.9075177709926672744e-06, -0.0039147530372612262053, -4.699879655023364312e-05, 0.0083402740223943259285, 1.5564421263354520723e-05, 0.34574199476944195153, 8.5132181268765064475e-05, 1.3943620913130563288, -7.2213030853402265731e-05, 1.0658764983780117941, -0.00010713872076029942911, -1.8606498854339041937, 0.00019405527365870157302, 0, 0, 6.7149986095353481289e-07, 4.7503707792034416037e-07, -1.8397611096343514486e-06, -5.4243915417817383743e-07, -3.2002678243831975437e-06, -2.3477506293900330442e-07, 0.00010318097750045069814, 
1.2387452601072785849e-06, -0.00021869312922374677631, -4.1023078209430247345e-07, -0.00640633179860586148, -2.2438252417072174446e-06, 0.039347781247833550633, 1.9033157496282319598e-06, 0.58363139903719374324, 2.8238506569827991151e-06, 1.5869170388873774513, -5.1147065050188478719e-06, 0, 0, -1.7089420375974873326e-08, -1.2089516008577751082e-08, 4.6821232322022749866e-08, 1.3804873646549429766e-08, 8.1445619495631410706e-08, 5.9749375653827772664e-09, -2.6259172962527552498e-06, -3.1525603681672934994e-08, 5.5656583656398050473e-06, 1.0440220011988337053e-08, 0.00016303874883739764194, 5.7104513396781534087e-08, -0.00096633013142895208495, -4.8438674145699212406e-08, -0.0074687944270686538323, -7.1865943334113596723e-08, 0.11083792913663595425, 1.3016736807641509225e-07, 1, 0, ]).reshape((20, 12, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=4 knots=np.array([-2500, -150, 300, 1500.0000000000000000, ]) absorb_cons=FALSE output=np.array([-0.0051418610449036764379, -0.0051207276560822079237, -0.0051517581620670691717, -0.0051036969019558836927, -0.0051724091286762859804, -0.0050616345732373531699, -0.0052108015793779615538, -0.0049476511776847470828, -0.00525764719160080711, -0.0045887810243909702562, -0.0051780211262217532009, -0.0032118234048454639452, -0.0042344880890217376918, 0.0033309098171163710467, -0.0002048781708894489937, 0.036873131791193267115, 0.0053670164361401782871, 0.19556964857406355929, 0.00027785197169554257113, 0.82912284340298503249, 0.65949448911253716332, 0.6654093842010145865, 0.65653556733089935005, 0.66984284447353759084, 0.64987461295218140744, 0.67980900603329297294, 0.63487206047522759533, 0.70218091452751663084, 0.60105778412747068451, 0.75218902517658670082, 0.52490094418674082544, 0.86229539201840321727, 0.35569403068909899446, 1.0895778496922625678, 0.014595118281322956924, 1.4713138143490975818, -0.3821845778229204238, 1.7530984807387750557, -0.019785804601727036839, 0.46986487896086814864, 
0.35950128095327349431, 0.35338118891054576265, 0.36256112212690350116, 0.34879091185540550546, 0.36944502095300485456, 0.3384628569483467686, 0.38492806794448852781, 0.31523169436694697954, 0.41971361558839714867, 0.26307215994266325287, 0.49746707294522052312, 0.14713121281897215131, 0.66691371985704051006, -0.097089653687300894735, 0.9873402560545572193, -0.53105535871660392022, 1.2102463767433073727, -0.99135819513191647534, 0.044702741392222536398, -0.3124421698702265493, -0.013853909020907106964, -0.013669845455478047552, -0.013944931295735877447, -0.013530059426987159701, -0.014147224776509979491, -0.013210228408402318115, -0.014589326840338213628, -0.012464957716778975183, -0.015513752524267036478, -0.010672404094858929657, -0.017189996005739616169, -0.0062147814325298833885, -0.018373262457117778973, 0.004180894177922047858, -0.0017304961649907309174, 0.022868412576313085216, 0.16657118464347281384, 0.042690065819077735454, 0.97480521123780894399, 0.013454447506373390722, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=FALSE output=np.array([-0.00093170433340244659687, -0.0012410506081968345274, -0.00078243418040293037045, -0.0014789118938009710238, -0.00046560203080319340248, -0.0020151841680895970881, 0.00010431589287147467922, -0.0030688678143442819739, 0.00055449747551044887423, -0.0028609656322684854333, -0.00014909864114985113439, 0.026923379556954812886, 0.00037005603844857983247, 0.24511712352852058072, 0.0023876748113693508323, 1.1555668732201982429, 0.0069273170504410850126, 3.402689781560706006, 0.017141512088352486159, 8.4587163253268489171, 0.084019186191167327671, 0.11446091282800011091, 0.069879045425061070418, 0.13908517998091646239, 0.040857714895860555715, 0.19963820957805075706, -0.009029120060764306635, 0.3576254481931359086, -0.047994837046959955285, 0.77395141822244861718, 0.012905315717303777676, 1.5659844614539342178, 
-0.032030405994604847775, 2.0431817208334850378, -0.20666651978408043244, -0.51809792415464139825, -0.59959777581040041294, -8.0018873070243543566, -1.4836931018696206674, -24.840413418481212204, 1.0495495283018867472, 1.0438716260482179266, 1.049399522569444354, 1.0346122703952109756, 1.0414030908228715244, 0.99920617535829048261, 0.98168936282733698651, 0.85586872666349445016, 0.66418975301951443946, 0.32605391435347208517, -0.11777757243608029392, -0.85263295720592457982, 0.29231857203837463555, -1.8526421434409372502, 1.8860972902309971477, 0.52133890068006216723, 5.472099406164398161, 8.051943344140203962, 13.540604167014549830, 24.995803341925523000, -0.15192966618287376268, -0.17994115928882439825, -0.13573193509615386065, -0.19726850771611939561, -0.093692687860718673609, -0.2254589026981791422, 0.031190848164371551277, -0.24103262443025505468, 0.43031985595064925487, -0.11127445668091072439, 1.0214738883945315706, 0.29750331491431342146, -1.0287095298822455103, 0.64642959712677627859, -6.6374375162553720386, -0.1819071733449817152, -19.257075485594906894, -2.8095088468476898669, -47.65126091660886232, -8.7216126122287835898, 0.019292656023222061468, 0.02284967102080309731, 0.017235801282051283617, 0.025049969233792938189, 0.011897484172789671297, 0.028629701929927506981, -0.0039554068238157324977, 0.030607317387968895062, -0.047069269398714280728, 0.014130089737258502702, 0.083547466965394667771, -0.037778198719277893136, 1.7680513078000272831, -0.082086298047844605263, 5.9556190709970859842, 0.023099323599362758808, 15.377646538190468561, 0.35676302817113519916, 36.577208339375580692, 1.1075063634576232108, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=FALSE output=np.array([-4.3178765217391302305e-05, 6.5885993478260890537e-05, -9.6313081793478265121e-05, 0.00015008344269701090522, -0.00021248546106063178089, 0.00034663246069176721631, 
-0.00045706603540709121736, 0.00082099033067846731351, -0.00092573826895791448514, 0.0020071162494608137464, -0.0016063078222631245202, 0.0047785217809616390913, -0.0016924900384041397308, 0.0058508393862588175052, 0.00037271449492305578147, -0.023079518000319019372, -0.00039460853415255482939, 0.96024636175750266442, 0.0010388953913813049352, 4.2828168145345602014, 0.0006476814782608695956, -0.00098828990217391310055, 0.0014446962269021740039, -0.0022512516404551634157, 0.0031872819159094768759, -0.0051994869103765082446, 0.0068559905311063691277, -0.01231485496017700916, 0.013886074034368717928, -0.030106743741912203594, 0.024094617333946868887, -0.071677826714424588972, 0.02538735057606209683, -0.087762590793882266915, -0.0055907174238458371557, 0.77073211937350760703, 0.0059191280122883223325, 0.065812149128730040859, -0.015583430870719573377, -5.436090696106786524, -0.0031088710956521745793, 0.0047440075304347817733, -0.0069345418891304357392, 0.010808468249184780344, -0.015298953196365492127, 0.024985562378791605076, -0.032908754549310574589, 0.059430528454937264771, -0.066653155364969851604, 0.14814853819552065151, -0.11565416320294498453, 0.38547179702354089637, -0.12185928276509808144, 0.89303994817074205947, 0.026835443634460023205, 0.3158234170040984945, -0.028411814458983948584, -0.033037325841022327499, 0.07480046817945394666, 2.7299491962930431121, 0.99996527520000000333, 0.99992195380000004068, 0.99982468157500004047, 0.99960649719062499852, 0.99911787098710935773, 0.99802618626118166922, 0.9955959649107849474, 0.99021595070231704927, 0.97840786415926050967, 0.95284186025012007626, 0.89870074544754108281, 0.78829881686806935193, 0.57842205914062527761, 0.2362299420741153233, -0.10286920059876342171, -0.080364133358400105522, 0.1089119554261051559, 0.0088355638877152739563, -0.28673512802124018206, -0.73010269203186040077, 0.0031519622956521736561, -0.0046471747304347834143, 0.0071520025891304338586, -0.010320575886684784858, 0.016392005544802985134, 
-0.022542075546565051858, 0.038353001709637714323, -0.04736186631031753802, 0.093197128054721647961, -0.090485094976372942854, 0.23844273869784807229, -0.13266783181011393422, 0.6114558419262043909, -0.058789413728980027818, 0.96115953962745293016, 0.020964556528278284475, -0.40723600724543668194, -0.0023049297098387665911, 1.0721400439055068787, 0.19046157183439835214, -0.00065664547826086978963, 0.00096816140217391292219, -0.0014898486644021743334, 0.002150119976392662717, -0.0034132712039954154516, 0.0046962657388677176915, -0.0079705034644407205674, 0.0098670554813161497065, -0.019191613515453308236, 0.018851061453411026292, -0.047118889772279910766, 0.027639131627107058065, -0.098264441613631667294, 0.012247794526870834503, 0.1283332564542301879, -0.0043676159433913086874, 1.1341990151205716408, 0.00048019368954974300367, -2.1349300292885819985, -0.039679494132166315268, 4.3776365217391304853e-05, -6.4544093478260855155e-05, 9.93232442934782663e-05, -0.00014334133175951084961, 0.00022755141359969430562, -0.00031308438259118116136, 0.00053136689762938121937, -0.00065780369875441000935, 0.0012794409010302202311, -0.0012567374302274016227, 0.0031412593181519930102, -0.0018426087751404706099, 0.0065509627742421101121, -0.00081651963512472227127, -0.0082410361884569308111, 0.00029117439622608720607, 0.18701233167960798487, -3.2012912636649525446e-05, 2.2892691807041996022, 0.0026452996088110873087, ]).reshape((20, 7, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=4 knots=None absorb_cons=TRUE output=np.array([0.17237877473947216256, 0.16160585322933293528, 0.17775653635702576283, 0.15351565450517737355, 0.18982763251278575445, 0.13530235680671015563, 0.21678099966619068795, 0.094474101590757947333, 0.27612342080317897608, 0.004254035614190725817, 0.4030409383688886571, -0.19022791859524765257, 0.65608982528520065625, -0.58588986559744449245, 1.0712259966408619327, -1.27728761497709975, 1.342827694119810289, -1.9788260499202323661, 
-0.052195429016186831173, -0.97077694213337639706, 0.68714996318883114768, 0.60683822144440202617, 0.72724727397328958745, 0.54653827251673237075, 0.81726443453130048766, 0.41083008335212661821, 1.0183156414681064916, 0.10689042936382432691, 1.4611707976831045386, -0.56312355687140791538, 2.4093551078217010364, -1.9991227596244038889, 4.3052173449842321418, -4.8778668329561920558, 7.4441765017641099433, -9.6842706472508055526, 9.6670625693150959989, -13.322034134461750554, 0.012161980255413960134, 0.22619930950229019673, -0.010768222248548299166, -0.01771835491152162767, -0.0072960943160139245939, -0.022933439873231927647, 0.00050471399250212036977, -0.034662450006663660107, 0.017964357449119585514, -0.060912800432191181732, 0.056636870493901259049, -0.11877531418126709151, 0.14054140101632672799, -0.24279484616490168425, 0.3140184596633784353, -0.49144965505677373763, 0.63173409697146676312, -0.90678479147331569887, 1.0347141866437283841, -1.2221561304860752983, 0.99694564370894334093, -0.056807630788862507887, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=7 knots=None absorb_cons=TRUE output=np.array([-0.088612856492796668317, -0.10148345379387192777, -0.074820024844059834779, -0.097934158724637176974, -0.030963004348234846042, -0.058173986143542688598, 0.090101482080220576809, 0.10049075761090919823, 0.29596829876665797787, 0.41518677441788176541, 0.43034390532152527742, 0.83452635605264402496, -0.13348462782429343365, 0.98208889847295099962, -1.4362329672138189895, 0.92468785756571492129, -2.6216702734870915847, 0.6954743327776672368, -0.0022196047790754855691, -0.12327370541474826082, 0.24123862297275155964, 0.35840804363927752929, 0.18093342115646737778, 0.44250759269715284061, 0.043937153723061256327, 0.6202158021725746595, -0.24740540994207005365, 0.96645134975980595549, -0.72475992234705299833, 1.5060196314875564649, -1.0261673763863199405, 1.7262771004734287494, 0.32934388972170997034, 
-0.37505857126375657939, 3.4434877057227697428, -4.9947636565362447669, 6.275814996477087071, -9.1524782805583591028, 0.0068271591356862497413, 0.37917074789447108296, 0.35954055514606403365, 0.35954551446237459356, 0.35744096324407559351, 0.35584226884375003142, 0.34885947617865670223, 0.33862610062421411028, 0.32049681236018934261, 0.28089301373203690027, 0.25602145147051952323, 0.16597482970505655908, 0.12171945403305862998, 0.0018741765108557348514, -0.13785069799652086009, -0.10675519142579256715, -0.57612609827379823724, -0.22195090041542500647, -0.96232722728117126021, -0.44017455467682947701, -0.01453255272801050671, -0.80711739351329703229, 0.39138909874594340899, 0.2727563805116787754, 0.45531894883729062684, 0.19274617399836088683, 0.60538032158012156092, 0.036358318155975434538, 0.93314031181054180042, -0.23999550325584054211, 1.4531806967029836652, -0.67840171762003531608, 1.6665321594967388297, -0.95105942435581225514, -0.35463732068059222735, 0.31454920667339703044, -4.8088313747127671149, 3.2255742320619229524, -8.8757338424365990193, 5.937464168828833877, 0.0075040642903919140341, 0.41676510136746514057, -0.089133365834868971689, -0.10118952771068721874, -0.075725350737607874385, -0.097115177460207771643, -0.032342847263930989132, -0.056715061432944156861, 0.092352440078496370046, 0.098331113369192327256, 0.3400969886051333213, 0.37108103090272787083, 0.71036876576058238797, 0.55444750827998945386, 1.0162219858335352907, -0.16763759291903618243, 1.3370398275661443677, -1.8484727482821472488, 1.4719012302123679614, -3.3978810087925515049, -0.0022219907599747576801, -0.12340621941421109353, -0.013194863919370570376, -0.015072699772667915927, -0.011151244212216769591, -0.014503828881363881043, -0.0046275650331869689486, -0.0085121840139092009442, 0.013410603654954418931, 0.014660726583422474187, 0.044001710617309369722, 0.05545171716304433257, 0.063619527459568614436, 0.082887169332405197242, -0.019275003418586279108, -0.025046122964883565931, 
-0.18452246030655442, -0.27630735647963872159, -0.17524590664413686181, -0.50790642667837671009, 0.99966985744385294943, -0.01833564992966553106, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=10 knots=None absorb_cons=TRUE output=np.array([0.017741203875868801626, 0.059666517581418020144, -0.0073055449468428850998, 0.071233532376632907601, -0.061951321114274808532, 0.023459686026844089879, -0.15952965061095217281, -0.21534242305337428225, -0.20894123625115404441, -0.40851053368902323637, 0.088769309083260417026, 0.1702312769628678002, 0.44867980227835829732, 1.1139500668975872433, -0.14345767930218020214, 0.86682083739859494553, -1.1837156997359956634, -0.21597943647408635037, -0.017918712309548733702, -0.23789999499399971938, -0.0050889918235734665428, -0.017199613011022536063, 0.002072270281447358229, -0.021223643286449593581, 0.017930549906902432111, -0.010400764195970725601, 0.046747049525783831991, 0.07425318574607586708, 0.061514150450350559007, 0.37070324197740467387, -0.026117581474140658115, 1.1506456698664466742, -0.13217536320689113327, 1.766217994670280067, 0.042129159012710096222, -0.26549271162309995287, 0.34842176083555986832, -3.4777238134306913686, 0.0052384550413758917964, 0.06954899473750200678, 0.040717410233940966313, 0.14267724274658066808, -0.01518518709909348792, 0.21755481940124130058, -0.15305036931474685913, 0.39059745188013861306, -0.43311720487012722591, 0.71916259556541839437, -0.58682103919738470843, 0.86959977149390199624, 0.24816600986825945729, -0.22985936815572685643, 1.2655729988498243266, -1.6266494012894197052, -0.39570727271363553834, 0.14945216327392790712, -3.3194533089504827394, 3.3989844591011317831, -0.047815351859148348679, -0.63482641896460045849, 0.12003828098968173654, 0.69730715192779646472, -0.080805333880490451404, 1.032100885251629574, -0.17573134714242774623, 1.0800934656851248761, 0.36589399540256600796, -0.28448323213545234633, 0.88954585755350989995, 
-1.7064483114238784278, -0.35386353683003346493, 0.6479603434190704947, -2.0242657690367202861, 4.0363385548625183574, 0.45966058247434787853, -0.6843648579769759932, 4.9332279555632334578, -9.2892931271418373029, 0.023611216265584832746, 0.31347722617275158852, 0.88792799224608875974, 0.25713275781065136893, 1.0532535169725301127, -0.17260864810915746426, 0.90704417033506612444, -0.47809553851771402488, -0.43024289925585679217, 0.30393926379684116368, -1.7282652053936073955, 1.1666632610962044403, 0.66595310521671158277, -0.45052173959023528171, 4.0350412627641443208, -2.7188693891544946091, -0.75770752213855696588, 0.51778556938925135444, -9.4893428981139695821, 6.4121669845342363914, 0.0013130525053329394382, 0.01743290360653331969, 0.018394340893766202016, 0.12954185558052447313, 0.015467038664664091788, 0.1947806488224434196, 0.18978123265431517952, 0.10064263984709266586, 0.87856041959654240792, -0.56951570226238645844, 1.3795704170465219462, -1.1253701882691806713, -0.4264441361865796698, 0.45357497790875550558, -2.8031655399252684013, 2.5198958573805381178, 0.38077992198436172444, -0.6254498513067092702, 6.2761716348405380828, -6.3399143661490446888, -0.045339936718340946575, -0.60196126440255459666, -0.004758953821358206937, -0.017005411359244829855, 0.0016248940817804027534, -0.020892741105429570719, 0.014155429774586492792, -0.0074074070841472963028, 0.057823550154750644192, 0.062836648910594553041, 0.313405031875580109, 0.11923488274498127149, 1.1726262403941665191, -0.048228614408167384819, 1.8989517069621104639, -0.26605941795394616456, -0.29079224977894058135, 0.067405260889645518541, -3.8003055548041984046, 0.6731217514912637423, 0.0052018569797349914138, 0.069063096056237921472, 0.017736305328008575982, 0.059663635167637223133, -0.0072989048214304030199, 0.071228621002306605203, -0.061895289364833461221, 0.02341525749747295701, -0.15969206961095888664, -0.21517495710840772727, -0.21175703601763257167, -0.40570100515546192455, 
0.094808655216413129097, 0.16419386720169948379, 0.65787987528453284458, 0.90476706770574533145, 0.95333742847376501395, -0.22997392252730572415, 0.8913620730776831147, -2.2910886491383859465, -0.017918169107386142797, -0.23789278310346156919, 0.0060750632566522310557, 0.020430784619126868273, -0.0025017991326720841194, 0.024387881029101547703, -0.021213459590422957535, 0.0080142623794636578399, -0.054619250223539139721, -0.073674968074426458453, -0.071500525070803497285, -0.13890622902065649646, 0.030313398118739860038, 0.056217781624179681554, 0.1514331690285295462, 0.30977852573253761292, -0.046930667126897918084, -0.078740760116175548267, -0.2465351039505715991, -0.78443722293869766471, 0.99386473410822107066, -0.081455614651688740269, ]).reshape((20, 9, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=12 knots=None absorb_cons=TRUE output=np.array([-0.10472685656120690745, -0.12767819762829035746, -0.076360123823526582232, -0.10958044510013424666, 0.0027880985603357523288, -0.00047490167664404706613, 0.06928482401745712782, 0.12032832514458154105, -0.15700287132688625635, -0.1737016309573724826, -0.34110464392275169354, -0.43703758617891158567, 0.21658232947898950482, 0.33207321984948795501, 0.31440967571649758305, 1.0125703207549268114, -0.77927839590651259982, 0.57499654812351752575, -0.03855715180341089815, -0.29753053676014623585, -0.045155067335396106842, -0.055061683622734447496, -0.032913973166337623377, -0.047180638471765484754, 0.0012046656812917998312, -8.5942102116568291834e-05, 0.029855775209106073798, 0.047937464934110683712, -0.067681928151560877271, -0.066458149628327731206, -0.14703046798660462935, 0.16005131398107033669, 0.09335066435731875778, 1.5205934362940900506, 0.13551522132064452997, 1.2508474777651827914, -0.33590194032153924963, -2.2969931440729616234, -0.016622640017795731193, -0.1282704446656750541, -0.15034574662694516478, -0.18284705677723475659, -0.11005017611978802627, -0.16012547886604538983, 
0.0038970012572561371633, -0.0056574341249261998407, 0.10020869531895890137, 0.37404580513022472887, -0.22593678600553301905, 0.90889537861179725553, -0.49151450544890296079, 0.95180903084537338277, 0.31231202672612079496, -0.78416614637393700082, 0.4534081611517823962, -1.1711369929499528819, -1.1228952128467266647, 1.7833321983277987677, -0.05543814773678283625, -0.42779461349253811164, 0.021352229303511102015, 0.007531901115204664085, 0.033244696218130601051, 0.14543534382436593133, 0.0037956069961920194805, 0.8473147453077168878, -0.044854732714249959113, 1.5853682337831724869, 0.054480061579266131355, -0.38616534110945410774, 0.14497807394687750593, -2.2653864188032963867, -0.10147073278861845624, 1.9430514052120930746, -0.14850072666747962669, 2.8843894925937085816, 0.33098715709405579499, -5.1548937962780287947, 0.011396952652503542641, 0.087945848734329423735, -0.065952557774967432436, 0.64935218850878217989, -0.23655843353990058109, 0.90262481725902277141, -0.044372232611305732763, 0.23939481976571783117, 0.37009197410777977533, -1.0588087616813195435, -0.33957241343679639423, 0.13426223409097867711, -1.0193888969785738219, 1.5733219045695456817, 0.74691289987499476233, -1.4520157873552757, 1.0969502242749886722, -2.1629612193760392458, -2.3264786688505232348, 3.5372496286723378311, -0.062415510749811636826, -0.48163620876963536555, 0.8195140136919921714, 0.045981988292849676481, 0.88170239584467990923, -0.36155327574779005939, 0.19875523391830787756, -0.098276866020166997462, -0.93084557008965973512, 0.66449081424484612857, 0.14207954819710574834, -0.38575918922177282466, 1.418831177364036078, -1.467509312217160522, -1.2937127610296201841, 1.1548650229305499337, -1.9279955842607379157, 1.7067654635554521114, 3.232004184432778704, -3.3827058656451867158, -0.047797409383925140713, -0.36883400885657852131, -0.021262964105923369551, -0.0031022854611233788291, 0.1460681949195489493, 0.060436933823290558965, 0.85348373794725573838, 0.015696770625561979468, 
1.5689066026927753761, -0.10905135306422376884, -0.39035761984884714826, 0.065894018981400556689, -2.2504521949512743539, 0.2449549909100549594, 1.9251468021185453861, -0.19161744080459083839, 2.8577824618961185088, -0.28309810920621092389, -5.1294237100882460467, 0.56511845971327800964, 0.0085901166188240788563, 0.066286587283785933233, -0.14931374206792483172, -0.18258953057237148343, -0.11261589846568037521, -0.15823359174408363481, -0.0011429154130191262681, -0.0010551192149562264085, 0.30156489541842562696, 0.17464289155956300159, 0.93257469826065064744, -0.24979099176207356092, 1.0795318316732362973, -0.62202008893514815213, -0.89266942187285236443, 0.42343196160757057278, -1.3343479845023715757, 0.62052301130682563368, 2.1155524024387766246, -1.4614021376163455201, -0.055370175087794709645, -0.42727009501042717554, -0.045179413406039932444, -0.055067758936182993612, -0.032853445083453579334, -0.047225270074522207597, 0.0013235626049733512782, -0.00019451554100623699175, 0.025957829199430799971, 0.051789330864583066338, -0.059352795367343585964, -0.074783156665720870726, 0.19867211632366271745, -0.18558562156573668589, 1.4877431595022336808, 0.12613921140813655342, 1.2014365403948221722, 0.18483406256944329149, -2.1966417328137768372, -0.43610504124242488455, -0.016624243563923207095, -0.1282828186071544585, -0.10472621487286955244, -0.1276780375015148794, -0.076361719159604879437, -0.10957926874689910557, 0.0027849647992298789088, -0.00047204001118300933926, 0.069387562012853995497, 0.1202268016798509892, -0.15722127080697423374, -0.17348334021941591576, -0.34750993698157911505, -0.4306340234217196361, 0.25592930937433366889, 0.29272786696227376391, 0.89803989341652912248, 0.42894253042841035128, 0.80764101265733667656, -1.0119267694495412968, -0.038557109538814519911, -0.29753021062070239555, -0.052405524507834276571, -0.063890551260129033184, -0.038210563318028127611, -0.054833362353974436643, 0.0013952838636999886825, -0.00023608614675063352537, 
0.034667393120659889538, 0.060161039317539917459, -0.078558784282471219584, -0.086811135096279259948, -0.17052577031441601418, -0.21548879873646650807, 0.10741142530502149899, 0.14648050866337228304, 0.14986173697025292895, 0.21464206783358963371, -0.2791130852403586049, -0.50636714208926458802, 0.98070595851787800701, -0.14888461024604163208, ]).reshape((20, 11, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=4 knots=np.array([-2500, -150, 300, 1500.0000000000000000, ]) absorb_cons=TRUE output=np.array([0.026705483244849264474, 0.029839625165101336945, 0.025137490946490849419, 0.032188537945938233698, 0.021607394654091297004, 0.037467894551815809911, 0.013655036980899055293, 0.049314030316796791942, -0.004274510570643489521, 0.07576300788030972122, -0.044664576703176542272, 0.1337724526334178321, -0.13422281221596993328, 0.251582066127820414, -0.31121876398729647617, 0.43612872129651875097, -0.47985225176217655152, 0.49291724756068344693, -0.081762536217311512776, -0.57008353784816034882, 0.10575886843256800118, 0.098523720643000009534, 0.10937645918695998448, 0.093097559739294566405, 0.11751580252413190708, 0.080890140328454290808, 0.13582591101899046948, 0.053438242334149517465, 0.17698108482519578355, -0.0081682818617542177903, 0.26907668054096511856, -0.14499960251139776268, 0.47046169477357585587, -0.43311804519238522593, 0.8566919676788534721, -0.94615480020651354653, 1.1710825622951557268, -1.4966789629626817693, 0.019850657594207062745, -0.7294516591807701511, -0.065495278981390098183, -0.065538150305470069257, -0.065472788689835886844, -0.065568482416232795607, -0.065419575517949940524, -0.065631137544391218719, -0.065286317209239316806, -0.065744866169428622937, -0.064914403751873434034, -0.065874953263984459273, -0.063671751390664124703, -0.065668915889071088898, -0.058354957302844213951, -0.064207225531959205567, -0.028319888642541334034, -0.061612158154530236032, 0.15860060924423530215, -0.060152247616144620401, 
0.96974734313860999624, -0.071414854005294861605, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=TRUE output=np.array([0.21482323419002713472, 0.24750397604597326739, 0.19883614002139288202, 0.27258973708238182221, 0.16379621347999451242, 0.33062932586024107451, 0.090229192760086424085, 0.46740970723200020442, -0.043673571594035724697, 0.78496238070321600055, -0.14225997541844798144, 1.352022453322457185, 0.3403195391608931919, 1.6947780812699095865, 1.6168904854104195756, -0.12774151466578054226, 4.4891751144718545774, -5.4563586958095688928, 10.951815529860082776, -17.445747353383097789, 0.78661625540292257064, 0.77643763922916775311, 0.79017886797586167624, 0.76625062041079372577, 0.79428060270008205013, 0.73589687070232667754, 0.78216715039058581915, 0.63518798334179016329, 0.65550344333732102342, 0.30392043723920803, 0.19412499069407199159, -0.42254129005404528208, -0.4561535986725677172, -1.1523051841355156366, -1.7794911913869626563, -0.26332862233794968043, -4.7570007749943510333, 2.9350973501194954629, -11.456397338110976492, 10.131555788148743247, 0.21540397689706236584, 0.19368025263260429947, 0.22641495625607274689, 0.1776489056064568528, 0.25155233513796670941, 0.14240007942952948028, 0.30993541465339075058, 0.067271686044363815382, 0.44245515455816608608, -0.080352654001592846433, 0.58572719290272645676, -0.30336068837952584465, 0.016951242449324648714, -0.3319833812262538153, -1.5163892893085060276, 0.9143206895602743467, -4.9664054857636248386, 4.3390336686705470726, -12.728941927787642996, 12.044637871668662399, -0.24478425848794690967, -0.24574753349402200797, -0.24311234670203171748, -0.24447893336440842948, -0.23629987575155153579, -0.23582487990744877004, -0.20434545124769434854, -0.19103328802457081315, -0.055793360625630179783, -0.0080996580588084746144, 0.39680666612040660368, 0.39418417396254185059, 1.016323619403686207, 
0.62129681239849587904, 2.2740869262794469741, -0.76498115062552785037, 5.1040543667499091995, -4.782338947138994989, 11.471481107808449096, -13.821393989294298521, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cs nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=TRUE output=np.array([0.18906235677058036426, 0.18778525841872689695, 0.18966313099776943574, 0.18676205783172145081, 0.19092371457093507137, 0.18426214279619892999, 0.19330747373751991369, 0.17770329603139525809, 0.19648166130871616564, 0.15897921699813499785, 0.19369977803300092445, 0.10544719310340035234, 0.15040730339948449323, 0.026126087939682636485, 0.014566423626603465513, 0.71038417591713942656, -0.0044258520659356658661, 0.37477303749040830061, 0.11191608928948779123, -3.5378245461949706652, -0.1717114269621455025, -0.1641796852631927095, -0.1753614922544594612, -0.15832977440814030579, -0.18329458477753993173, -0.14455385888762556368, -0.19975455127362412577, -0.11060689494904693553, -0.23004851008792162603, -0.021054716326888309186, -0.26742505651389464338, 0.22697177040482915955, -0.233733177660145508, 0.79112685520733139199, 0.0087978598061598800584, 0.36982567289124945731, -0.019154626403592486805, -0.30951045809435018263, -0.039292263384108856716, 1.0312889189371063914, 0.550395590305010729, 0.54949597448884435202, 0.55072323934346578689, 0.54860843215917232119, 0.55116651719216569472, 0.54595839995564021674, 0.5507105841767095944, 0.53682026836985941021, 0.54272290221675423272, 0.50167044456244525019, 0.49401178725604522057, 0.36566698512261336385, 0.28011631034896739001, -0.03551586186561946773, -0.15096545178713224877, 0.063629989613431983675, 0.133595751562454651, -0.72836523541381203994, -0.59095728594899554764, -5.2594893416580195122, -0.095090409079557833283, -0.1030766681400390461, -0.090988045895585706324, -0.10887508452680887128, -0.081496719465270400784, -0.12133034539926186579, -0.058865730473533696421, 
-0.14644031510302549237, -0.0020110794198277630838, -0.18907748512879404834, 0.15000794369741921042, -0.22502359947941463769, 0.5462684746707909822, -0.11817275982033918769, 0.95064928964895001329, 0.052430922211166117175, -0.40184197213040517838, -0.16340199015776293856, 1.0056597912908593617, -0.79932421729955860368, 0.03971313277885706039, 0.041414831889495302975, 0.038837883046586599289, 0.042648161751521966589, 0.036811186605924875459, 0.045290364726222026581, 0.031978640736114505305, 0.050580394866669355081, 0.019931365400903504337, 0.059364669508446515911, -0.010779243096346216579, 0.065589985310555209974, -0.071477633391997127954, 0.036649613012278252355, 0.13265213083724519683, -0.017297782300268411959, 1.1319824969476413035, 0.066678235503076957458, -2.1076119483122943699, 0.36704351417936742497, -0.098246537944137943277, -0.098542071754031859698, -0.098088618240313230068, -0.098745945231101361905, -0.097708943948828105852, -0.099149563571540588747, -0.096734808676916902992, -0.099784603436978008828, -0.093975228815392539139, -0.099897241328297553542, -0.085336692453571283412, -0.094243446676616243751, -0.058668216352278305947, -0.060228845196453252575, -0.018756415239227899883, 0.03177289587597233872, 0.19240899911984374326, -0.16120768980326213859, 2.2227564852817449648, -0.98762351160861328037, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=4 knots=None absorb_cons=FALSE output=np.array([-0.00011851815295720334641, -0.00012663567739435794201, -0.00010982693555193819436, -0.00012486657068400786952, -7.8574573747497574675e-05, -9.7456835655811038625e-05, 5.4155250158153698734e-05, 7.2018696214385270055e-05, 0.00068058583050101088829, 0.00094879940917660596233, 0.0037183147633434721069, 0.0053485055580019882915, 0.0183840295424396033, 0.02677749980087675008, 0.086329741280370991818, 0.12474145818684392872, 0.3626441834851739654, 0.49882426586368627808, 1, 0.99999999999999977796, 0.35563445619191813574, 
0.46362413648379807718, 0.30171426160013936624, 0.54469854427884600856, 0.18065455579032740907, 0.72714127654712401583, -0.089788280381099649929, 1.1356676987661198375, -0.68581279037676712296, 2.0358855654123400036, -1.9635811732571735178, 3.963516193961472478, -4.5268166706294525881, 7.818994828526680152, -8.8143777554827753562, 14.211587381298102173, -12.092085343347227067, 18.821721967074164894, 0, 1.1102230246251565404e-16, 0.64448406196103913413, 0.53650249919359616069, 0.69839556533541258254, 0.45542632229183804826, 0.81942401878342008281, 0.27295618028853169124, 1.0897341251309387022, -0.13573971746233581825, 1.685132204546259338, -1.0368343648215159192, 2.9598628584938277974, -2.9688646995194716283, 5.5084326410870145807, -6.8457723283275555559, 9.7280480142024003953, -13.336328839484941966, 12.72944115986205027, -18.320546232937854114, -5.5511151231257827021e-17, 1.1102230246251565404e-16, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=7 knots=None absorb_cons=FALSE output=np.array([7.6647956129152551405e-07, -1.1550595444117308123e-07, 1.9331333022628060142e-06, 7.0074370880358996831e-07, 4.276544862176869743e-06, 3.9889844050692401989e-06, -8.8286906818606534835e-06, -1.1748087754222518581e-05, -0.00018052779655174404014, -0.00026002337472508241234, -0.00087512057847980513799, -0.0012887090681721327309, 0.0010150616469118238505, 0.0015068191039899082179, 0.047197284048099072407, 0.069183314412051222231, 0.31391535501834866295, 0.43677277426050831188, 1, 1, 0.00010414066623386542833, -0.00015235200709813872134, 0.00016280595673724983273, -0.00043421191677436572591, 0.00016730985750816495974, -0.00096382547591343699107, -9.0113367500726122014e-05, 0.0019506005877063976872, -0.00042706768120516621842, 0.042844970995241649092, -5.2132124969551940641e-05, 0.27943748664501288914, -0.00082303187571104372, 1.1480003233353344161, -0.045428857401670741378, 2.6726262314060083014, -0.26572750370216563498, 
3.4985477308433670096, 2.2204460492503130808e-16, 0, -0.031639925468625686167, 0.046730544589411379675, -0.049698223025307776413, 0.14261932064706925316, -0.051804308700160631163, 0.44426741959012128191, 0.029985561329029603028, 1.2694922528331589184, 0.18639387492323189477, 2.6501418567322971853, 0.29516026175368154094, 3.4280258862936845432, -0.075791571757795037079, -0.87883069340180242079, -0.70490461473281773586, -10.07981330853315427, -0.24791206297674328596, -16.148116252431378825, -1.7763568394002504647e-15, 0, 0.9404004428081522704, 1.0229943455861092438, 0.84837282177641593073, 0.99419676362025177774, 0.5526597738396983317, 0.71315928497606728698, -0.26997136265236038044, -0.36417829458474493265, -1.6835610275209540454, -2.2695939445331143602, -2.6912296816355203433, -3.6212526616197560081, 0.7249087759092099903, 0.96643128272326350725, 8.270877054240102666, 10.639570935701080145, 13.016842606390081727, 14.65915427188926401, 0, 0, 0.091455790248830992617, -0.06982269652782702174, 0.20182330529950023856, -0.13687095276618752027, 0.50015399051603726122, -0.15701950364285152673, 1.2380162944797474811, 0.093052186884682086543, 2.454295887293517886, 0.57831424352675386835, 3.117018133244208844, 0.91526479748152955729, -0.79779073451863469213, -0.23433512959673663545, -9.2728475980951614588, -2.148398325632811634, -15.512250940383852438, -0.55028191775469181835, -3.5527136788005009294e-15, 0, -0.00032121473415268157747, 0.00025027386535885344336, -0.00066264314064781932364, 0.00048837967193203002999, -0.0011810420579453353648, 0.00055263556817129462322, 0.0020684489017660893928, -0.00030499763304578722534, 0.043478860781959838278, -0.001447103346453879405, 0.27997853934107919116, -0.00018679973229867273543, 1.1484815005960040679, -0.0027726021640539499913, 2.7051067319414512369, -0.15316884735317959354, 3.6951325456543355763, -0.89607660680706602285, 4.4408920985006261617e-16, 0, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- 
spline_type=cc nb_knots=10 knots=None absorb_cons=FALSE output=np.array([1.7052675937596968357e-07, 2.1554130872244172271e-07, -1.8766481785820174288e-07, -2.5625876315796275659e-07, -1.3028476697477565147e-06, -1.8631355726629473538e-06, 3.6693095004218680534e-06, 5.3764688723538902118e-06, 6.0796641288638842218e-05, 8.9528552951850039502e-05, -0.00012887069103758740009, -0.00019001548651089925079, -0.0035275680979317425995, -0.0052024266625657657193, 0.0035256702065387685724, 0.0051982990643293265631, 0.2362999429577772037, 0.33323526570676581526, 0.99999999999999988898, 1, -1.5536971506019493448e-06, -3.7723308315471718504e-06, 1.0498525832867015351e-06, 6.1264614941971342081e-06, 3.3796481270836038393e-06, 5.4203738638415076344e-05, -3.5129100336125819986e-06, -0.0001705894910869632658, -3.7468155003797335172e-05, -0.0028884082386604389286, 6.8182896976374759469e-05, 0.0061884953395126701681, 0.0018130647206160395782, 0.21325100724817980247, -0.0017981230343969457908, 1.0927529255884664838, -0.10407530163420519731, 1.8409714784490120998, 0, -2.2204460492503130808e-16, 9.8228540403516947542e-05, 0.00024501849587308256306, -6.3993936608566740908e-05, -0.0004010047866267046901, -0.00018304683587852169481, -0.0035611604429654220016, 0.00011419029847733929632, 0.011357234442518585049, 0.00050617348630547251507, 0.25226492984599047098, -0.00032188615231765204967, 1.1982978420717516066, -0.0052489266241784434189, 2.0211806750240537411, 0.004422931600446426692, -0.32320182404955488664, 0.24763812678503760445, -3.5956683328958609636, 2.2204460492503130808e-16, 0, -0.0066286721449819972773, -0.016550854830837284648, 0.0043124327641686751103, 0.027487867638282981186, 0.012275008624007566552, 0.3281445173550811556, -0.0074331317422015865878, 1.2933290334062270688, -0.029450751586844299557, 1.9518047804513476429, 0.011641882663420602406, -0.66721597637483753829, 0.077812573766972065181, -4.01958848618877429, -0.02237250215454568425, 0.74181768967199701592, 
-0.70957277332229884426, 8.2779135644074397504, -8.8817841970012523234e-16, -1.7763568394002504647e-15, 0.14341792272585895951, 0.77593418511036460483, -0.090433414170275921995, 1.1259556534302141984, -0.25737004638880062179, 1.110930597939268516, 0.15569410127086877327, -0.56800026419801152855, 0.61436360579663806813, -2.2407355088743035054, -0.23716773295804302601, 0.86370268070518285697, -1.4404805919834702266, 5.2118285879993342391, 0.2780344963834942007, -0.97074031271066019144, 3.8024793557257794419, -11.342987956092152046, 0, 0, 0.88922860214889154484, 0.26150593343291061998, 1.0527176310249597435, -0.16739004844301930142, 0.90250111388972187321, -0.47638677331480472343, -0.44192413673958003129, 0.28819070120487033648, -1.7434322051050550328, 1.1372558503535050711, 0.67215428910723273859, -0.43917237872741754501, 4.0596131096953849138, -2.6712549469068491703, -0.75992694414757844257, 0.51959152341465042468, -9.0955055761811927084, 7.3255691971501466497, 0, 0, -0.026500878677340050649, -0.021443321240789712384, 0.033955530529363592285, 0.01455399094161052842, 0.34654945272048409688, 0.041426545282572141415, 1.2822017995143766278, -0.025084826033741181028, 1.9080047293712101286, -0.099372016748206934422, -0.65055242209998476177, 0.039244657655196912849, -3.9247579397546936697, 0.26136174203674272798, 0.7301169577862545168, -0.074258542407508620897, 8.480299193362036192, -2.3225008358095564631, -8.8817841970012523234e-16, -1.7763568394002504647e-15, 0.00039214456274060853838, 0.00031751196529807511858, -0.00049656870424369891313, -0.0002157937723172946836, -0.0038324354523821676718, -0.00061725037261922916041, 0.011518730827296382035, 0.00038505493185519946639, 0.25286023990754330493, 0.0017067621188927551389, 1.1981634895781139161, -0.0010852069418095661169, 2.0228506259066678652, -0.017693728679951309379, -0.32609759852778003042, 0.014908398163377523105, -3.7765166040071465048, 0.83470297192668896358, 4.4408920985006261617e-16, 0, -5.9639851815191078669e-06, 
-4.9161432965675507926e-06, 7.5203048707079762948e-06, 3.4647891244179194819e-06, 5.7876642390457017258e-05, 1.1182950401982235801e-05, -0.00017170982870512408684, -1.1720731503335203019e-05, -0.0028751203560827250441, -0.0001259174615164541705, 0.0061430676556425214963, 0.00022990175893332354118, 0.21192565237063318295, 0.0061175761298287428119, 1.0940951118875659187, -0.0060681567350964193963, 1.9189536363142121189, -0.35123535284248763588, 0, -4.4408920985006261617e-16, ]).reshape((20, 9, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=12 knots=None absorb_cons=FALSE output=np.array([-3.5802310279784754585e-08, -4.6262647978685225562e-08, 8.4108204760015214405e-08, 1.1691317515513621075e-07, 1.3383585568882782416e-07, 1.9334920179260735574e-07, -4.203667662871902566e-06, -6.1443325945584099136e-06, 8.870740522791627539e-06, 1.2991653852666520431e-05, 0.00025968489610813934136, 0.00038043548035123880531, -0.001538951033558028092, -0.002254678726504918268, -0.011894389332899407696, -0.017426311105119260403, 0.16787692522771835435, 0.2391755401848442153, 1, 1, 1.5979133269021421162e-07, 4.7768854385179628339e-07, -1.9425098428285929911e-07, -1.661723207394062299e-06, -1.2068855552733148196e-07, -3.2055922738442340194e-06, 1.9511133954906500148e-06, 0.00010616629891630242227, -3.4565252886977172405e-06, -0.00022600512710102375891, -9.8259384682155117077e-05, -0.0066248710130454744813, 0.00057889080026267934753, 0.040648127270414641243, 0.0044706215223907566636, 0.59368709533766184094, -0.057610288739808043768, 1.4572996890794969183, 0, 2.2204460492503130808e-16, -5.5109923531389489217e-06, -1.7294660575127571221e-05, 6.1519217837447675384e-06, 6.0756378562884462338e-05, 2.7216035102121806338e-06, 0.00011763795929723115457, -1.6476765274556428198e-05, -0.0038995592960682475339, 9.9827397529189624958e-06, 0.0083454262915318017102, 0.00018240275250513114397, 0.34605772709145571353, -0.00095280597828643811414, 1.3922985845928645965, 
-0.007230442437518780005, 1.0497271498165361425, 0.092837834238923916397, -1.6519805635559703383, 0, -4.4408920985006261617e-16, 0.00023261122232546074806, 0.00073160846398710190595, -0.00025857850183971374958, -0.0025712720147247018071, -0.00011201881409144995674, -0.0049793706065786738982, 0.00059472548275806631952, 0.20119309669405308072, -0.00020482161298259401474, 1.1583114626193435903, -0.0013424798341694420094, 1.5703649598779465535, 0.0025261771262702131455, -1.2014100351887648799, 0.013860165507571235721, -1.7597171609627610955, -0.16373073924917352917, 2.8753824912917806955, 0, 4.4408920985006261617e-16, -0.009603671354393113202, -0.030208220454698501978, 0.010673917274214628215, 0.11304566492258458221, 0.0046199850162510491813, 0.84717578954956962445, -0.024382491288093402493, 1.620889030714223189, 0.0080875614382544102482, -0.43741011526471584858, 0.044600501680212630007, -2.3916675588781384221, -0.040106814398597687155, 2.0236045109652547325, -0.076082896664545401899, 2.9650152140638748044, 0.36507708741878691638, -4.859029716718211489, 0, 0, 0.10357736791963742529, 0.85603579089397319191, -0.11294862650356479739, 1.0800085662880150039, -0.048885819171624864121, 0.24015822941969325321, 0.25794065020098305707, -1.2534166511355830931, -0.085429974450229081984, 0.41506987095643310681, -0.46751914821604279027, 2.269740510670964273, 0.39810586745371900896, -1.921845360279117898, 0.60183408291126982981, -2.8261756066203411741, -1.2434080193894079258, 4.7739148511542310871, 0, 0, 0.94933899366841034695, 0.20425903192036259926, 0.97636197471447405682, -0.22571398353340010878, 0.19529869420992360118, -0.097692387302149769068, -1.0167283962036961498, 0.5154656778185841004, 0.33669612032614604225, -0.17072771898201555274, 1.8413288872915176686, -0.93446780726201750245, -1.5601117305432461446, 0.7966817961552830063, -2.3016241440126483653, 1.2113186062078835636, 3.9901615392586013975, -2.5959908523544878989, -4.4408920985006261617e-16, -8.8817841970012523234e-16, 
-0.044595107940631317778, -0.031547859459583466046, 0.12905624158450940131, 0.036024366939322927106, 0.85410524864504355769, 0.015592380256197772501, 1.5843319897343188796, -0.082289673658138620671, -0.42531415373425501825, 0.02729325191154396979, -2.3258032067945428878, 0.15046179406667054002, 1.9695869973481714421, -0.13497763436486276412, 2.8983680069011104052, -0.25382247697616189264, -4.9233259421066879469, 1.1940247607627194615, 4.4408920985006261617e-16, 0, 0.0010800130290116449761, 0.00076408094461186495429, -0.0029589630116448285488, -0.00087264995012978460905, -0.0051470914118511394697, -0.00037804019106299111778, 0.20207527387121515527, 0.0020070544765826729347, 1.1580252366043886081, -0.00069118022426611653436, 1.5689665948166240383, -0.0045290910522769603844, -1.2012679739112754884, 0.0085164006815267589978, -1.7671162635968691301, 0.046706182018272840439, 2.9931065266471703623, -0.55166786071240503375, 0, 0, -2.5513815072961419693e-05, -1.8078345013767598096e-05, 6.9882718216176479414e-05, 2.0732787925975703258e-05, 0.00012154373149783886249, 9.1724360658872410822e-06, -0.0039185795650754015607, -5.5540121878291909925e-05, 0.0083483300512910106617, 3.366784700094217677e-05, 0.34597774598037167593, 0.00061544857656690001111, 1.3929650741667591873, -0.0032153965723222335815, 1.0550792027144637597, -0.024400898124765986463, -1.7214890573288510467, 0.31330563624629470532, 0, 0, 6.9427404324062452156e-07, 5.092710402106320213e-07, -1.8900533691006157141e-06, -6.37008124488906817e-07, -3.2769559589167800075e-06, -3.9927796023670766917e-07, 0.00010555708713181393625, 6.542541903472452638e-06, -0.00022369557760034113753, -1.1651681607648470274e-05, -0.0065527231879012725474, -0.00033154755847665279712, 0.040215268969781864383, 0.0019536854662268524074, 0.59033605648767484286, 0.015088206344919037616, 1.5005041340227291968, -0.19443397537829132382, -1.1102230246251565404e-16, 0, ]).reshape((20, 11, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- 
spline_type=cc nb_knots=4 knots=np.array([-2500, -150, 300, 1500.0000000000000000, ]) absorb_cons=FALSE output=np.array([-0.03303972668455883166, -0.032723631630874550069, -0.033194598680918049882, -0.032481019964223409313, -0.033535293013993730527, -0.031917861017270607316, -0.03426231263902488472, -0.030563998358814094963, -0.035695429397679306893, -0.027087023457207957261, -0.037865492526216820712, -0.017178820311233713308, -0.037086543898716496648, 0.014593291596970280055, -0.0029879266785189512179, 0.12551122978070125558, 0.23922654623041611499, 0.49782433286763683178, 0.98768627515810170081, 1.0869145050394330987, 0.66601437778113736776, 0.67187778857060287407, 0.663080503426190071, 0.67627141867934648101, 0.65647421427937902028, 0.68614414294517134252, 0.64158611076564242559, 0.70828643005086278084, 0.60798624557560698722, 0.7576791553065401752, 0.5320956270640739838, 0.865894348010412096, 0.36241579174811910935, 1.0862902782459773476, 0.015083764342983273232, 1.4407803230899152158, -0.4134231048981120149, 1.6325959428355898417, -0.025636829423459156496, 0.35643164379996317148, 0.36702534890342142226, 0.36084584306027167599, 0.37011409525472788173, 0.35620960128487694218, 0.37706107873461469637, 0.34577371807209933419, 0.39267620187338242443, 0.32227756830795128984, 0.42770918382207229191, 0.26940786815066769533, 0.50576986546214286466, 0.15128447230082164854, 0.6746707521505973526, -0.10088356984294755136, 0.98790416233553579595, -0.56629155287061583302, 1.1741965586676958999, -1.1304202757032262294, 0.037950554265357511197, -0.44334614883939615915, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=FALSE output=np.array([0.015162953484350691347, 0.017003239050418700318, 0.01389317445221215086, 0.01781864311343593632, 0.010107471620559638215, 0.018108516505766211746, -0.0037108407090391832411, 0.013008750547323907898, -0.048928829481261350287, 
-0.0040236561643654800147, 0.092210640717886122042, 0.1378672467189187012, 1.5241189647343453828, 1.1908052000719462349, 1.3336312280306561462, 0.1631244792608173011, 1.8059790376981266213, -0.013880364847569600512, 0.0084813946159772030109, 1.4226717441894676242, 0.085563268199896264088, 0.11668451836115162346, 0.071115177265946433183, 0.14186260471454884002, 0.041497176936461224739, 0.20374514230967810668, -0.0090968367480324351104, 0.36464078885855299994, -0.046829263271134111135, 0.78133519562325359775, 0.0086505068072265829671, 1.4941773335338757178, 0.083794480856750619413, 1.5469438185966697397, 1.4536819883528222519, 0.01009019548322653316, 0.24373154263518609364, -0.026484165693594415025, 0.45865093019281244491, 1.3856971378298170716, 1.0470804297077640488, 1.0403159202859915133, 1.0474228585236060596, 1.0301709677530419551, 1.0403805448593048677, 0.99263889345454958679, 0.98179764669852209735, 0.84465069052528973792, 0.662325916413059268, 0.31424672127915465047, -0.11097382584830663144, -0.73780817614306393182, 0.10634636847011158522, -1.0591219159940026273, -0.97302313835893716742, -0.14685901225266156933, 0.065659678129581511286, 0.92642956346749238961, 0.738106497929031935, -0.90865791610827972846, -0.14780665139201115688, -0.17400367769756186487, -0.13243121024176468525, -0.18985221558102688411, -0.091985193416325716775, -0.21449255226999386359, 0.031010030758549505386, -0.22230022993116671515, 0.43343217633933617261, -0.091558260738042823723, 1.0101126783231939577, 0.10576359589026917973, -0.71425981406120730988, -0.67862710267461334723, -0.81429007802454078657, 0.97364433750861778538, -1.115370258462895503, 0.11393496707367145593, -0.20523882273782165231, -0.89971096591100430118, ]).reshape((20, 4, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=FALSE output=np.array([1.0865454545429926135e-06, 2.4398181818138797716e-06, 5.4730227272719212013e-06, 
1.2258383522732035908e-05, 2.7392640980115554039e-05, 6.0996505637431891467e-05, 0.0001350924767677991363, 0.00029670296713464952565, 0.00064309569467692253436, 0.0013643251258789318436, 0.0027908209016161266564, 0.0053380236469475778743, 0.0088335867924326759437, 0.0091533086384256254903, -0.014306039442788853755, -0.041433352007441685838, 0.31405170261824960631, 0.99790444618953877409, 0.027724626841434521496, 0.0068039590138881103284, 0.00064727402371540310524, -0.00098920483399211889264, 0.0014426438433794426357, -0.0022558485342761974701, 0.0031770096755419320253, -0.0052223605999905525765, 0.0068053308523184398438, -0.012426118572852515309, 0.013644913148864869917, -0.030618365664116811709, 0.023048059495840829414, -0.073679585582029943902, 0.022074755528899835111, -0.091195081533291866283, -0.0002259526328000187323, 0.78626962637629815855, -0.10027585488170688566, 0.03440373467483604647, 1.0473889505746853956, -0.065646799659779347946, -0.003108653786561336313, 0.0047444954940711828867, -0.0069334472845849707406, 0.010810919925889327445, -0.015293474668169363198, 0.024997761679919142108, -0.03288173605395704191, 0.059489869048364188431, -0.066524536226034367004, 0.14842140322069641289, -0.11509599902262178384, 0.38653940175293038939, -0.12009256540661154833, 0.89487060989842726055, 0.023974235745902267025, 0.30753674660261021145, 0.028225509751146779513, -0.016286171465612203635, -0.087188251557721807572, 0.95672514836045408071, 0.99996516654545464231, 0.99992170981818173647, 0.99982413427272720519, 0.99960527135227272844, 0.99911513172301125163, 0.99802008661061780703, 0.99558245566310810126, 0.99018628040560352499, 0.9783435545897927188, 0.95270542773753197352, 0.89842166335737938532, 0.7877650145033745499, 0.57753870046138200411, 0.23531461121027275052, -0.101438596654484528, -0.076220798157655950122, 0.080593293321039760624, 0.00045998670001021375875, 0.022032291428807804223, 0.12976646713984618664, 0.0031521796047431342204, -0.0046466867667984135259, 
0.0071530971936759326843, -0.010318124209980239492, 0.01639748407299900651, -0.022529876245437452376, 0.03838002020499131639, -0.047302525716890531093, 0.093325747193657035417, -0.090212229951197098199, 0.23900090287817127299, -0.13160022708072441344, 0.61322255928469093789, -0.056958752001294875311, 0.958298331738895115, 0.012677886126789952859, -0.35059868303530600242, 0.01444622466557134513, -0.00094091415750929829898, -0.032059943389195363905, -0.00065705293280633844839, 0.00096724647035571764686, -0.0014919010479248961606, 0.0021455230825716264942, -0.0034235434443629607359, 0.0046733920492536690228, -0.008021163143228648984, 0.0097557918686406452924, -0.019432774400957149308, 0.018339439531206425116, -0.048165447610385929422, 0.025637372759501717012, -0.10157703666079390126, 0.0088153037874612247271, 0.13369802124527602194, 0.011169891059399323502, 1.0280040322265766584, -0.030928220764344298982, -0.0090167031296965738374, 0.004411168534786328102, ]).reshape((20, 6, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=4 knots=None absorb_cons=TRUE output=np.array([0.19511874223105002413, 0.19328136140945711974, 0.19603076753463616333, 0.19189278126562986371, 0.19806475922765381936, 0.18874078982063985377, 0.20253543882853958236, 0.18155709647843842891, 0.21199988818367701549, 0.16514182390533327371, 0.23031856584684334832, 0.12705798198206638694, 0.25709528872115156028, 0.036195691844765578993, 0.25138520448010354125, -0.18708237186262471896, -0.013972372776853255599, -0.70106203022920721146, -0.96214970344564842986, -0.96214970344564820781, 0.68403334135296989249, 0.60311193997153489388, 0.72443479807328026165, 0.54235372240853074732, 0.81513433908152477247, 0.40561204773518105826, 1.0177088251859824908, 0.099342480096702004411, 1.4639211866974952692, -0.57590399808120096292, 2.4193104627273829266, -2.0236042389889199455, 4.3297301577097133674, -4.928177533945309996, 7.4943452610127803126, -9.7886574066511276015, 9.753529003891806326, 
-13.510350272896850754, 0.23706294230926167055, 0.23706294230926180933, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=7 knots=None absorb_cons=TRUE output=np.array([-0.09996264143545301184, -0.11347888802625775984, -0.085118834474127241796, -0.10911447157987791412, -0.0376192915747680795, -0.064440162552667323848, 0.093968376147604365523, 0.11142321074687944193, 0.31776266530375074648, 0.45570604000055198179, 0.46380277431584160386, 0.89300636651257359055, -0.14510213799700033555, 0.96333821295332311418, -1.5132894536153895793, 0.82432258344543585249, -2.5988714669036583338, 0.92635413953045664393, -0.14134351039860873489, -0.14134351039860867938, 0.23834091756376188376, 0.35248629179932566835, 0.18039220953514537027, 0.43583938337484684489, 0.050144193047881382985, 0.61552699880967853829, -0.2237848683809608441, 0.97413442320198884161, -0.67208413897185625974, 1.5362399491838241161, -0.95632335803874857216, 1.7726129713790426123, 0.31347441526077174379, -0.38061109255151409636, 3.2553930274666940115, -5.093077784759170612, 6.0469258529758489473, -9.2083208514013143997, 0.38134573025237739063, 0.38134573025237838984, 0.36086943997122683525, 0.36667024194421360406, 0.35446911502732253441, 0.36478130178265055772, 0.33382084885464979873, 0.34553973621188488474, 0.27476295985860993421, 0.26982601949673939989, 0.15921647742368139067, 0.1214671481463027447, -0.0048407176404455702981, -0.067803846828875705133, -0.11067535403631012514, -0.10302868958722773141, -0.2301530127015978533, -0.064773144847515770617, -0.49542622407015213248, -0.23755670608026469015, -0.81858279646244769268, -0.81858279646244602734, 0.36026977248710895241, 0.23461157027634554906, 0.43091928395041323441, 0.1550818089508944897, 0.60166186948369282383, 0.013499889397706944794, 0.98534266394190206384, -0.20102910227880538274, 1.5995282267915371666, -0.53077336516331363736, 1.8709434423002264669, -0.7329934007992182643, -0.41020716027288028904, 
0.26173115951276498814, -5.3296664149739436667, 2.8167844608879319956, -9.2446193976201840314, 6.3595195952585372723, 0.3796975489346396393, 0.37969754893464241485, -0.09342094101113124216, -0.10518600840242132988, -0.080006629364737152921, -0.10062511887211006423, -0.036336786815554571184, -0.058504221090871715016, 0.089578204326735660912, 0.10154568474713222825, 0.33951493212221051099, 0.38266890106103862434, 0.7115379830932434535, 0.57066292250314121226, 1.0142476918808467357, -0.17457778635418924607, 1.3394445526441309813, -1.8727860878377164955, 1.5244315420174636699, -3.2891836324660186364, -0.13150260109059599967, -0.13150260109059619396, ]).reshape((20, 5, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=10 knots=None absorb_cons=TRUE output=np.array([-0.010422589164110178037, 0.0456424386898241774, -0.037865479111961315717, 0.067665213698612183824, -0.082737354894918610504, 0.028116719037984471558, -0.13142165397961641515, -0.22684145450558812485, -0.14183761903259869963, -0.4397636102098926858, 0.038753040222973357543, 0.16687260126443773101, 0.24596836576748323799, 1.1496535841573791714, -0.11141233683909176899, 0.852150817442395625, -0.69719802631935023918, -0.16780336922215680073, -0.27375964350090231658, -0.27375964350090231658, 0.00075686991514895495502, -0.0026399619145559983374, 0.002329286728553797009, -0.0046772664281081481261, 0.0050464211301084280908, -0.0053347974019967861828, 0.0084202195193733477691, 0.025683527700154588219, 0.0094683775407149051856, 0.27987678132954318944, -0.0027668876775316930683, 1.1881421141921484352, -0.020680289816708941064, 1.9619971627400687453, 0.011350885804312933758, -0.30799503353642232462, 0.28512530049421225264, -3.4687076070237505832, 0.017302448352367463119, 0.017302448352367227197, -0.031423668563904168238, 0.092056168183617143042, -0.085784223470207240925, 0.18847060597276241167, -0.18459182127963985609, 0.39491427056812172802, -0.32011917407772511046, 0.75400611255752592044, 
-0.36683893574186782205, 0.9123381072270769776, 0.10368560537272084043, -0.284896787114018335, 0.65873659367957215771, -1.7915854614656305799, -0.28317998595666565853, 0.16934785893573317539, -2.1208025768458700355, 3.4983918640596205485, -0.65136227602061025355, -0.65136227602061058661, 0.15390547566386716949, 0.72999661431747953078, -0.052325183145307693766, 1.0578646982284642952, -0.17410117772396246916, 1.0826889596002884364, 0.28795108435587113416, -0.33988255901594949249, 0.75706886705991383035, -1.8010717324191700683, -0.27609951573452540252, 0.70199292810649616126, -1.6861943363558831166, 4.2694489532922874631, 0.3883483777323971653, -0.72860244126989648361, 4.3993879828572435287, -9.3213910982477266032, 0.27550705134905656424, 0.27550705134905634219, 0.88473558816481134848, 0.28118622960754013906, 1.0363915332292370497, -0.13821893333931381531, 0.86682756892064050103, -0.46428766146855998675, -0.49858487332948592252, 0.19046189397427254475, -1.804569129824072693, 0.94889775503965156922, 0.68883320796217106796, -0.36989366480027330297, 4.1648803055867107403, -2.2675263648363670477, -0.80718694983894612172, 0.41585628137205921107, -9.3512295591515375293, 6.4594889011881138075, -0.11803106422832626565, -0.11803106422832615463, -0.04700419996957900387, 0.068365309897369067049, -0.04054662542658848956, 0.14767281664377382433, 0.18375758144525122795, 0.096639366434089990499, 1.0236374391375679949, -0.47105831760443933609, 1.6290138304390200386, -0.95892121020674836451, -0.57444021027476643848, 0.35538961856435591358, -3.4443839310068100623, 2.1037278825342364819, 0.51445168623435322264, -0.5476416666229180219, 7.3133339883682770832, -6.2747526284605275038, -0.5386203650629584061, -0.53862036506295885019, -0.00033055953514735743511, 0.0034830998631877898263, -0.0031226317036678898596, 0.0044763982618290778723, -0.0095705473805210890259, 0.0013288993039827501352, 0.0024048160547256620945, -0.015334684583203243846, 0.24302632748633656856, -0.02859075483432030676, 
1.2008463040646732978, 0.01005831734477347951, 2.0397829199332155881, 0.047246265215163486817, -0.33369939987991892316, -0.0017774801323981149429, -3.8176499659498723815, 0.69539341686864553083, -0.018985370198741847308, -0.01898537019874227405, -0.0098485472403549607912, 0.043107558972873410985, -0.035757108373310586558, 0.063906922484069789148, -0.078090074289757671622, 0.026516000447580879984, -0.12429507538278902756, -0.2141005094608952608, -0.13680420974781898336, -0.41275099307476753596, 0.042680600816607877634, 0.15199473312943939662, 0.44252834874998403514, 0.8905421952388906659, 0.99056537832665858545, -0.23331489131404251069, 1.3587540460951703647, -2.2485068612054024406, -0.25856375708606771369, -0.25856375708606793573, ]).reshape((20, 8, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=12 knots=None absorb_cons=TRUE output=np.array([-0.07766833740489995086, -0.10190625722373891637, -0.060810191651491825315, -0.095843436838280671175, -0.026956945198949221199, -0.02140480076346930674, -0.0044354303173668322216, 0.068142865158757101596, -0.10242235056407339444, -0.134962672377622106, -0.17923996606808398346, -0.32198261136183764508, 0.061769798735937714307, 0.22368036323142082011, 0.1062350870161039712, 0.86684951229524320748, -0.41431550373346520688, 0.82628193491032586593, -0.30550552892225468771, -0.30550552892225446566, -0.046843894383066776721, -0.061472670935691822725, -0.036665627178526026542, -0.057737114955866711274, -0.016253728913666141281, -0.012788703169830750381, -0.0026924622704371371611, 0.037130318979860681139, -0.061754253708923484423, -0.072908208132761248566, -0.10784991719446739011, 0.15587963325803569359, 0.035948683163131246732, 1.5026771080647596257, 0.054139137539069064065, 1.2144591386571002012, -0.12227506507632453225, -2.0325190387816292059, -0.18423666748038231011, -0.1842366674803827542, -0.10253186808225246696, -0.1341028859441055765, -0.080717296578839040078, -0.12938111733003612724, 
-0.035779041281597902258, -0.033296176820796892504, -0.0052764476269259964411, 0.29121358356235121922, -0.13571696227220694797, 0.9800391391797497187, -0.23836784885271891654, 1.1531098931327661283, 0.083488884262169646422, -0.95923704223174655059, 0.14850642088377782879, -1.3982914277816191895, -0.63569333679288853034, 2.040472414748102814, -0.40421944208659177367, -0.40421944208659132958, -0.014736324481430123495, -0.036942635828954392685, 0.0066553430184999587058, 0.1067120469886325973, 0.0028385673937536314362, 0.84576148428624664533, -0.024675731668667714308, 1.6253851750682712929, 0.0013193004864854235899, -0.44631406733607736914, 0.032762082318650569135, -2.4125076929166153761, -0.03606306801106605453, 2.0357000324722602791, -0.069357883037490283828, 2.9830669079191580728, 0.34150454252272821609, -4.9007299602670126859, -0.020189059463686463008, -0.020189059463686463008, -0.012482766482938594671, 0.70375642696389684172, -0.20381708316356489386, 0.93679207951689691392, -0.089167436412632875009, 0.2081777984642331214, 0.25130986547048167479, -1.1517493252694694927, -0.23847465841613846305, 0.21373269726886048869, -0.73521082895311973271, 1.7985010371260992112, 0.48954351921215394361, -1.6483400566813448673, 0.75390085346775681518, -2.4179886586261010173, -1.7764330770220631006, 3.8309842164304526158, -0.45651730144672891809, -0.45651730144672891809, 0.86855075539761661396, 0.098258969106073332234, 0.91310939823129155801, -0.32540547292490512588, 0.16725908432707276008, -0.11995362876401588492, -1.021344015679647832, 0.58623523352093998007, 0.23016332628828412021, -0.31087640693906454237, 1.6549915252178080127, -1.2624926460002787731, -1.4964629433416147819, 0.98706595992626866565, -2.1957720652086201163, 1.4954532454044999756, 3.619128405599101761, -3.2523549448400173034, -0.31777688966039602247, -0.31777688966039646656, -0.068946719872015810315, -0.063498950974957252269, 0.10999032024314105149, 0.0059748385118500793589, 0.84565340326272575577, 
0.0088822808061620184272, 1.5829407256504981483, -0.060957945254261673707, -0.45742582387218383433, -0.014951096212291247606, -2.381969985853732652, 0.051586837169672689063, 1.9887723465897544362, -0.077591047241966609738, 2.9302744923046657455, -0.16817713282609225711, -5.0351646834625327642, 0.99618007435765298929, -0.095785966663044092018, -0.095785966663044536107, -0.092425948063627971774, -0.12192256208017497598, -0.076168790113220086191, -0.11625762092899732947, -0.037600709993932460162, -0.026143656852116337053, 0.19673306138614868321, 0.083917187234610293722, 1.0347219995539065351, -0.16290214117344051226, 1.3532959167438192161, -0.38419175563061558343, -1.1275995644118568251, 0.22887092989596394266, -1.6446009002954922185, 0.37556943100676937197, 2.5636651809434143345, -1.3113571002380715047, -0.36780147849154309903, -0.36780147849154309903, -0.052133443997911740886, -0.06838748447056271218, -0.040727650559123369789, -0.064279681235506633263, -0.017963836831353656442, -0.014349195077656596783, -0.0068956265034833194583, 0.045590399764175990049, -0.060364684525170013429, -0.090361397122777853408, 0.22579126263553786491, -0.21095860924503515021, 1.4340181609862545642, 0.11958124964793707956, 1.1233531600390498362, 0.15886424366674492537, -1.9608032079784090129, -0.11004527519600627861, -0.20496419199835186342, -0.20496419199835186342, -0.074397287283336693831, -0.097615070510572624074, -0.058251260498594206738, -0.091806644267701589812, -0.02582498595211388942, -0.020500802148151781035, -0.0041449721977959343849, 0.065178307797134571455, -0.098329868155822383602, -0.12907473963965984676, -0.17815101427258792088, -0.30240999889208236562, 0.098829510321402941342, 0.17727868088912610012, 0.68781535762400658474, 0.27674812243204571782, 1.1588192944220654379, -0.7988804231889805596, -0.29264110323919079182, -0.2926411032391906808, ]).reshape((20, 10, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=4 knots=np.array([-2500, -150, 300, 
1500.0000000000000000, ]) absorb_cons=TRUE output=np.array([0.12313452571054940565, 0.12624926449090670411, 0.12157389196350212868, 0.12857954187334583174, 0.11805461329003080762, 0.13380425540627891023, 0.11009731641023581816, 0.14546384037961754276, 0.092005873745000077424, 0.17117971635322848378, 0.050459076159657625937, 0.22605161337519041886, -0.04604552699950949235, 0.33036931836460758927, -0.26444093870479618014, 0.45978406857873022062, -0.66227507648797157014, 0.32946799433163242998, -0.92301557680450230237, -0.77049779143573060569, 0.15656827198639958199, 0.14932319431509266661, 0.16018937960169490897, 0.14388705697943621198, 0.1683330978621291607, 0.13164928933754893592, 0.18663507205881160744, 0.10408933274661898161, 0.22768017116328620464, 0.042040878816280137509, 0.31905484877266732102, -0.096761998947504118673, 0.51632342198274205902, -0.39392983963941236025, 0.87954141557829357279, -0.94659228313998333704, 1.0777246444382655444, -1.6356011142349877741, -0.30993435496630139525, -0.88022048471107694478, ]).reshape((20, 2, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=5 knots=np.array([-400, -50, 10, 50, 100, ]) absorb_cons=TRUE output=np.array([-0.16508056690578271075, -0.14355192166054722702, -0.17398515944924686227, -0.12432844320614395983, -0.18919890928728577695, -0.072282554700368567557, -0.19674137112973341668, 0.081531091765042931763, -0.099155660656600808078, 0.52456028958996936407, 0.059704023721638257993, 1.2036951770569246367, -0.95698314791898375642, 0.59171921208541256032, 0.40429183055884826414, 0.021134682418438101997, -1.035862596202561825, -0.18547278101676506479, 0.17821896181573629314, 0.27778784312200943685, 0.84139218015539352091, 0.82675559954147825881, 0.84628382311749461575, 0.8117240601311839221, 0.85106220741071658686, 0.76611964233460772, 0.82780911632526033017, 0.61231966988832331467, 0.61938480406110207888, 0.10352707230517528558, -0.069077289581256251516, -0.9761893286926568436, 
-0.74775693930080122662, -1.843017028194991358, -1.8341942282567624822, -0.13779546922177127177, -0.98442590823258591115, 0.79595721424418131029, 0.50797292743971100837, -1.8178521254738027046, -0.057675412235671304206, -0.080422948680625480633, -0.044293406826563193335, -0.094130216309427472421, -0.0090271421596601793269, -0.11523330512953809668, 0.098486792744760950913, -0.12049430358370501193, 0.45224868955998115405, 0.00077770457813844623505, 0.99175389147792825106, 0.21022065027995529007, -0.33999735782244788762, -0.33512941632144105375, -0.43693056610772496029, 0.96967275251871798414, -0.65522965575463965049, 0.17110709302995569248, -0.10439580460326271238, -0.50130803865473083647, ]).reshape((20, 3, ), order="F") --END TEST CASE-- --BEGIN TEST CASE-- spline_type=cc nb_knots=7 knots=np.array([-1000, -500, -250, 0, 250, 500, 1000, ]) absorb_cons=TRUE output=np.array([-0.10421720382140421679, -0.10591736328552885105, -0.10337614089781445303, -0.10720759929942245969, -0.10150561917996400729, -0.11015275422250632442, -0.097405901413871925909, -0.11694382088021060273, -0.088713078413449955595, -0.13250065888393972036, -0.07161679548265666162, -0.1625759820148384438, -0.043015722854652924112, -0.13320201735142009336, 0.0004329669515455780238, 0.78255062519422247114, -0.15088551365188454523, -0.085517465675143913839, 1.0298684606084522741, -0.098098415425510815391, -0.16366514384894217948, -0.15590949502662956871, -0.16741997729436150499, -0.14987919244601818902, -0.17557153836450387074, -0.13565965107884025986, -0.1924380512291224421, -0.10053567798805967959, -0.22324338128005483117, -0.0075691069598905688642, -0.26003599357502127365, 0.25043141090853648922, -0.21975165836638141381, 0.83055439613540549271, 0.024983098021001227257, 0.30184263751436557488, -0.049262209087217581627, -0.19989579226301767978, -0.11401362112954462058, 0.90703894735829693197, 0.15993324209937159752, 0.15937966275042819708, 0.16015824098241202011, 0.15887423467611411221, 
0.16053993466300625625, 0.15746013438548811525, 0.16078344852210443849, 0.15293225108971789106, 0.15839019763395165152, 0.13656272051085360797, 0.14009527925573453389, 0.075647694069404219919, 0.05612209740148604159, -0.10118796678857061577, -0.096160226916063087921, -0.10601238997400461161, -0.32482262945752882199, -0.96018597065328126217, -0.11831810372893719618, -0.13019185052168549821, -0.080994950879331353844, -0.088844916921682309807, -0.07695736745600667561, -0.094535285708752150868, -0.067603724064711254171, -0.10672989998407718393, -0.045242921897107997442, -0.13117139070034619652, 0.011189913886947294372, -0.17196634533664997835, 0.16303832653371333472, -0.20293397921406119977, 0.56099155513187193289, -0.090666668944769990279, 0.95882707315240156554, 0.0096936222066400009462, -0.39120974264100472073, -0.081782976118326314308, -0.015000002474441720413, -0.058100318570304822219, -0.06971270937892765085, -0.068130345111184043017, -0.070517467540312656071, -0.066967604576931502591, -0.072359448172744492145, -0.064425671443484225365, -0.076646642438798442964, -0.059071504956647494233, -0.086837852317636618493, -0.048752380149832678924, -0.11050441917873819742, -0.032902936957473871704, -0.1444406050066643632, -0.018847223960927635827, 0.13413193485813351691, 0.0087208437439542339786, 0.9946764147081772478, -0.10989907326032234691, -0.020554346388292375064, -0.016958962471346181872, ]).reshape((20, 5, ), order="F") --END TEST CASE-- """ R_crs_num_tests = 42 patsy-0.5.2/patsy/test_state.py000066400000000000000000000175221412400214200165440ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2012-2013 Nathaniel Smith # See file LICENSE.txt for license information. 
from __future__ import print_function import numpy as np from patsy.state import Center, Standardize, center from patsy.util import atleast_2d_column_default def check_stateful(cls, accepts_multicolumn, input, output, *args, **kwargs): input = np.asarray(input) output = np.asarray(output) test_cases = [ # List input, one chunk ([input], output), # Scalar input, many chunks (input, output), # List input, many chunks: ([[n] for n in input], output), # 0-d array input, many chunks: ([np.array(n) for n in input], output), # 1-d array input, one chunk: ([np.array(input)], output), # 1-d array input, many chunks: ([np.array([n]) for n in input], output), # 2-d but 1 column input, one chunk: ([np.array(input)[:, None]], atleast_2d_column_default(output)), # 2-d but 1 column input, many chunks: ([np.array([[n]]) for n in input], atleast_2d_column_default(output)), ] if accepts_multicolumn: # 2-d array input, one chunk: test_cases += [ ([np.column_stack((input, input[::-1]))], np.column_stack((output, output[::-1]))), # 2-d array input, many chunks: ([np.array([[input[i], input[-i-1]]]) for i in range(len(input))], np.column_stack((output, output[::-1]))), ] from patsy.util import have_pandas if have_pandas: import pandas pandas_type = (pandas.Series, pandas.DataFrame) pandas_index = np.linspace(0, 1, num=len(input)) # 1d and 2d here refer to the dimensionality of the input if output.ndim == 1: output_1d = pandas.Series(output, index=pandas_index) else: output_1d = pandas.DataFrame(output, index=pandas_index) test_cases += [ # Series input, one chunk ([pandas.Series(input, index=pandas_index)], output_1d), # Series input, many chunks ([pandas.Series([x], index=[idx]) for (x, idx) in zip(input, pandas_index)], output_1d), ] if accepts_multicolumn: input_2d_2col = np.column_stack((input, input[::-1])) output_2d_2col = np.column_stack((output, output[::-1])) output_2col_dataframe = pandas.DataFrame(output_2d_2col, index=pandas_index) test_cases += [ # DataFrame input, one 
chunk ([pandas.DataFrame(input_2d_2col, index=pandas_index)], output_2col_dataframe), # DataFrame input, many chunks ([pandas.DataFrame([input_2d_2col[i, :]], index=[pandas_index[i]]) for i in range(len(input))], output_2col_dataframe), ] for input_obj, output_obj in test_cases: print(input_obj) t = cls() for input_chunk in input_obj: t.memorize_chunk(input_chunk, *args, **kwargs) t.memorize_finish() all_outputs = [] for input_chunk in input_obj: output_chunk = t.transform(input_chunk, *args, **kwargs) if input.ndim == output.ndim: assert output_chunk.ndim == np.asarray(input_chunk).ndim all_outputs.append(output_chunk) if have_pandas and isinstance(all_outputs[0], pandas_type): all_output1 = pandas.concat(all_outputs) assert np.array_equal(all_output1.index, pandas_index) elif all_outputs[0].ndim == 0: all_output1 = np.array(all_outputs) elif all_outputs[0].ndim == 1: all_output1 = np.concatenate(all_outputs) else: all_output1 = np.row_stack(all_outputs) assert all_output1.shape[0] == len(input) # output_obj_reshaped = np.asarray(output_obj).reshape(all_output1.shape) # assert np.allclose(all_output1, output_obj_reshaped) assert np.allclose(all_output1, output_obj) if np.asarray(input_obj[0]).ndim == 0: all_input = np.array(input_obj) elif have_pandas and isinstance(input_obj[0], pandas_type): # handles both Series and DataFrames all_input = pandas.concat(input_obj) elif np.asarray(input_obj[0]).ndim == 1: # Don't use row_stack, because that would turn this into a 1xn # matrix: all_input = np.concatenate(input_obj) else: all_input = np.row_stack(input_obj) all_output2 = t.transform(all_input, *args, **kwargs) if have_pandas and isinstance(input_obj[0], pandas_type): assert np.array_equal(all_output2.index, pandas_index) if input.ndim == output.ndim: assert all_output2.ndim == all_input.ndim assert np.allclose(all_output2, output_obj) def test_Center(): check_stateful(Center, True, [1, 2, 3], [-1, 0, 1]) check_stateful(Center, True, [1, 2, 1, 2], [-0.5, 0.5, -0.5, 
0.5]) check_stateful(Center, True, [1.3, -10.1, 7.0, 12.0], [-1.25, -12.65, 4.45, 9.45]) def test_stateful_transform_wrapper(): assert np.allclose(center([1, 2, 3]), [-1, 0, 1]) assert np.allclose(center([1, 2, 1, 2]), [-0.5, 0.5, -0.5, 0.5]) assert center([1.0, 2.0, 3.0]).dtype == np.dtype(float) assert (center(np.array([1.0, 2.0, 3.0], dtype=np.float32)).dtype == np.dtype(np.float32)) assert center([1, 2, 3]).dtype == np.dtype(float) from patsy.util import have_pandas if have_pandas: import pandas s = pandas.Series([1, 2, 3], index=["a", "b", "c"]) df = pandas.DataFrame([[1, 2], [2, 4], [3, 6]], columns=["x1", "x2"], index=[10, 20, 30]) s_c = center(s) assert isinstance(s_c, pandas.Series) assert np.array_equal(s_c.index, ["a", "b", "c"]) assert np.allclose(s_c, [-1, 0, 1]) df_c = center(df) assert isinstance(df_c, pandas.DataFrame) assert np.array_equal(df_c.index, [10, 20, 30]) assert np.array_equal(df_c.columns, ["x1", "x2"]) assert np.allclose(df_c, [[-1, -2], [0, 0], [1, 2]]) def test_Standardize(): check_stateful(Standardize, True, [1, -1], [1, -1]) check_stateful(Standardize, True, [12, 10], [1, -1]) check_stateful(Standardize, True, [12, 11, 10], [np.sqrt(3./2), 0, -np.sqrt(3./2)]) check_stateful(Standardize, True, [12.0, 11.0, 10.0], [np.sqrt(3./2), 0, -np.sqrt(3./2)]) # XX: see the comment in Standardize.transform about why this doesn't # work: # check_stateful(Standardize, # [12.0+0j, 11.0+0j, 10.0], # [np.sqrt(3./2)+0j, 0, -np.sqrt(3./2)]) r20 = list(range(20)) check_stateful(Standardize, True, [1, -1], [np.sqrt(2)/2, -np.sqrt(2)/2], ddof=1) check_stateful(Standardize, True, r20, list((np.arange(20) - 9.5) / 5.7662812973353983), ddof=0) check_stateful(Standardize, True, r20, list((np.arange(20) - 9.5) / 5.9160797830996161), ddof=1) check_stateful(Standardize, True, r20, list((np.arange(20) - 9.5)), rescale=False, ddof=1) check_stateful(Standardize, True, r20, list(np.arange(20) / 5.9160797830996161), center=False, ddof=1) check_stateful(Standardize, 
True, r20, r20, center=False, rescale=False, ddof=1) patsy-0.5.2/patsy/tokens.py000066400000000000000000000177431412400214200156750ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011 Nathaniel Smith # See file LICENSE.txt for license information. # Utilities for dealing with Python code at the token level. # # Includes: # a "pretty printer" that converts a sequence of tokens back into a # readable, white-space normalized string. # a utility function to replace calls to global functions with calls to # other functions import tokenize from six.moves import cStringIO as StringIO from patsy import PatsyError from patsy.origin import Origin __all__ = ["python_tokenize", "pretty_untokenize", "normalize_token_spacing"] # A convenience wrapper around tokenize.generate_tokens. yields tuples # (tokenize type, token string, origin object) def python_tokenize(code): # Since formulas can only contain Python expressions, and Python # expressions cannot meaningfully contain newlines, we'll just remove all # the newlines up front to avoid any complications: code = code.replace("\n", " ").strip() it = tokenize.generate_tokens(StringIO(code).readline) try: for (pytype, string, (_, start), (_, end), code) in it: if pytype == tokenize.ENDMARKER: break origin = Origin(code, start, end) assert pytype != tokenize.NL if pytype == tokenize.NEWLINE: assert string == "" continue if pytype == tokenize.ERRORTOKEN: raise PatsyError("error tokenizing input " "(maybe an unclosed string?)", origin) if pytype == tokenize.COMMENT: raise PatsyError("comments are not allowed", origin) yield (pytype, string, origin) else: # pragma: no cover raise ValueError("stream ended without ENDMARKER?!?") except tokenize.TokenError as e: # TokenError is raised iff the tokenizer thinks that there is # some sort of multi-line construct in progress (e.g., an # unclosed parentheses, which in Python lets a virtual line # continue past the end of the physical line), and it hits the # end of 
the source text. We have our own error handling for # such cases, so just treat this as an end-of-stream. # # Just in case someone adds some other error case: assert e.args[0].startswith("EOF in multi-line") return def test_python_tokenize(): code = "a + (foo * -1)" tokens = list(python_tokenize(code)) expected = [(tokenize.NAME, "a", Origin(code, 0, 1)), (tokenize.OP, "+", Origin(code, 2, 3)), (tokenize.OP, "(", Origin(code, 4, 5)), (tokenize.NAME, "foo", Origin(code, 5, 8)), (tokenize.OP, "*", Origin(code, 9, 10)), (tokenize.OP, "-", Origin(code, 11, 12)), (tokenize.NUMBER, "1", Origin(code, 12, 13)), (tokenize.OP, ")", Origin(code, 13, 14))] assert tokens == expected code2 = "a + (b" tokens2 = list(python_tokenize(code2)) expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)), (tokenize.OP, "+", Origin(code2, 2, 3)), (tokenize.OP, "(", Origin(code2, 4, 5)), (tokenize.NAME, "b", Origin(code2, 5, 6))] assert tokens2 == expected2 import pytest pytest.raises(PatsyError, list, python_tokenize("a b # c")) import pytest pytest.raises(PatsyError, list, python_tokenize("a b \"c")) _python_space_both = (list("+-*/%&^|<>") + ["==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//"]) _python_space_before = (_python_space_both + ["!", "~"]) _python_space_after = (_python_space_both + [",", ":"]) def pretty_untokenize(typed_tokens): text = [] prev_was_space_delim = False prev_wants_space = False prev_was_open_paren_or_comma = False prev_was_object_like = False brackets = [] for token_type, token in typed_tokens: assert token_type not in (tokenize.INDENT, tokenize.DEDENT, tokenize.NL) if token_type == tokenize.NEWLINE: continue if token_type == tokenize.ENDMARKER: continue if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING): if prev_wants_space or prev_was_space_delim: text.append(" ") text.append(token) prev_wants_space = False prev_was_space_delim = True else: if token in ("(", "[", "{"): brackets.append(token) elif brackets and token in (")", "]", "}"): 
brackets.pop() this_wants_space_before = (token in _python_space_before) this_wants_space_after = (token in _python_space_after) # Special case for slice syntax: foo[:10] # Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..." if token == ":" and brackets and brackets[-1] == "[": this_wants_space_after = False # Special case for foo(*args), foo(a, *args): if token in ("*", "**") and prev_was_open_paren_or_comma: this_wants_space_before = False this_wants_space_after = False # Special case for "a = foo(b=1)": if token == "=" and not brackets: this_wants_space_before = True this_wants_space_after = True # Special case for unary -, +. Our heuristic is that if we see the # + or - after something that looks like an object (a NAME, # NUMBER, STRING, or close paren) then it is probably binary, # otherwise it is probably unary. if token in ("+", "-") and not prev_was_object_like: this_wants_space_before = False this_wants_space_after = False if prev_wants_space or this_wants_space_before: text.append(" ") text.append(token) prev_wants_space = this_wants_space_after prev_was_space_delim = False if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING) or token == ")"): prev_was_object_like = True else: prev_was_object_like = False prev_was_open_paren_or_comma = token in ("(", ",") return "".join(text) def normalize_token_spacing(code): tokens = [(t[0], t[1]) for t in tokenize.generate_tokens(StringIO(code).readline)] return pretty_untokenize(tokens) def test_pretty_untokenize_and_normalize_token_spacing(): assert normalize_token_spacing("1 + 1") == "1 + 1" assert normalize_token_spacing("1+1") == "1 + 1" assert normalize_token_spacing("1*(2+3**2)") == "1 * (2 + 3 ** 2)" assert normalize_token_spacing("a and b") == "a and b" assert normalize_token_spacing("foo(a=bar.baz[1:])") == "foo(a=bar.baz[1:])" assert normalize_token_spacing("""{"hi":foo[:]}""") == """{"hi": foo[:]}""" assert normalize_token_spacing("""'a' "b" 'c'""") == """'a' "b" 'c'""" assert 
normalize_token_spacing('"""a""" is 1 or 2==3') == '"""a""" is 1 or 2 == 3' assert normalize_token_spacing("foo ( * args )") == "foo(*args)" assert normalize_token_spacing("foo ( a * args )") == "foo(a * args)" assert normalize_token_spacing("foo ( ** args )") == "foo(**args)" assert normalize_token_spacing("foo ( a ** args )") == "foo(a ** args)" assert normalize_token_spacing("foo (1, * args )") == "foo(1, *args)" assert normalize_token_spacing("foo (1, a * args )") == "foo(1, a * args)" assert normalize_token_spacing("foo (1, ** args )") == "foo(1, **args)" assert normalize_token_spacing("foo (1, a ** args )") == "foo(1, a ** args)" assert normalize_token_spacing("a=foo(b = 1)") == "a = foo(b=1)" assert normalize_token_spacing("foo(+ 10, bar = - 1)") == "foo(+10, bar=-1)" assert normalize_token_spacing("1 + +10 + -1 - 5") == "1 + +10 + -1 - 5" patsy-0.5.2/patsy/user_util.py000066400000000000000000000215721412400214200164000ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2012 Nathaniel Smith # See file LICENSE.txt for license information. # Miscellaneous utilities that are useful to users (as compared to # patsy.util, which is misc. utilities useful for implementing patsy). # These are made available in the patsy.* namespace __all__ = ["balanced", "demo_data", "LookupFactor"] import itertools import numpy as np from patsy import PatsyError from patsy.categorical import C from patsy.util import no_pickling, assert_no_pickling def balanced(**kwargs): """balanced(factor_name=num_levels, [factor_name=num_levels, ..., repeat=1]) Create simple balanced factorial designs for testing. Given some factor names and the number of desired levels for each, generates a balanced factorial design in the form of a data dictionary. For example: .. 
ipython:: In [1]: balanced(a=2, b=3) Out[1]: {'a': ['a1', 'a1', 'a1', 'a2', 'a2', 'a2'], 'b': ['b1', 'b2', 'b3', 'b1', 'b2', 'b3']} By default it produces exactly one instance of each combination of levels, but if you want multiple replicates this can be accomplished via the `repeat` argument: .. ipython:: In [2]: balanced(a=2, b=2, repeat=2) Out[2]: {'a': ['a1', 'a1', 'a2', 'a2', 'a1', 'a1', 'a2', 'a2'], 'b': ['b1', 'b2', 'b1', 'b2', 'b1', 'b2', 'b1', 'b2']} """ repeat = kwargs.pop("repeat", 1) levels = [] names = sorted(kwargs) for name in names: level_count = kwargs[name] levels.append(["%s%s" % (name, i) for i in range(1, level_count + 1)]) # zip(*...) does an "unzip" values = zip(*itertools.product(*levels)) data = {} for name, value in zip(names, values): data[name] = list(value) * repeat return data def test_balanced(): data = balanced(a=2, b=3) assert data["a"] == ["a1", "a1", "a1", "a2", "a2", "a2"] assert data["b"] == ["b1", "b2", "b3", "b1", "b2", "b3"] data = balanced(a=2, b=3, repeat=2) assert data["a"] == ["a1", "a1", "a1", "a2", "a2", "a2", "a1", "a1", "a1", "a2", "a2", "a2"] assert data["b"] == ["b1", "b2", "b3", "b1", "b2", "b3", "b1", "b2", "b3", "b1", "b2", "b3"] def demo_data(*names, **kwargs): """demo_data(*names, nlevels=2, min_rows=5) Create simple categorical/numerical demo data. Pass in a set of variable names, and this function will return a simple data set using those variable names. Names whose first letter falls in the range "a" through "m" will be made categorical (with `nlevels` levels). Those that start with a "p" through "z" are numerical. We attempt to produce a balanced design on the categorical variables, repeating as necessary to generate at least `min_rows` data points. Categorical variables are returned as a list of strings. Numerical data is generated by sampling from a normal distribution. A fixed random seed is used, so that identical calls to demo_data() will produce identical results. 
Numerical data is returned in a numpy array. Example: .. ipython: In [1]: patsy.demo_data("a", "b", "x", "y") Out[1]: {'a': ['a1', 'a1', 'a2', 'a2', 'a1', 'a1', 'a2', 'a2'], 'b': ['b1', 'b2', 'b1', 'b2', 'b1', 'b2', 'b1', 'b2'], 'x': array([ 1.76405235, 0.40015721, 0.97873798, 2.2408932 , 1.86755799, -0.97727788, 0.95008842, -0.15135721]), 'y': array([-0.10321885, 0.4105985 , 0.14404357, 1.45427351, 0.76103773, 0.12167502, 0.44386323, 0.33367433])} """ nlevels = kwargs.pop("nlevels", 2) min_rows = kwargs.pop("min_rows", 5) if kwargs: raise TypeError("unexpected keyword arguments %r" % (kwargs,)) numerical = set() categorical = {} for name in names: if name[0] in "abcdefghijklmn": categorical[name] = nlevels elif name[0] in "pqrstuvwxyz": numerical.add(name) else: raise PatsyError("bad name %r" % (name,)) balanced_design_size = np.prod(list(categorical.values()), dtype=int) repeat = int(np.ceil(min_rows * 1.0 / balanced_design_size)) num_rows = repeat * balanced_design_size data = balanced(repeat=repeat, **categorical) r = np.random.RandomState(0) for name in sorted(numerical): data[name] = r.normal(size=num_rows) return data def test_demo_data(): d1 = demo_data("a", "b", "x") assert sorted(d1.keys()) == ["a", "b", "x"] assert d1["a"] == ["a1", "a1", "a2", "a2", "a1", "a1", "a2", "a2"] assert d1["b"] == ["b1", "b2", "b1", "b2", "b1", "b2", "b1", "b2"] assert d1["x"].dtype == np.dtype(float) assert d1["x"].shape == (8,) d2 = demo_data("x", "y") assert sorted(d2.keys()) == ["x", "y"] assert len(d2["x"]) == len(d2["y"]) == 5 assert len(demo_data("x", min_rows=10)["x"]) == 10 assert len(demo_data("a", "b", "x", min_rows=10)["x"]) == 12 assert len(demo_data("a", "b", "x", min_rows=10, nlevels=3)["x"]) == 18 import pytest pytest.raises(PatsyError, demo_data, "a", "b", "__123") pytest.raises(TypeError, demo_data, "a", "b", asdfasdf=123) class LookupFactor(object): """A simple factor class that simply looks up a named entry in the given data. 
Useful for programatically constructing formulas, and as a simple example of the factor protocol. For details see :ref:`expert-model-specification`. Example:: dmatrix(ModelDesc([], [Term([LookupFactor("x")])]), {"x": [1, 2, 3]}) :arg varname: The name of this variable; used as a lookup key in the passed in data dictionary/DataFrame/whatever. :arg force_categorical: If True, then treat this factor as categorical. (Equivalent to using :func:`C` in a regular formula, but of course you can't do that with a :class:`LookupFactor`. :arg contrast: If given, the contrast to use; see :func:`C`. (Requires ``force_categorical=True``.) :arg levels: If given, the categorical levels; see :func:`C`. (Requires ``force_categorical=True``.) :arg origin: Either ``None``, or the :class:`Origin` of this factor for use in error reporting. .. versionadded:: 0.2.0 The ``force_categorical`` and related arguments. """ def __init__(self, varname, force_categorical=False, contrast=None, levels=None, origin=None): self._varname = varname self._force_categorical = force_categorical self._contrast = contrast self._levels = levels self.origin = origin if not self._force_categorical: if contrast is not None: raise ValueError("contrast= requires force_categorical=True") if levels is not None: raise ValueError("levels= requires force_categorical=True") def name(self): return self._varname def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._varname) def __eq__(self, other): return (isinstance(other, LookupFactor) and self._varname == other._varname and self._force_categorical == other._force_categorical and self._contrast == other._contrast and self._levels == other._levels) def __ne__(self, other): return not self == other def __hash__(self): return hash((LookupFactor, self._varname, self._force_categorical, self._contrast, self._levels)) def memorize_passes_needed(self, state, eval_env): return 0 def memorize_chunk(self, state, which_pass, data): # pragma: no cover assert False def 
memorize_finish(self, state, which_pass): # pragma: no cover assert False def eval(self, memorize_state, data): value = data[self._varname] if self._force_categorical: value = C(value, contrast=self._contrast, levels=self._levels) return value __getstate__ = no_pickling def test_LookupFactor(): l_a = LookupFactor("a") assert l_a.name() == "a" assert l_a == LookupFactor("a") assert l_a != LookupFactor("b") assert hash(l_a) == hash(LookupFactor("a")) assert hash(l_a) != hash(LookupFactor("b")) assert l_a.eval({}, {"a": 1}) == 1 assert l_a.eval({}, {"a": 2}) == 2 assert repr(l_a) == "LookupFactor('a')" assert l_a.origin is None l_with_origin = LookupFactor("b", origin="asdf") assert l_with_origin.origin == "asdf" l_c = LookupFactor("c", force_categorical=True, contrast="CONTRAST", levels=(1, 2)) box = l_c.eval({}, {"c": [1, 1, 2]}) assert box.data == [1, 1, 2] assert box.contrast == "CONTRAST" assert box.levels == (1, 2) import pytest pytest.raises(ValueError, LookupFactor, "nc", contrast="CONTRAST") pytest.raises(ValueError, LookupFactor, "nc", levels=(1, 2)) assert_no_pickling(LookupFactor("a")) patsy-0.5.2/patsy/util.py000066400000000000000000000674761412400214200153570ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2013 Nathaniel Smith # See file LICENSE.txt for license information. # Some generic utilities. 
__all__ = ["atleast_2d_column_default", "uniqueify_list", "widest_float", "widest_complex", "wide_dtype_for", "widen", "repr_pretty_delegate", "repr_pretty_impl", "SortAnythingKey", "safe_scalar_isnan", "safe_isnan", "iterable", "have_pandas", "have_pandas_categorical", "have_pandas_categorical_dtype", "pandas_Categorical_from_codes", "pandas_Categorical_categories", "pandas_Categorical_codes", "safe_is_pandas_categorical_dtype", "safe_is_pandas_categorical", "safe_issubdtype", "no_pickling", "assert_no_pickling", "safe_string_eq", ] import sys import numpy as np import six from six.moves import cStringIO as StringIO from .compat import optional_dep_ok try: import pandas except ImportError: have_pandas = False else: have_pandas = True # Pandas versions < 0.9.0 don't have Categorical # Can drop this guard whenever we drop support for such older versions of # pandas. have_pandas_categorical = (have_pandas and hasattr(pandas, "Categorical")) if not have_pandas: have_pandas_categorical_dtype = False _pandas_is_categorical_dtype = None else: if hasattr(pandas, "api"): # This is available starting in pandas v0.19.0 have_pandas_categorical_dtype = True _pandas_is_categorical_dtype = pandas.api.types.is_categorical_dtype else: # This is needed for pandas v0.18.0 and earlier _pandas_is_categorical_dtype = getattr(pandas.core.common, "is_categorical_dtype", None) have_pandas_categorical_dtype = (_pandas_is_categorical_dtype is not None) # Passes through Series and DataFrames, call np.asarray() on everything else def asarray_or_pandas(a, copy=False, dtype=None, subok=False): if have_pandas: if isinstance(a, (pandas.Series, pandas.DataFrame)): # The .name attribute on Series is discarded when passing through # the constructor: # https://github.com/pydata/pandas/issues/1578 extra_args = {} if hasattr(a, "name"): extra_args["name"] = a.name return a.__class__(a, copy=copy, dtype=dtype, **extra_args) return np.array(a, copy=copy, dtype=dtype, subok=subok) def 
test_asarray_or_pandas(): import warnings assert type(asarray_or_pandas([1, 2, 3])) is np.ndarray with warnings.catch_warnings() as w: warnings.filterwarnings('ignore', 'the matrix subclass', PendingDeprecationWarning) assert type(asarray_or_pandas(np.matrix([[1, 2, 3]]))) is np.ndarray assert type(asarray_or_pandas( np.matrix([[1, 2, 3]]), subok=True)) is np.matrix assert w is None a = np.array([1, 2, 3]) assert asarray_or_pandas(a) is a a_copy = asarray_or_pandas(a, copy=True) assert np.array_equal(a, a_copy) a_copy[0] = 100 assert not np.array_equal(a, a_copy) assert np.allclose(asarray_or_pandas([1, 2, 3], dtype=float), [1.0, 2.0, 3.0]) assert asarray_or_pandas([1, 2, 3], dtype=float).dtype == np.dtype(float) a_view = asarray_or_pandas(a, dtype=a.dtype) a_view[0] = 99 assert a[0] == 99 global have_pandas if have_pandas: s = pandas.Series([1, 2, 3], name="A", index=[10, 20, 30]) s_view1 = asarray_or_pandas(s) assert s_view1.name == "A" assert np.array_equal(s_view1.index, [10, 20, 30]) s_view1[10] = 101 assert s[10] == 101 s_copy = asarray_or_pandas(s, copy=True) assert s_copy.name == "A" assert np.array_equal(s_copy.index, [10, 20, 30]) assert np.array_equal(s_copy, s) s_copy[10] = 100 assert not np.array_equal(s_copy, s) assert asarray_or_pandas(s, dtype=float).dtype == np.dtype(float) s_view2 = asarray_or_pandas(s, dtype=s.dtype) assert s_view2.name == "A" assert np.array_equal(s_view2.index, [10, 20, 30]) s_view2[10] = 99 assert s[10] == 99 df = pandas.DataFrame([[1, 2, 3]], columns=["A", "B", "C"], index=[10]) df_view1 = asarray_or_pandas(df) df_view1.loc[10, "A"] = 101 assert np.array_equal(df_view1.columns, ["A", "B", "C"]) assert np.array_equal(df_view1.index, [10]) assert df.loc[10, "A"] == 101 df_copy = asarray_or_pandas(df, copy=True) assert np.array_equal(df_copy, df) assert np.array_equal(df_copy.columns, ["A", "B", "C"]) assert np.array_equal(df_copy.index, [10]) df_copy.loc[10, "A"] = 100 assert not np.array_equal(df_copy, df) df_converted = 
asarray_or_pandas(df, dtype=float) assert df_converted["A"].dtype == np.dtype(float) assert np.allclose(df_converted, df) assert np.array_equal(df_converted.columns, ["A", "B", "C"]) assert np.array_equal(df_converted.index, [10]) df_view2 = asarray_or_pandas(df, dtype=df["A"].dtype) assert np.array_equal(df_view2.columns, ["A", "B", "C"]) assert np.array_equal(df_view2.index, [10]) # This actually makes a copy, not a view, because of a pandas bug: # https://github.com/pydata/pandas/issues/1572 assert np.array_equal(df, df_view2) # df_view2[0][0] = 99 # assert df[0][0] == 99 had_pandas = have_pandas try: have_pandas = False assert (type(asarray_or_pandas(pandas.Series([1, 2, 3]))) is np.ndarray) assert (type(asarray_or_pandas(pandas.DataFrame([[1, 2, 3]]))) is np.ndarray) finally: have_pandas = had_pandas # Like np.atleast_2d, but this converts lower-dimensional arrays into columns, # instead of rows. It also converts ndarray subclasses into basic ndarrays, # which makes it easier to guarantee correctness. 
However, there are many # places in the code where we want to preserve pandas indexing information if # present, so there is also an option def atleast_2d_column_default(a, preserve_pandas=False): if preserve_pandas and have_pandas: if isinstance(a, pandas.Series): return pandas.DataFrame(a) elif isinstance(a, pandas.DataFrame): return a # fall through a = np.asarray(a) a = np.atleast_1d(a) if a.ndim <= 1: a = a.reshape((-1, 1)) assert a.ndim >= 2 return a def test_atleast_2d_column_default(): import warnings assert np.all(atleast_2d_column_default([1, 2, 3]) == [[1], [2], [3]]) assert atleast_2d_column_default(1).shape == (1, 1) assert atleast_2d_column_default([1]).shape == (1, 1) assert atleast_2d_column_default([[1]]).shape == (1, 1) assert atleast_2d_column_default([[[1]]]).shape == (1, 1, 1) assert atleast_2d_column_default([1, 2, 3]).shape == (3, 1) assert atleast_2d_column_default([[1], [2], [3]]).shape == (3, 1) with warnings.catch_warnings() as w: warnings.filterwarnings('ignore', 'the matrix subclass', PendingDeprecationWarning) assert type(atleast_2d_column_default(np.matrix(1))) == np.ndarray assert w is None global have_pandas if have_pandas: assert (type(atleast_2d_column_default(pandas.Series([1, 2]))) == np.ndarray) assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]))) == np.ndarray) assert (type(atleast_2d_column_default(pandas.Series([1, 2]), preserve_pandas=True)) == pandas.DataFrame) assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]), preserve_pandas=True)) == pandas.DataFrame) s = pandas.Series([10, 11, 12], name="hi", index=["a", "b", "c"]) df = atleast_2d_column_default(s, preserve_pandas=True) assert isinstance(df, pandas.DataFrame) assert np.all(df.columns == ["hi"]) assert np.all(df.index == ["a", "b", "c"]) with warnings.catch_warnings() as w: warnings.filterwarnings('ignore', 'the matrix subclass', PendingDeprecationWarning) assert (type(atleast_2d_column_default(np.matrix(1), preserve_pandas=True)) == 
np.ndarray) assert w is None assert (type(atleast_2d_column_default([1, 2, 3], preserve_pandas=True)) == np.ndarray) if have_pandas: had_pandas = have_pandas try: have_pandas = False assert (type(atleast_2d_column_default(pandas.Series([1, 2]), preserve_pandas=True)) == np.ndarray) assert (type(atleast_2d_column_default(pandas.DataFrame([[1], [2]]), preserve_pandas=True)) == np.ndarray) finally: have_pandas = had_pandas # A version of .reshape() that knows how to down-convert a 1-column # pandas.DataFrame into a pandas.Series. Useful for code that wants to be # agnostic between 1d and 2d data, with the pattern: # new_a = atleast_2d_column_default(a, preserve_pandas=True) # # do stuff to new_a, which can assume it's always 2 dimensional # return pandas_friendly_reshape(new_a, a.shape) def pandas_friendly_reshape(a, new_shape): if not have_pandas: return a.reshape(new_shape) if not isinstance(a, pandas.DataFrame): return a.reshape(new_shape) # we have a DataFrame. Only supported reshapes are no-op, and # single-column DataFrame -> Series. 
if new_shape == a.shape: return a if len(new_shape) == 1 and a.shape[1] == 1: if new_shape[0] != a.shape[0]: raise ValueError("arrays have incompatible sizes") return a[a.columns[0]] raise ValueError("cannot reshape a DataFrame with shape %s to shape %s" % (a.shape, new_shape)) def test_pandas_friendly_reshape(): import pytest global have_pandas assert np.allclose(pandas_friendly_reshape(np.arange(10).reshape(5, 2), (2, 5)), np.arange(10).reshape(2, 5)) if have_pandas: df = pandas.DataFrame({"x": [1, 2, 3]}, index=["a", "b", "c"]) noop = pandas_friendly_reshape(df, (3, 1)) assert isinstance(noop, pandas.DataFrame) assert np.array_equal(noop.index, ["a", "b", "c"]) assert np.array_equal(noop.columns, ["x"]) squozen = pandas_friendly_reshape(df, (3,)) assert isinstance(squozen, pandas.Series) assert np.array_equal(squozen.index, ["a", "b", "c"]) assert squozen.name == "x" pytest.raises(ValueError, pandas_friendly_reshape, df, (4,)) pytest.raises(ValueError, pandas_friendly_reshape, df, (1, 3)) pytest.raises(ValueError, pandas_friendly_reshape, df, (3, 3)) had_pandas = have_pandas try: have_pandas = False # this will try to do a reshape directly, and DataFrames *have* no # reshape method pytest.raises(AttributeError, pandas_friendly_reshape, df, (3,)) finally: have_pandas = had_pandas def uniqueify_list(seq): seq_new = [] seen = set() for obj in seq: if obj not in seen: seq_new.append(obj) seen.add(obj) return seq_new def test_to_uniqueify_list(): assert uniqueify_list([1, 2, 3]) == [1, 2, 3] assert uniqueify_list([1, 3, 3, 2, 3, 1]) == [1, 3, 2] assert uniqueify_list([3, 2, 1, 4, 1, 2, 3]) == [3, 2, 1, 4] for float_type in ("float128", "float96", "float64"): if hasattr(np, float_type): widest_float = getattr(np, float_type) break else: # pragma: no cover assert False for complex_type in ("complex256", "complex196", "complex128"): if hasattr(np, complex_type): widest_complex = getattr(np, complex_type) break else: # pragma: no cover assert False def 
wide_dtype_for(arr): arr = np.asarray(arr) if (safe_issubdtype(arr.dtype, np.integer) or safe_issubdtype(arr.dtype, np.floating)): return widest_float elif safe_issubdtype(arr.dtype, np.complexfloating): return widest_complex raise ValueError("cannot widen a non-numeric type %r" % (arr.dtype,)) def widen(arr): return np.asarray(arr, dtype=wide_dtype_for(arr)) def test_wide_dtype_for_and_widen(): assert np.allclose(widen([1, 2, 3]), [1, 2, 3]) assert widen([1, 2, 3]).dtype == widest_float assert np.allclose(widen([1.0, 2.0, 3.0]), [1, 2, 3]) assert widen([1.0, 2.0, 3.0]).dtype == widest_float assert np.allclose(widen([1+0j, 2, 3]), [1, 2, 3]) assert widen([1+0j, 2, 3]).dtype == widest_complex import pytest pytest.raises(ValueError, widen, ["hi"]) class PushbackAdapter(object): def __init__(self, it): self._it = it self._pushed = [] def __iter__(self): return self def push_back(self, obj): self._pushed.append(obj) def next(self): if self._pushed: return self._pushed.pop() else: # May raise StopIteration return six.advance_iterator(self._it) __next__ = next def peek(self): try: obj = six.advance_iterator(self) except StopIteration: raise ValueError("no more data") self.push_back(obj) return obj def has_more(self): try: self.peek() except ValueError: return False else: return True def test_PushbackAdapter(): it = PushbackAdapter(iter([1, 2, 3, 4])) assert it.has_more() assert six.advance_iterator(it) == 1 it.push_back(0) assert six.advance_iterator(it) == 0 assert six.advance_iterator(it) == 2 assert it.peek() == 3 it.push_back(10) assert it.peek() == 10 it.push_back(20) assert it.peek() == 20 assert it.has_more() assert list(it) == [20, 10, 3, 4] assert not it.has_more() # The IPython pretty-printer gives very nice output that is difficult to get # otherwise, e.g., look how much more readable this is than if it were all # smooshed onto one line: # # ModelDesc(input_code='y ~ x*asdf', # lhs_terms=[Term([EvalFactor('y')])], # rhs_terms=[Term([]), # 
Term([EvalFactor('x')]), # Term([EvalFactor('asdf')]), # Term([EvalFactor('x'), EvalFactor('asdf')])], # ) # # But, we don't want to assume it always exists; nor do we want to be # re-writing every repr function twice, once for regular repr and once for # the pretty printer. So, here's an ugly fallback implementation that can be # used unconditionally to implement __repr__ in terms of _pretty_repr_. # # Pretty printer docs: # http://ipython.org/ipython-doc/dev/api/generated/IPython.lib.pretty.html class _MiniPPrinter(object): def __init__(self): self._out = StringIO() self.indentation = 0 def text(self, text): self._out.write(text) def breakable(self, sep=" "): self._out.write(sep) def begin_group(self, _, text): self.text(text) def end_group(self, _, text): self.text(text) def pretty(self, obj): if hasattr(obj, "_repr_pretty_"): obj._repr_pretty_(self, False) else: self.text(repr(obj)) def getvalue(self): return self._out.getvalue() def _mini_pretty(obj): printer = _MiniPPrinter() printer.pretty(obj) return printer.getvalue() def repr_pretty_delegate(obj): # If IPython is already loaded, then might as well use it. (Most commonly # this will occur if we are in an IPython session, but somehow someone has # called repr() directly. This can happen for example if printing an # container like a namedtuple that IPython lacks special code for # pretty-printing.) But, if IPython is not already imported, we do not # attempt to import it. This makes patsy itself faster to import (as of # Nov. 2012 I measured the extra overhead from loading IPython as ~4 # seconds on a cold cache), it prevents IPython from automatically # spawning a bunch of child processes (!) which may not be what you want # if you are not otherwise using IPython, and it avoids annoying the # pandas people who have some hack to tell whether you are using IPython # in their test suite (see patsy bug #12). 
if optional_dep_ok and "IPython" in sys.modules: from IPython.lib.pretty import pretty return pretty(obj) else: return _mini_pretty(obj) def repr_pretty_impl(p, obj, args, kwargs=[]): name = obj.__class__.__name__ p.begin_group(len(name) + 1, "%s(" % (name,)) started = [False] def new_item(): if started[0]: p.text(",") p.breakable() started[0] = True for arg in args: new_item() p.pretty(arg) for label, value in kwargs: new_item() p.begin_group(len(label) + 1, "%s=" % (label,)) p.pretty(value) p.end_group(len(label) + 1, "") p.end_group(len(name) + 1, ")") def test_repr_pretty(): assert repr_pretty_delegate("asdf") == "'asdf'" printer = _MiniPPrinter() class MyClass(object): pass repr_pretty_impl(printer, MyClass(), ["a", 1], [("foo", "bar"), ("asdf", "asdf")]) assert printer.getvalue() == "MyClass('a', 1, foo='bar', asdf='asdf')" # In Python 3, objects of different types are not generally comparable, so a # list of heterogeneous types cannot be sorted. This implements a Python 2 # style comparison for arbitrary types. (It works on Python 2 too, but just # gives you the built-in ordering.) To understand why this is tricky, consider # this example: # a = 1 # type 'int' # b = 1.5 # type 'float' # class gggg: # pass # c = gggg() # sorted([a, b, c]) # The fallback ordering sorts by class name, so according to the fallback # ordering, we have b < c < a. But, of course, a and b are comparable (even # though they're of different types), so we also have a < b. This is # inconsistent. There is no general solution to this problem (which I guess is # why Python 3 stopped trying), but the worst offender is all the different # "numeric" classes (int, float, complex, decimal, rational...), so as a # special-case, we sort all numeric objects to the start of the list. # (In Python 2, there is also a similar special case for str and unicode, but # we don't have to worry about that for Python 3.) 
class SortAnythingKey(object):
    """Sort key that imposes a Python-2-style total order on mixed types.

    Python 3 refuses to order unrelated types, so a heterogeneous list
    cannot be sorted directly.  Wrapping each element in SortAnythingKey
    restores a consistent ordering: native ``<`` is used where it works,
    all numeric objects sort before non-numeric ones, and remaining ties
    are broken by class name and object identity.
    """

    def __init__(self, obj):
        self.obj = obj

    def _python_lt(self, other_obj):
        # Attempt the interpreter's own comparison; report NotImplemented
        # when the two objects refuse to be ordered (TypeError on Py3).
        try:
            return self.obj < other_obj
        except TypeError:
            return NotImplemented

    def __lt__(self, other):
        assert isinstance(other, SortAnythingKey)
        native = self._python_lt(other.obj)
        if native is not NotImplemented:
            return native
        # Native comparison failed; fall back to heuristics.
        # Anything comparable with a number counts as "numeric" and sorts
        # before everything non-numeric.
        if self._python_lt(0) is not NotImplemented:
            return True
        if other._python_lt(0) is not NotImplemented:
            return False
        # Stay consistent with == when it is defined for these objects:
        # equal objects must not compare as less-than.
        if self.obj == other.obj:
            return False
        # Final tie-break: class name first, then memory address.
        self_key = (self.obj.__class__.__name__, id(self.obj))
        other_key = (other.obj.__class__.__name__, id(other.obj))
        return self_key < other_key

def test_SortAnythingKey():
    assert sorted([20, 10, 0, 15], key=SortAnythingKey) == [0, 10, 15, 20]
    assert sorted([10, -1.5], key=SortAnythingKey) == [-1.5, 10]
    assert sorted([10, "a", 20.5, "b"], key=SortAnythingKey) == [10, 20.5, "a", "b"]

    class a(object):
        pass

    class b(object):
        pass

    class z(object):
        pass

    a_inst = a()
    b_inst = b()
    z_inst = z()
    plain_obj = object()
    # Numbers first, then everything else ordered by class name.
    got = sorted([z_inst, a_inst, 1, b_inst, plain_obj], key=SortAnythingKey)
    assert got == [1, a_inst, b_inst, plain_obj, z_inst]

# NaN checking functions that work on arbitrary objects, on old Python
# versions (math.isnan is only in 2.6+), etc.
def safe_scalar_isnan(x):
    """Return True iff ``x`` is a scalar NaN; never raise on odd inputs.

    Works on arbitrary objects (strings, tuples, None, multi-element
    arrays, ...): anything that cannot be coerced to a float is simply
    reported as not-NaN.
    """
    try:
        as_float = float(x)
    except (TypeError, ValueError, NotImplementedError):
        return False
    return np.isnan(as_float)

# Elementwise version: maps safe_scalar_isnan over arbitrary object arrays,
# always producing a bool array.
safe_isnan = np.vectorize(safe_scalar_isnan, otypes=[bool])

def test_safe_scalar_isnan():
    # Non-NaN values, including non-numeric junk that float() rejects:
    for not_nan in [True, None, "sadf", (1, 2, 3),
                    np.asarray([1, 2, 3]), [np.nan]]:
        assert not safe_scalar_isnan(not_nan)
    # Genuine NaNs of several flavors:
    for nan in [np.nan, np.float32(np.nan), float(np.nan)]:
        assert safe_scalar_isnan(nan)

def test_safe_isnan():
    assert np.array_equal(safe_isnan([1, True, None, np.nan, "asdf"]),
                          [False, False, False, True, False])
    assert safe_isnan(np.nan)
    assert safe_isnan(np.nan).ndim == 0
    assert not safe_isnan(None)
    # raw isnan raises a *different* error for strings than for objects:
    assert not safe_isnan("asdf")

def iterable(obj):
    """Return True iff ``iter(obj)`` succeeds."""
    try:
        iter(obj)
        return True
    except Exception:
        return False

def test_iterable():
    for yes in ["asdf", [], {"a": 1}]:
        assert iterable(yes)
    for no in [1, iterable]:
        assert not iterable(no)

##### Handling Pandas's categorical stuff is horrible and hateful
# Basically they decided that they didn't like how numpy does things, so their
# categorical stuff is *kinda* like how numpy would do it (e.g. they have a
# special ".dtype" attribute to mark categorical data), so by default you'll
# find yourself using the same code paths to handle pandas categorical data
# and other non-categorical data. BUT, all the idioms for detecting
# categorical data blow up with errors if you try them with real numpy dtypes,
# and all numpy's idioms for detecting non-categorical types blow up with
# errors if you try them with pandas categorical stuff. So basically they have
# just poisoned all code that touches dtypes; the old numpy stuff is unsafe,
# and you must use special code like below.
# # Also there are hoops to jump through to handle both the old style # (Categorical objects) and new-style (Series with dtype="category"). # Needed to support pandas < 0.15 def pandas_Categorical_from_codes(codes, categories): assert have_pandas_categorical # Old versions of pandas sometimes fail to coerce this to an array and # just return it directly from .labels (?!). codes = np.asarray(codes) if hasattr(pandas.Categorical, "from_codes"): return pandas.Categorical.from_codes(codes, categories) else: return pandas.Categorical(codes, categories) def test_pandas_Categorical_from_codes(): if not have_pandas_categorical: return c = pandas_Categorical_from_codes([1, 1, 0, -1], ["a", "b"]) assert np.all(np.asarray(c)[:-1] == ["b", "b", "a"]) assert np.isnan(np.asarray(c)[-1]) # Needed to support pandas < 0.15 def pandas_Categorical_categories(cat): # In 0.15+, a categorical Series has a .cat attribute which is similar to # a Categorical object, and Categorical objects are what have .categories # and .codes attributes. if hasattr(cat, "cat"): cat = cat.cat if hasattr(cat, "categories"): return cat.categories else: return cat.levels # Needed to support pandas < 0.15 def pandas_Categorical_codes(cat): # In 0.15+, a categorical Series has a .cat attribute which is a # Categorical object, and Categorical objects are what have .categories / # .codes attributes. if hasattr(cat, "cat"): cat = cat.cat if hasattr(cat, "codes"): return cat.codes else: return cat.labels def test_pandas_Categorical_accessors(): if not have_pandas_categorical: return c = pandas_Categorical_from_codes([1, 1, 0, -1], ["a", "b"]) assert np.all(pandas_Categorical_categories(c) == ["a", "b"]) assert np.all(pandas_Categorical_codes(c) == [1, 1, 0, -1]) if have_pandas_categorical_dtype: s = pandas.Series(c) assert np.all(pandas_Categorical_categories(s) == ["a", "b"]) assert np.all(pandas_Categorical_codes(s) == [1, 1, 0, -1]) # Needed to support pandas >= 0.15 (!) 
def safe_is_pandas_categorical_dtype(dt): if not have_pandas_categorical_dtype: return False return _pandas_is_categorical_dtype(dt) # Needed to support pandas >= 0.15 (!) def safe_is_pandas_categorical(data): if not have_pandas_categorical: return False if isinstance(data, pandas.Categorical): return True if hasattr(data, "dtype"): return safe_is_pandas_categorical_dtype(data.dtype) return False def test_safe_is_pandas_categorical(): assert not safe_is_pandas_categorical(np.arange(10)) if have_pandas_categorical: c_obj = pandas.Categorical(["a", "b"]) assert safe_is_pandas_categorical(c_obj) if have_pandas_categorical_dtype: s_obj = pandas.Series(["a", "b"], dtype="category") assert safe_is_pandas_categorical(s_obj) # Needed to support pandas >= 0.15 (!) # Calling np.issubdtype on a pandas categorical will blow up -- the officially # recommended solution is to replace every piece of code like # np.issubdtype(foo.dtype, bool) # with code like # isinstance(foo.dtype, np.dtype) and np.issubdtype(foo.dtype, bool) # or # not pandas.is_categorical_dtype(foo.dtype) and issubdtype(foo.dtype, bool) # We do the latter (with extra hoops) because the isinstance check is not # safe. See # https://github.com/pydata/pandas/issues/9581 # https://github.com/pydata/pandas/issues/9581#issuecomment-77099564 def safe_issubdtype(dt1, dt2): if safe_is_pandas_categorical_dtype(dt1): return False return np.issubdtype(dt1, dt2) def test_safe_issubdtype(): assert safe_issubdtype(int, np.integer) assert safe_issubdtype(np.dtype(float), np.floating) assert not safe_issubdtype(int, np.floating) assert not safe_issubdtype(np.dtype(float), np.integer) if have_pandas_categorical_dtype: bad_dtype = pandas.Series(["a", "b"], dtype="category") assert not safe_issubdtype(bad_dtype, np.integer) def no_pickling(*args, **kwargs): raise NotImplementedError( "Sorry, pickling not yet supported. 
" "See https://github.com/pydata/patsy/issues/26 if you want to " "help.") def assert_no_pickling(obj): import pickle import pytest pytest.raises(NotImplementedError, pickle.dumps, obj) # Use like: # if safe_string_eq(constraints, "center"): # ... # where 'constraints' might be a string or an array. (If it's an array, then # we can't use == becaues it might broadcast and ugh.) def safe_string_eq(obj, value): if isinstance(obj, six.string_types): return obj == value else: return False def test_safe_string_eq(): assert safe_string_eq("foo", "foo") assert not safe_string_eq("foo", "bar") if not six.PY3: assert safe_string_eq(unicode("foo"), "foo") assert not safe_string_eq(np.empty((2, 2)), "foo") patsy-0.5.2/patsy/version.py000066400000000000000000000014671412400214200160530ustar00rootroot00000000000000# This file is part of Patsy # Copyright (C) 2011-2014 Nathaniel Smith # See file LICENSE.txt for license information. # This file must be kept very simple, because it is consumed from several # places -- it is imported by patsy/__init__.py, execfile'd by setup.py, etc. # We use a simple scheme: # 1.0.0 -> 1.0.0+dev -> 1.1.0 -> 1.1.0+dev # where the +dev versions are never released into the wild, they're just what # we stick into the VCS in between releases. # # This is compatible with PEP 440: # http://legacy.python.org/dev/peps/pep-0440/ # via the use of the "local suffix" "+dev", which is disallowed on index # servers and causes 1.0.0+dev to sort after plain 1.0.0, which is what we # want. (Contrast with the special suffix 1.0.0.dev, which sorts *before* # 1.0.0.) __version__ = "0.5.2" patsy-0.5.2/release-checklist.txt000066400000000000000000000020701412400214200167730ustar00rootroot00000000000000* docs: * update doc/changes.rst * make sure appropriate ..versionadded:: directives are present * make sure docs are up to date * verify that the ">97% coverage" claim in overview.rst is still true. 
* cd docs; make clean html -- check that there are no warnings * check MANIFEST.in * update version in doc/changes.rst, patsy/version.py * make sure there are no uncommitted changes * clone a clean source directory (so as to get a clean checkout without killing the expensive-to-recreate .tox dir) * python setup.py sdist --formats=zip * unzip sdist to a clean directory, and run tox in it * in original directory: git tag v && git push --tags * in clean clone: python setup.py sdist --formats=zip && python setup.py bdist_wheel * twine upload dist/*.zip dist/*.whl * announce release on: pydata@googlegroups.com pystatsmodels@googlegroups.com scipy-dev? pypi * update version in patsy/version.py again * go to https://github.com/pydata/patsy/releases/new and type the tag name and give it some text. (This will trigger Zenodo archiving the tag.) patsy-0.5.2/setup.cfg000066400000000000000000000001661412400214200144700ustar00rootroot00000000000000[bdist_wheel] universal=1 [metadata] license_file = LICENSE.txt [tool:pytest] python_files = *.py testpaths = patsy patsy-0.5.2/setup.py000066400000000000000000000026341412400214200143630ustar00rootroot00000000000000#!/usr/bin/env python import sys from setuptools import setup DESC = ("A Python package for describing statistical models and for " "building design matrices.") LONG_DESC = open("README.md").read() # defines __version__ exec(open("patsy/version.py").read()) setup( name="patsy", version=__version__, description=DESC, long_description=LONG_DESC, author="Nathaniel J. 
Smith", author_email="njs@pobox.com", license="2-clause BSD", packages=["patsy"], url="https://github.com/pydata/patsy", install_requires=[ "six", # Possibly we need an even newer numpy than this, but we definitely # need at least 1.4 for triu_indices "numpy >= 1.4", ], extras_require={ "test": ["pytest", "pytest-cov", "scipy"], }, classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Intended Audience :: Financial and Insurance Industry", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering", ], ) patsy-0.5.2/tools/000077500000000000000000000000001412400214200140045ustar00rootroot00000000000000patsy-0.5.2/tools/check-API-refs.py000066400000000000000000000024041412400214200167770ustar00rootroot00000000000000#!/usr/bin/env python # NB: this currently works on both Py2 and Py3, and should be kept that way. import sys import re from os.path import dirname, abspath root = dirname(dirname(abspath(__file__))) patsy_ref = root + "/doc/API-reference.rst" doc_re = re.compile(r"^\.\. 
(.*):: ([^\(]*)") def _documented(rst_path): documented = set() with open(rst_path) as rst_file: for line in rst_file: match = doc_re.match(line.rstrip()) if match: directive = match.group(1) symbol = match.group(2) if directive not in ["module", "ipython"]: documented.add(symbol) return documented try: import patsy except ImportError: sys.path.append(root) import patsy documented = set(_documented(patsy_ref)) # print(documented) exported = set(patsy.__all__) missed = exported.difference(documented) extra = documented.difference(exported) if missed: print("DOCS MISSING FROM %s:" % (patsy_ref,)) for m in sorted(missed): print(" %s" % (m,)) if extra: print("EXTRA DOCS IN %s:" % (patsy_ref,)) for m in sorted(extra): print(" %s" % (m,)) if missed or extra: sys.exit(1) else: print("Reference docs look good.") sys.exit(0) patsy-0.5.2/tools/get-R-bs-test-vectors.R000066400000000000000000000042121412400214200201460ustar00rootroot00000000000000# R code to dump the output from 'bs()' in many situations, for # checking compatibility. 
# # Usage: # Rscript tools/get-R-bs-test-vectors.R > patsy/test_splines_bs_data.py cat("# This file auto-generated by tools/get-R-bs-test-vectors.R\n") cat(sprintf("# Using: %s\n", R.Version()$version.string)) cat("import numpy as np\n") options(digits=20) library(splines) x <- (1.5)^(0:19) MISSING <- "MISSING" is.missing <- function(obj) { length(obj) == 1 && obj == MISSING } pyprint <- function(arr) { if (is.missing(arr)) { cat("None\n") } else { cat("np.array([") for (val in arr) { cat(val) cat(", ") } cat("])") if (!is.null(dim(arr))) { cat(".reshape((") for (size in dim(arr)) { cat(sprintf("%s, ", size)) } cat("), order=\"F\")") } cat("\n") } } num.tests <- 0 dump.bs <- function(degree, df, knots, intercept, Boundary.knots) { cat("--BEGIN TEST CASE--\n") cat(sprintf("degree=%s\n", degree)) cat(sprintf("df=%s\n", if (is.missing(df)) "None" else df)) cat(sprintf("intercept=%s\n", intercept)) cat("Boundary.knots=") pyprint(Boundary.knots) cat("knots=") pyprint(knots) args <- list(x=x, degree=degree, intercept=intercept) if (!is.missing(df)) { args$df <- df } if (!is.missing(knots)) { args$knots <- knots } if (!is.missing(Boundary.knots)) { args$Boundary.knots <- Boundary.knots } result <- do.call(bs, args) cat("output=") pyprint(result) cat("--END TEST CASE--\n") assign("num.tests", num.tests + 1, envir=.GlobalEnv) } cat("R_bs_test_x = ") pyprint(x) cat("R_bs_test_data = \"\"\"\n") for (degree in c(1, 3, 5)) { for (intercept in c(TRUE, FALSE)) { for (Boundary.knots in list(MISSING, c(0, 3000))) { for (df in c(3, 5, 12)) { if (df < degree + 2) { next } else { dump.bs(degree, df, MISSING, intercept, Boundary.knots) } } for (knots in list(c(), c(100), c(1000), c(10, 100, 1000))) { dump.bs(degree, MISSING, knots, intercept, Boundary.knots) } } } } cat("\"\"\"\n") cat(sprintf("R_bs_num_tests = %s\n", num.tests)) patsy-0.5.2/tools/get-R-crs-test-vectors.R000066400000000000000000000040241412400214200203320ustar00rootroot00000000000000# R code to dump the output from 
'mgcv' cubic splines in many situations, for # checking compatibility. # # Usage: # Rscript tools/get-R-crs-test-vectors.R > patsy/test_splines_crs_data.py cat("# This file auto-generated by tools/get-R-crs-test-vectors.R\n") cat(sprintf("# Using: %s and package 'mgcv' version %s\n", R.Version()$version.string, packageVersion("mgcv"))) cat("import numpy as np\n") options(digits=20) library(mgcv) x <- (-1.5)^(0:19) pyprint <- function(arr) { if (is.null(arr)) { cat("None\n") } else { cat("np.array([") for (val in arr) { cat(val) cat(", ") } cat("])") if (!is.null(dim(arr))) { cat(".reshape((") for (size in dim(arr)) { cat(sprintf("%s, ", size)) } cat("), order=\"F\")") } cat("\n") } } num.tests <- 0 dump.crs <- function(spline.type, nb.knots, knots, absorb.cons) { cat("--BEGIN TEST CASE--\n") cat(sprintf("spline_type=%s\n", spline.type)) cat(sprintf("nb_knots=%s\n", nb.knots)) cat("knots=") pyprint(knots) cat(sprintf("absorb_cons=%s\n", absorb.cons)) args <- list(object=s(x, bs=spline.type, k=nb.knots), data=data.frame(x=x), knots=NULL, absorb.cons=absorb.cons) if (!is.null(knots)) { args$knots <- data.frame(x=knots) } result <- do.call(smoothCon, args) cat("output=") pyprint(result[[1]]$X) cat("--END TEST CASE--\n") assign("num.tests", num.tests + 1, envir=.GlobalEnv) } cat("R_crs_test_x = ") pyprint(x) cat("R_crs_test_data = \"\"\"\n") for (spline.type in c("cr", "cs", "cc")) { for (absorb.cons in c(FALSE, TRUE)) { for (nb.knots in c(4, 7, 10, 12)) { dump.crs(spline.type, nb.knots, NULL, absorb.cons) } for (knots in list(c(-2500., -150., 300., 1500.), c(-400., -50., 10., 50., 100.), c(-1000., -500., -250., 0., 250., 500., 1000.))) { dump.crs(spline.type, length(knots), knots, absorb.cons) } } } cat("\"\"\"\n") cat(sprintf("R_crs_num_tests = %s\n", num.tests)) patsy-0.5.2/tox.ini000066400000000000000000000017541412400214200141660ustar00rootroot00000000000000[tox] envlist = {py27,py36,py37,py38,py39}-{with_pandas,without_pandas} [testenv] deps= .[test] with_pandas: 
pandas # Display all warnings, and error out on any warnings attributed to # code in the patsy namespace (but not ones triggered by pytest, etc.): setenv= PYTHONWARNINGS=default PATSY_FORCE_NO_WARNINGS=1 # Work around terrible pathological behaviour in OpenBLAS/MKL/VML multithreading, that causes # execution time to blow up from 3 minutes to 18 minutes, apparently in SVD on smallish matrices OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 VML_NUM_THREADS=1 commands= pytest -vv --cov=patsy --cov-config={toxinidir}/.coveragerc --cov-report=term-missing --cov-report=xml --cov-report=html:{toxworkdir}/coverage/{envname} {posargs:} env PATSY_AVOID_OPTIONAL_DEPENDENCIES=1 pytest -vv --cov=patsy --cov-config={toxinidir}/.coveragerc --cov-report=term-missing --cov-report=xml --cov-report=html:{toxworkdir}/coverage/{envname} {posargs:} python {toxinidir}/tools/check-API-refs.py