css-parser-1.0.4/0000755000175000017500000000000013413156014014046 5ustar kovidkovid00000000000000css-parser-1.0.4/PKG-INFO0000644000175000017500000000347713413156014015156 0ustar kovidkovid00000000000000Metadata-Version: 1.1 Name: css-parser Version: 1.0.4 Summary: A CSS Cascading Style Sheets library for Python Home-page: https://github.com/ebook-utils/css-parser Author: Various People Author-email: redacted@anonymous.net License: LGPL 2.1 or later Description: css-parser ================ [![Build Status](https://api.travis-ci.org/ebook-utils/css-parser.svg)](https://travis-ci.org/ebook-utils/css-parser) [![Build Status](https://ci.appveyor.com/api/projects/status/fqs1n6c2lidphx1t?svg=true)](https://ci.appveyor.com/project/kovidgoyal/css-parser) A fork of the cssutils project based on version 1.0.2. This fork includes general bug fixes and extensions specific to editing and working with ebooks. The main python source code has been modified so that it will run without further conversion on both Python >= 2.7 and Python 3.X without any further modules required. 
All required modifications are handled local to each file For more information on usage, please see the cssutils documentation Keywords: CSS,Cascading Style Sheets,CSSParser,DOM Level 2 Stylesheets,DOM Level 2 CSS Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Web Environment Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL) Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Internet Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing :: Markup :: HTML css-parser-1.0.4/README.md0000644000175000017500000000135113413152352015326 0ustar kovidkovid00000000000000css-parser ================ [![Build Status](https://api.travis-ci.org/ebook-utils/css-parser.svg)](https://travis-ci.org/ebook-utils/css-parser) [![Build Status](https://ci.appveyor.com/api/projects/status/fqs1n6c2lidphx1t?svg=true)](https://ci.appveyor.com/project/kovidgoyal/css-parser) A fork of the cssutils project based on version 1.0.2. This fork includes general bug fixes and extensions specific to editing and working with ebooks. The main python source code has been modified so that it will run without further conversion on both Python >= 2.7 and Python 3.X without any further modules required. 
All required modifications are handled local to each file For more information on usage, please see the cssutils documentation css-parser-1.0.4/setup.cfg0000644000175000017500000000014413413156014015666 0ustar kovidkovid00000000000000[flake8] max-line-length = 120 builtins = unicode,basestring [egg_info] tag_build = tag_date = 0 css-parser-1.0.4/setup.py0000755000175000017500000000502113413155230015560 0ustar kovidkovid00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- """ css_parser setup use EasyInstall or install with >python setup.py install """ __docformat__ = 'restructuredtext' __author__ = 'Christof Hoeke with contributions by Walter Doerwald and lots of other people' __date__ = '$LastChangedDate:: $:' import ast import re import sys import os from setuptools import find_packages, setup from setuptools.command.test import test # extract the version without importing the module VERSION = open('src/css_parser/version.py', 'rb').read().decode('utf-8') VERSION = '.'.join(map(str, ast.literal_eval(re.search(r'^version\s+=\s+(.+)', VERSION, flags=re.M).group(1)))) long_description = '\n' + open('README.md', 'rb').read().decode('utf-8') + '\n' # + read('CHANGELOG.txt') class Test(test): user_options = [ ('which-test=', 'w', "Specify which test to run as either" " the test method name (without the leading test_)" " or a module name with a trailing period"), ] def initialize_options(self): self.which_test = None def finalize_options(self): pass def run(self): import importlib orig = sys.path[:] try: sys.path.insert(0, os.getcwd()) m = importlib.import_module('run_tests') which_test = (self.which_test,) if self.which_test else () m.run_tests(which_test) finally: sys.path = orig setup( name='css-parser', version=VERSION, package_dir={'': 'src'}, packages=find_packages('src'), description='A CSS Cascading Style Sheets library for Python', long_description=long_description, cmdclass={'test': Test}, author='Various People', author_email='redacted@anonymous.net', 
url='https://github.com/ebook-utils/css-parser', license='LGPL 2.1 or later', keywords='CSS, Cascading Style Sheets, CSSParser, DOM Level 2 Stylesheets, DOM Level 2 CSS', classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', 'Intended Audience :: Developers', 'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Topic :: Internet', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Text Processing :: Markup :: HTML' ] ) css-parser-1.0.4/src/0000755000175000017500000000000013413156014014635 5ustar kovidkovid00000000000000css-parser-1.0.4/src/css_parser/0000755000175000017500000000000013413156014017001 5ustar kovidkovid00000000000000css-parser-1.0.4/src/css_parser/__init__.py0000644000175000017500000003733713407702010021123 0ustar kovidkovid00000000000000#!/usr/bin/env python from __future__ import unicode_literals, division, absolute_import, print_function from .profiles import Profiles from .serialize import CSSSerializer from .parse import CSSParser from . import css from . import errorhandler from .version import VERSION import xml.dom import os.path import sys """css_parser - CSS Cascading Style Sheets library for Python Copyright (C) 2004-2013 Christof Hoeke css_parser is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . A Python package to parse and build CSS Cascading Style Sheets. DOM only, not any rendering facilities! Based upon and partly implementing the following specifications : `CSS 2.1 `__ General CSS rules and properties are defined here `CSS 2.1 Errata `__ A few errata, mainly the definition of CHARSET_SYM tokens `CSS3 Module: Syntax `__ Used in parts since css_parser 0.9.4. css_parser tries to use the features from CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some parts are from CSS 2.1 `MediaQueries `__ MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in @import and @media rules. `Namespaces `__ Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5 for dev version `CSS3 Module: Pages Media `__ Most properties of this spec are implemented including MarginRules `Selectors `__ The selector syntax defined here (and not in CSS 2.1) should be parsable with css_parser (*should* mind though ;) ) `DOM Level 2 Style CSS `__ DOM for package css. 0.9.8 removes support for CSSValue and related API, see PropertyValue and Value API for now `DOM Level 2 Style Stylesheets `__ DOM for package stylesheets `CSSOM `__ A few details (mainly the NamespaceRule DOM) is taken from here. Plan is to move implementation to the stuff defined here which is newer but still no REC so might change anytime... The css_parser tokenizer is a customized implementation of `CSS3 Module: Syntax (W3C Working Draft 13 August 2003) `__ which itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as possible but uses some (helpful) parts of the CSS 2.1 tokenizer. I guess css_parser is neither CSS 2.1 nor CSS 3 compliant but tries to at least be able to parse both grammars including some more real world cases (some CSS hacks are actually parsed and serialized). 
Both official grammars are not final nor bugfree but still feasible. css_parser aim is not to be fully compliant to any CSS specification (the specifications seem to be in a constant flow anyway) but css_parser *should* be able to read and write as many as possible CSS stylesheets "in the wild" while at the same time implement the official APIs which are well documented. Some minor extensions are provided as well. Please visit http://cthedot.de/css_parser/ for more details. Tested with Python 2.7.6 and 3.3.3 on Windows 8.1 64bit. This library may be used ``from css_parser import *`` which import subpackages ``css`` and ``stylesheets``, CSSParser and CSSSerializer classes only. Usage may be:: >>> from css_parser import * >>> parser = CSSParser() >>> sheet = parser.parseString(u'a { color: red}') >>> print sheet.cssText a { color: red } """ __all__ = list(map(str, ('css', 'stylesheets', 'CSSParser', 'CSSSerializer'))) __docformat__ = 'restructuredtext' __author__ = 'Christof Hoeke with contributions by Walter Doerwald' __date__ = '$LastChangedDate:: $:' if sys.version_info[0] >= 3: text_type = str from urllib.parse import urlsplit as urllib_urlsplit from urllib.request import pathname2url as urllib_pathname2url else: text_type = unicode from urlparse import urlsplit as urllib_urlsplit from urllib import pathname2url as urllib_pathname2url if sys.version_info < (2, 6): bytes = str # order of imports is important (partly circular) log = errorhandler.ErrorHandler() VERSION ser = CSSSerializer() profile = Profiles(log=log) # used by Selector defining namespace prefix '*' _ANYNS = -1 class DOMImplementationCSS(object): """This interface allows the DOM user to create a CSSStyleSheet outside the context of a document. There is no way to associate the new CSSStyleSheet with a document in DOM Level 2. This class is its *own factory*, as it is given to xml.dom.registerDOMImplementation which simply calls it and receives an instance of this class then. 
""" _features = [ ('css', '1.0'), ('css', '2.0'), ('stylesheets', '1.0'), ('stylesheets', '2.0') ] def createCSSStyleSheet(self, title, media): """ Creates a new CSSStyleSheet. title of type DOMString The advisory title. See also the Style Sheet Interfaces section. media of type DOMString The comma-separated list of media associated with the new style sheet. See also the Style Sheet Interfaces section. returns CSSStyleSheet: A new CSS style sheet. TODO: DOMException SYNTAX_ERR: Raised if the specified media string value has a syntax error and is unparsable. """ import warnings warning = ("Deprecated, see " "https://bitbucket.org/cthedot/css_parser/issues/69#comment-30669799") warnings.warn(warning, DeprecationWarning) return css.CSSStyleSheet(title=title, media=media) def createDocument(self, *args, **kwargs): # sometimes css_parser is picked automatically for # xml.dom.getDOMImplementation, so we should provide an implementation # see https://bitbucket.org/cthedot/css_parser/issues/69 import xml.dom.minidom as minidom return minidom.DOMImplementation().createDocument(*args, **kwargs) def createDocumentType(self, *args, **kwargs): # sometimes css_parser is picked automatically for # xml.dom.getDOMImplementation, so we should provide an implementation # see https://bitbucket.org/cthedot/css_parser/issues/69 import xml.dom.minidom as minidom return minidom.DOMImplementation().createDocumentType(*args, **kwargs) def hasFeature(self, feature, version): return (feature.lower(), text_type(version)) in self._features xml.dom.registerDOMImplementation('css_parser', DOMImplementationCSS) def parseString(*a, **k): return CSSParser().parseString(*a, **k) parseString.__doc__ = CSSParser.parseString.__doc__ def parseFile(*a, **k): return CSSParser().parseFile(*a, **k) parseFile.__doc__ = CSSParser.parseFile.__doc__ def parseUrl(*a, **k): return CSSParser().parseUrl(*a, **k) parseUrl.__doc__ = CSSParser.parseUrl.__doc__ def parseStyle(*a, **k): return CSSParser().parseStyle(*a, 
**k) parseStyle.__doc__ = CSSParser.parseStyle.__doc__ # set "ser", default serializer def setSerializer(serializer): """Set the global serializer used by all class in css_parser.""" global ser ser = serializer def getUrls(sheet): """Retrieve all ``url(urlstring)`` values (in e.g. :class:`css_parser.css.CSSImportRule` or :class:`css_parser.css.CSSValue` objects of given `sheet`. :param sheet: :class:`css_parser.css.CSSStyleSheet` object whose URLs are yielded This function is a generator. The generated URL values exclude ``url(`` and ``)`` and surrounding single or double quotes. """ for importrule in (r for r in sheet if r.type == r.IMPORT_RULE): yield importrule.href def styleDeclarations(base): "recursive generator to find all CSSStyleDeclarations" if hasattr(base, 'cssRules'): for rule in base.cssRules: for s in styleDeclarations(rule): yield s elif hasattr(base, 'style'): yield base.style for style in styleDeclarations(sheet): for p in style.getProperties(all=True): for v in p.propertyValue: if v.type == 'URI': yield v.uri def replaceUrls(sheetOrStyle, replacer, ignoreImportRules=False): """Replace all URLs in :class:`css_parser.css.CSSImportRule` or :class:`css_parser.css.CSSValue` objects of given `sheetOrStyle`. :param sheetOrStyle: a :class:`css_parser.css.CSSStyleSheet` or a :class:`css_parser.css.CSSStyleDeclaration` which is changed in place :param replacer: a function which is called with a single argument `url` which is the current value of each url() excluding ``url(``, ``)`` and surrounding (single or double) quotes. :param ignoreImportRules: if ``True`` does not call `replacer` with URLs from @import rules. 
""" if not ignoreImportRules and not isinstance(sheetOrStyle, css.CSSStyleDeclaration): for importrule in (r for r in sheetOrStyle if r.type == r.IMPORT_RULE): importrule.href = replacer(importrule.href) def styleDeclarations(base): "recursive generator to find all CSSStyleDeclarations" if hasattr(base, 'cssRules'): for rule in base.cssRules: for s in styleDeclarations(rule): yield s elif hasattr(base, 'style'): yield base.style elif isinstance(sheetOrStyle, css.CSSStyleDeclaration): # base is a style already yield base for style in styleDeclarations(sheetOrStyle): for p in style.getProperties(all=True): for v in p.propertyValue: if v.type == v.URI: v.uri = replacer(v.uri) def resolveImports(sheet, target=None): """Recurcively combine all rules in given `sheet` into a `target` sheet. @import rules which use media information are tried to be wrapped into @media rules so keeping the media information. This may not work in all instances (if e.g. an @import rule itself contains an @import rule with different media infos or if it contains rules which may not be used inside an @media block like @namespace rules.). In these cases the @import rule is kept as in the original sheet and a WARNING is issued. :param sheet: in this given :class:`css_parser.css.CSSStyleSheet` all import rules are resolved and added to a resulting *flat* sheet. 
:param target: A :class:`css_parser.css.CSSStyleSheet` object which will be the resulting *flat* sheet if given :returns: given `target` or a new :class:`css_parser.css.CSSStyleSheet` object """ if not target: target = css.CSSStyleSheet(href=sheet.href, media=sheet.media, title=sheet.title) def getReplacer(targetbase): "Return a replacer which uses base to return adjusted URLs" basesch, baseloc, basepath, basequery, basefrag = urllib_urlsplit(targetbase) basepath, basepathfilename = os.path.split(basepath) def replacer(uri): scheme, location, path, query, fragment = urllib_urlsplit(uri) if not scheme and not location and not path.startswith('/'): # relative path, filename = os.path.split(path) combined = os.path.normpath(os.path.join(basepath, path, filename)) return urllib_pathname2url(combined) else: # keep anything absolute return uri return replacer for rule in sheet.cssRules: if rule.type == rule.CHARSET_RULE: pass elif rule.type == rule.IMPORT_RULE: log.info('Processing @import %r' % rule.href, neverraise=True) if rule.hrefFound: # add all rules of @import to current sheet target.add(css.CSSComment(cssText='/* START @import "%s" */' % rule.href)) try: # nested imports importedSheet = resolveImports(rule.styleSheet) except xml.dom.HierarchyRequestErr as e: log.warn('@import: Cannot resolve target, keeping rule: %s' % e, neverraise=True) target.add(rule) else: # adjust relative URI references log.info('@import: Adjusting paths for %r' % rule.href, neverraise=True) replaceUrls(importedSheet, getReplacer(rule.href), ignoreImportRules=True) # might have to wrap rules in @media if media given if rule.media.mediaText == 'all': mediaproxy = None else: keepimport = False for r in importedSheet: # check if rules present which may not be # combined with media if r.type not in (r.COMMENT, r.STYLE_RULE, r.IMPORT_RULE): keepimport = True break if keepimport: log.warn('Cannot combine imported sheet with' ' given media as other rules then' ' comments or stylerules found %r,' 
' keeping %r' % (r, rule.cssText), neverraise=True) target.add(rule) continue # wrap in @media if media is not `all` log.info('@import: Wrapping some rules in @media ' ' to keep media: %s' % rule.media.mediaText, neverraise=True) mediaproxy = css.CSSMediaRule(rule.media.mediaText) for r in importedSheet: if mediaproxy: mediaproxy.add(r) else: # add to top sheet directly but are difficult anyway target.add(r) if mediaproxy: target.add(mediaproxy) else: # keep @import as it is log.error('Cannot get referenced stylesheet %r, keeping rule' % rule.href, neverraise=True) target.add(rule) else: target.add(rule) return target if __name__ == '__main__': print(__doc__) css-parser-1.0.4/src/css_parser/_codec2.py0000644000175000017500000005634713407702010020664 0ustar kovidkovid00000000000000#!/usr/bin/env python """Python codec for CSS.""" __docformat__ = 'restructuredtext' __author__ = 'Walter Doerwald' __version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' import codecs import marshal # We're using bits to store all possible candidate encodings (or variants, i.e. # we have two bits for the variants of UTF-16 and two for the # variants of UTF-32). # # Prefixes for various CSS encodings # UTF-8-SIG xEF xBB xBF # UTF-16 (LE) xFF xFE ~x00|~x00 # UTF-16 (BE) xFE xFF # UTF-16-LE @ x00 @ x00 # UTF-16-BE x00 @ # UTF-32 (LE) xFF xFE x00 x00 # UTF-32 (BE) x00 x00 xFE xFF # UTF-32-LE @ x00 x00 x00 # UTF-32-BE x00 x00 x00 @ # CHARSET @ c h a ... def detectencoding_str(input, final=False): """ Detect the encoding of the byte string ``input``, which contains the beginning of a CSS file. This function returns the detected encoding (or ``None`` if it hasn't got enough data), and a flag that indicates whether that encoding has been detected explicitely or implicitely. To detect the encoding the first few bytes are used (or if ``input`` is ASCII compatible and starts with a charset rule the encoding name from the rule). 
"Explicit" detection means that the bytes start with a BOM or a charset rule. If the encoding can't be detected yet, ``None`` is returned as the encoding. ``final`` specifies whether more data will be available in later calls or not. If ``final`` is true, ``detectencoding_str()`` will never return ``None`` as the encoding. """ # A bit for every candidate CANDIDATE_UTF_8_SIG = 1 CANDIDATE_UTF_16_AS_LE = 2 CANDIDATE_UTF_16_AS_BE = 4 CANDIDATE_UTF_16_LE = 8 CANDIDATE_UTF_16_BE = 16 CANDIDATE_UTF_32_AS_LE = 32 CANDIDATE_UTF_32_AS_BE = 64 CANDIDATE_UTF_32_LE = 128 CANDIDATE_UTF_32_BE = 256 CANDIDATE_CHARSET = 512 candidates = 1023 # all candidates li = len(input) if li >= 1: # Check first byte c = input[0] if c != "\xef": candidates &= ~CANDIDATE_UTF_8_SIG if c != "\xff": candidates &= ~(CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_16_AS_LE) if c != "\xfe": candidates &= ~CANDIDATE_UTF_16_AS_BE if c != "@": candidates &= ~(CANDIDATE_UTF_32_LE | CANDIDATE_UTF_16_LE | CANDIDATE_CHARSET) if c != "\x00": candidates &= ~(CANDIDATE_UTF_32_AS_BE | CANDIDATE_UTF_32_BE | CANDIDATE_UTF_16_BE) if li >= 2: # Check second byte c = input[1] if c != "\xbb": candidates &= ~CANDIDATE_UTF_8_SIG if c != "\xfe": candidates &= ~(CANDIDATE_UTF_16_AS_LE | CANDIDATE_UTF_32_AS_LE) if c != "\xff": candidates &= ~CANDIDATE_UTF_16_AS_BE if c != "\x00": candidates &= ~(CANDIDATE_UTF_16_LE | CANDIDATE_UTF_32_AS_BE | CANDIDATE_UTF_32_LE | CANDIDATE_UTF_32_BE) if c != "@": candidates &= ~CANDIDATE_UTF_16_BE if c != "c": candidates &= ~CANDIDATE_CHARSET if li >= 3: # Check third byte c = input[2] if c != "\xbf": candidates &= ~CANDIDATE_UTF_8_SIG if c != "c": candidates &= ~CANDIDATE_UTF_16_LE if c != "\x00": candidates &= ~(CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_32_LE | CANDIDATE_UTF_32_BE) if c != "\xfe": candidates &= ~CANDIDATE_UTF_32_AS_BE if c != "h": candidates &= ~CANDIDATE_CHARSET if li >= 4: # Check fourth byte c = input[3] if input[2:4] == "\x00\x00": candidates &= ~CANDIDATE_UTF_16_AS_LE if c != 
"\x00": candidates &= ~(CANDIDATE_UTF_16_LE | CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_32_LE) if c != "\xff": candidates &= ~CANDIDATE_UTF_32_AS_BE if c != "@": candidates &= ~CANDIDATE_UTF_32_BE if c != "a": candidates &= ~CANDIDATE_CHARSET if candidates == 0: return ("utf-8", False) if not (candidates & (candidates-1)): # only one candidate remaining if candidates == CANDIDATE_UTF_8_SIG and li >= 3: return ("utf-8-sig", True) elif candidates == CANDIDATE_UTF_16_AS_LE and li >= 2: return ("utf-16", True) elif candidates == CANDIDATE_UTF_16_AS_BE and li >= 2: return ("utf-16", True) elif candidates == CANDIDATE_UTF_16_LE and li >= 4: return ("utf-16-le", False) elif candidates == CANDIDATE_UTF_16_BE and li >= 2: return ("utf-16-be", False) elif candidates == CANDIDATE_UTF_32_AS_LE and li >= 4: return ("utf-32", True) elif candidates == CANDIDATE_UTF_32_AS_BE and li >= 4: return ("utf-32", True) elif candidates == CANDIDATE_UTF_32_LE and li >= 4: return ("utf-32-le", False) elif candidates == CANDIDATE_UTF_32_BE and li >= 4: return ("utf-32-be", False) elif candidates == CANDIDATE_CHARSET and li >= 4: prefix = '@charset "' if input[:len(prefix)] == prefix: pos = input.find('"', len(prefix)) if pos >= 0: return (input[len(prefix):pos], True) # if this is the last call, and we haven't determined an encoding yet, # we default to UTF-8 if final: return ("utf-8", False) return (None, False) # dont' know yet def detectencoding_unicode(input, final=False): """ Detect the encoding of the unicode string ``input``, which contains the beginning of a CSS file. The encoding is detected from the charset rule at the beginning of ``input``. If there is no charset rule, ``"utf-8"`` will be returned. If the encoding can't be detected yet, ``None`` is returned. ``final`` specifies whether more data will be available in later calls or not. If ``final`` is true, ``detectencoding_unicode()`` will never return ``None``. 
""" prefix = u'@charset "' if input.startswith(prefix): pos = input.find(u'"', len(prefix)) if pos >= 0: return (input[len(prefix):pos], True) elif final or not prefix.startswith(input): # if this is the last call, and we haven't determined an encoding yet, # (or the string definitely doesn't start with prefix) we default to UTF-8 return ("utf-8", False) return (None, False) # don't know yet def _fixencoding(input, encoding, final=False): """ Replace the name of the encoding in the charset rule at the beginning of ``input`` with ``encoding``. If ``input`` doesn't starts with a charset rule, ``input`` will be returned unmodified. If the encoding can't be found yet, ``None`` is returned. ``final`` specifies whether more data will be available in later calls or not. If ``final`` is true, ``_fixencoding()`` will never return ``None``. """ prefix = u'@charset "' if len(input) > len(prefix): if input.startswith(prefix): pos = input.find(u'"', len(prefix)) if pos >= 0: if encoding.replace("_", "-").lower() == "utf-8-sig": encoding = u"utf-8" return prefix + encoding + input[pos:] # we haven't seen the end of the encoding name yet => fall through else: return input # doesn't start with prefix, so nothing to fix elif not prefix.startswith(input) or final: # can't turn out to be a @charset rule later (or there is no "later") return input if final: return input return None # don't know yet def decode(input, errors="strict", encoding=None, force=True): if encoding is None or not force: (_encoding, explicit) = detectencoding_str(input, True) if _encoding == "css": raise ValueError("css not allowed as encoding name") if (explicit and not force) or encoding is None: # Take the encoding from the input encoding = _encoding (input, consumed) = codecs.getdecoder(encoding)(input, errors) return (_fixencoding(input, unicode(encoding), True), consumed) def encode(input, errors="strict", encoding=None): consumed = len(input) if encoding is None: encoding = detectencoding_unicode(input, 
True)[0] if encoding.replace("_", "-").lower() == "utf-8-sig": input = _fixencoding(input, u"utf-8", True) else: input = _fixencoding(input, unicode(encoding), True) if encoding == "css": raise ValueError("css not allowed as encoding name") encoder = codecs.getencoder(encoding) return (encoder(input, errors)[0], consumed) def _bytes2int(bytes): # Helper: convert an 8 bit string into an ``int``. i = 0 for byte in bytes: i = (i << 8) + ord(byte) return i def _int2bytes(i): # Helper: convert an ``int`` into an 8-bit string. v = [] while i: v.insert(0, chr(i & 0xff)) i >>= 8 return "".join(v) if hasattr(codecs, "IncrementalDecoder"): class IncrementalDecoder(codecs.IncrementalDecoder): def __init__(self, errors="strict", encoding=None, force=True): self.decoder = None self.encoding = encoding self.force = force codecs.IncrementalDecoder.__init__(self, errors) # Store ``errors`` somewhere else, # because we have to hide it in a property self._errors = errors self.buffer = u"".encode() self.headerfixed = False def iterdecode(self, input): for part in input: result = self.decode(part, False) if result: yield result result = self.decode("", True) if result: yield result def decode(self, input, final=False): # We're doing basically the same as a ``BufferedIncrementalDecoder``, # but since the buffer is only relevant until the encoding has been # detected (in which case the buffer of the underlying codec might # kick in), we're implementing buffering ourselves to avoid some # overhead. if self.decoder is None: input = self.buffer + input # Do we have to detect the encoding from the input? 
if self.encoding is None or not self.force: (encoding, explicit) = detectencoding_str(input, final) if encoding is None: # no encoding determined yet self.buffer = input # retry the complete input on the next call return u"" # no encoding determined yet, so no output elif encoding == "css": raise ValueError("css not allowed as encoding name") if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input self.encoding = encoding self.buffer = "" # drop buffer, as the decoder might keep its own decoder = codecs.getincrementaldecoder(self.encoding) self.decoder = decoder(self._errors) if self.headerfixed: return self.decoder.decode(input, final) # If we haven't fixed the header yet, # the content of ``self.buffer`` is a ``unicode`` object output = self.buffer + self.decoder.decode(input, final) encoding = self.encoding if encoding.replace("_", "-").lower() == "utf-8-sig": encoding = "utf-8" newoutput = _fixencoding(output, unicode(encoding), final) if newoutput is None: # retry fixing the @charset rule (but keep the decoded stuff) self.buffer = output return u"" self.headerfixed = True return newoutput def reset(self): codecs.IncrementalDecoder.reset(self) self.decoder = None self.buffer = u"".encode() self.headerfixed = False def _geterrors(self): return self._errors def _seterrors(self, errors): # Setting ``errors`` must be done on the real decoder too if self.decoder is not None: self.decoder.errors = errors self._errors = errors errors = property(_geterrors, _seterrors) def getstate(self): if self.decoder is not None: state = (self.encoding, self.buffer, self.headerfixed, True, self.decoder.getstate()) else: state = (self.encoding, self.buffer, self.headerfixed, False, None) return ("", _bytes2int(marshal.dumps(state))) def setstate(self, state): state = _int2bytes(marshal.loads(state[1])) # ignore buffered input self.encoding = state[0] self.buffer = state[1] self.headerfixed = state[2] if state[3] is not None: self.decoder = 
codecs.getincrementaldecoder(self.encoding)(self._errors) self.decoder.setstate(state[4]) else: self.decoder = None if hasattr(codecs, "IncrementalEncoder"): class IncrementalEncoder(codecs.IncrementalEncoder): def __init__(self, errors="strict", encoding=None): self.encoder = None self.encoding = encoding codecs.IncrementalEncoder.__init__(self, errors) # Store ``errors`` somewhere else, # because we have to hide it in a property self._errors = errors self.buffer = u"" def iterencode(self, input): for part in input: result = self.encode(part, False) if result: yield result result = self.encode(u"", True) if result: yield result def encode(self, input, final=False): if self.encoder is None: input = self.buffer + input if self.encoding is not None: # Replace encoding in the @charset rule with the specified one encoding = self.encoding if encoding.replace("_", "-").lower() == "utf-8-sig": encoding = "utf-8" newinput = _fixencoding(input, unicode(encoding), final) if newinput is None: # @charset rule incomplete => Retry next time self.buffer = input return "" input = newinput else: # Use encoding from the @charset declaration self.encoding = detectencoding_unicode(input, final)[0] if self.encoding is not None: if self.encoding == "css": raise ValueError("css not allowed as encoding name") info = codecs.lookup(self.encoding) encoding = self.encoding if self.encoding.replace("_", "-").lower() == "utf-8-sig": input = _fixencoding(input, u"utf-8", True) self.encoder = info.incrementalencoder(self._errors) self.buffer = u"" else: self.buffer = input return "" return self.encoder.encode(input, final) def reset(self): codecs.IncrementalEncoder.reset(self) self.encoder = None self.buffer = u"" def _geterrors(self): return self._errors def _seterrors(self, errors): # Setting ``errors ``must be done on the real encoder too if self.encoder is not None: self.encoder.errors = errors self._errors = errors errors = property(_geterrors, _seterrors) def getstate(self): if self.encoder 
is not None: state = (self.encoding, self.buffer, True, self.encoder.getstate()) else: state = (self.encoding, self.buffer, False, None) return _bytes2int(marshal.dumps(state)) def setstate(self, state): state = _int2bytes(marshal.loads(state)) self.encoding = state[0] self.buffer = state[1] if state[2] is not None: self.encoder = codecs.getincrementalencoder(self.encoding)(self._errors) self.encoder.setstate(state[4]) else: self.encoder = None class StreamWriter(codecs.StreamWriter): def __init__(self, stream, errors="strict", encoding=None, header=False): codecs.StreamWriter.__init__(self, stream, errors) self.streamwriter = None self.encoding = encoding self._errors = errors self.buffer = u"" def encode(self, input, errors='strict'): li = len(input) if self.streamwriter is None: input = self.buffer + input li = len(input) if self.encoding is not None: # Replace encoding in the @charset rule with the specified one encoding = self.encoding if encoding.replace("_", "-").lower() == "utf-8-sig": encoding = "utf-8" newinput = _fixencoding(input, unicode(encoding), False) if newinput is None: # @charset rule incomplete => Retry next time self.buffer = input return ("", 0) input = newinput else: # Use encoding from the @charset declaration self.encoding = detectencoding_unicode(input, False)[0] if self.encoding is not None: if self.encoding == "css": raise ValueError("css not allowed as encoding name") self.streamwriter = codecs.getwriter(self.encoding)(self.stream, self._errors) encoding = self.encoding if self.encoding.replace("_", "-").lower() == "utf-8-sig": input = _fixencoding(input, u"utf-8", True) self.buffer = u"" else: self.buffer = input return ("", 0) return (self.streamwriter.encode(input, errors)[0], li) def _geterrors(self): return self._errors def _seterrors(self, errors): # Setting ``errors`` must be done on the streamwriter too if self.streamwriter is not None: self.streamwriter.errors = errors self._errors = errors errors = property(_geterrors, 
_seterrors) class StreamReader(codecs.StreamReader): def __init__(self, stream, errors="strict", encoding=None, force=True): codecs.StreamReader.__init__(self, stream, errors) self.streamreader = None self.encoding = encoding self.force = force self._errors = errors def decode(self, input, errors='strict'): if self.streamreader is None: if self.encoding is None or not self.force: (encoding, explicit) = detectencoding_str(input, False) if encoding is None: # no encoding determined yet return (u"", 0) # no encoding determined yet, so no output elif encoding == "css": raise ValueError("css not allowed as encoding name") if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input self.encoding = encoding streamreader = codecs.getreader(self.encoding) streamreader = streamreader(self.stream, self._errors) (output, consumed) = streamreader.decode(input, errors) encoding = self.encoding if encoding.replace("_", "-").lower() == "utf-8-sig": encoding = "utf-8" newoutput = _fixencoding(output, unicode(encoding), False) if newoutput is not None: self.streamreader = streamreader return (newoutput, consumed) return (u"", 0) # we will create a new streamreader on the next call return self.streamreader.decode(input, errors) def _geterrors(self): return self._errors def _seterrors(self, errors): # Setting ``errors`` must be done on the streamreader too if self.streamreader is not None: self.streamreader.errors = errors self._errors = errors errors = property(_geterrors, _seterrors) if hasattr(codecs, "CodecInfo"): # We're running on Python 2.5 or better def search_function(name): if name == "css": return codecs.CodecInfo( name="css", encode=encode, decode=decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader, ) else: # If we're running on Python 2.4, define the utf-8-sig codec here def utf8sig_encode(input, errors='strict'): return (codecs.BOM_UTF8 + 
codecs.utf_8_encode(input, errors)[0], len(input)) def utf8sig_decode(input, errors='strict'): prefix = 0 if input[:3] == codecs.BOM_UTF8: input = input[3:] prefix = 3 (output, consumed) = codecs.utf_8_decode(input, errors, True) return (output, consumed+prefix) class UTF8SigStreamWriter(codecs.StreamWriter): def reset(self): codecs.StreamWriter.reset(self) try: del self.encode except AttributeError: pass def encode(self, input, errors='strict'): self.encode = codecs.utf_8_encode return utf8sig_encode(input, errors) class UTF8SigStreamReader(codecs.StreamReader): def reset(self): codecs.StreamReader.reset(self) try: del self.decode except AttributeError: pass def decode(self, input, errors='strict'): if len(input) < 3 and codecs.BOM_UTF8.startswith(input): # not enough data to decide if this is a BOM # => try again on the next call return (u"", 0) self.decode = codecs.utf_8_decode return utf8sig_decode(input, errors) def search_function(name): import encodings name = encodings.normalize_encoding(name) if name == "css": return (encode, decode, StreamReader, StreamWriter) elif name == "utf_8_sig": return (utf8sig_encode, utf8sig_decode, UTF8SigStreamReader, UTF8SigStreamWriter) codecs.register(search_function) # Error handler for CSS escaping def cssescape(exc): if not isinstance(exc, UnicodeEncodeError): raise TypeError("don't know how to handle %r" % exc) return (u"".join(u"\\%06x" % ord(c) for c in exc.object[exc.start:exc.end]), exc.end) codecs.register_error("cssescape", cssescape) css-parser-1.0.4/src/css_parser/_codec3.py0000644000175000017500000005753513407702010020665 0ustar kovidkovid00000000000000#!/usr/bin/env python """Python codec for CSS.""" __docformat__ = 'restructuredtext' __author__ = 'Walter Doerwald' __version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' import codecs import marshal # We're using bits to store all possible candidate encodings (or variants, i.e. 
# we have two bits for the variants of UTF-16 and two for the
# variants of UTF-32).
#
# Prefixes for various CSS encodings
# UTF-8-SIG   xEF  xBB  xBF
# UTF-16 (LE) xFF  xFE ~x00|~x00
# UTF-16 (BE) xFE  xFF
# UTF-16-LE    @   x00   @   x00
# UTF-16-BE   x00   @
# UTF-32 (LE) xFF  xFE  x00  x00
# UTF-32 (BE) x00  x00  xFE  xFF
# UTF-32-LE    @   x00  x00  x00
# UTF-32-BE   x00  x00  x00   @
# CHARSET      @    c    h    a  ...


def chars(bytestring):
    """Return a ``str`` with one character per byte of ``bytestring``.

    Every byte value ``b`` is mapped to ``chr(b)``.
    """
    return ''.join(chr(byte) for byte in bytestring)


def detectencoding_str(input, final=False):
    """
    Detect the encoding of the byte string ``input``, which contains the
    beginning of a CSS file. This function returns the detected encoding (or
    ``None`` if it hasn't got enough data), and a flag that indicates whether
    that encoding has been detected explicitly or implicitly. To detect the
    encoding the first few bytes are used (or if ``input`` is ASCII compatible
    and starts with a charset rule the encoding name from the rule). "Explicit"
    detection means that the bytes start with a BOM or a charset rule.

    If the encoding can't be detected yet, ``None`` is returned as the encoding.
    ``final`` specifies whether more data will be available in later calls or
    not. If ``final`` is true, ``detectencoding_str()`` will never return
    ``None`` as the encoding.
    """

    # A bit for every candidate
    CANDIDATE_UTF_8_SIG = 1
    CANDIDATE_UTF_16_AS_LE = 2
    CANDIDATE_UTF_16_AS_BE = 4
    CANDIDATE_UTF_16_LE = 8
    CANDIDATE_UTF_16_BE = 16
    CANDIDATE_UTF_32_AS_LE = 32
    CANDIDATE_UTF_32_AS_BE = 64
    CANDIDATE_UTF_32_LE = 128
    CANDIDATE_UTF_32_BE = 256
    CANDIDATE_CHARSET = 512

    candidates = 1023  # all candidates

    # Indexing ``bytes`` yields ints, so every comparison below is done
    # against ``b"..."[0]`` (an int as well).
    li = len(input)
    if li >= 1:
        # Check first byte
        c = input[0]
        if c != b"\xef"[0]:
            candidates &= ~CANDIDATE_UTF_8_SIG
        if c != b"\xff"[0]:
            candidates &= ~(CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_16_AS_LE)
        if c != b"\xfe"[0]:
            candidates &= ~CANDIDATE_UTF_16_AS_BE
        if c != b"@"[0]:
            candidates &= ~(CANDIDATE_UTF_32_LE | CANDIDATE_UTF_16_LE | CANDIDATE_CHARSET)
        if c != b"\x00"[0]:
            candidates &= ~(CANDIDATE_UTF_32_AS_BE | CANDIDATE_UTF_32_BE | CANDIDATE_UTF_16_BE)
        if li >= 2:
            # Check second byte
            c = input[1]
            if c != b"\xbb"[0]:
                candidates &= ~CANDIDATE_UTF_8_SIG
            if c != b"\xfe"[0]:
                candidates &= ~(CANDIDATE_UTF_16_AS_LE | CANDIDATE_UTF_32_AS_LE)
            if c != b"\xff"[0]:
                candidates &= ~CANDIDATE_UTF_16_AS_BE
            if c != b"\x00"[0]:
                candidates &= ~(CANDIDATE_UTF_16_LE | CANDIDATE_UTF_32_AS_BE |
                                CANDIDATE_UTF_32_LE | CANDIDATE_UTF_32_BE)
            if c != b"@"[0]:
                candidates &= ~CANDIDATE_UTF_16_BE
            if c != b"c"[0]:
                candidates &= ~CANDIDATE_CHARSET
            if li >= 3:
                # Check third byte
                c = input[2]
                if c != b"\xbf"[0]:
                    candidates &= ~CANDIDATE_UTF_8_SIG
                if c != b"c"[0]:
                    candidates &= ~CANDIDATE_UTF_16_LE
                if c != b"\x00"[0]:
                    candidates &= ~(CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_32_LE | CANDIDATE_UTF_32_BE)
                if c != b"\xfe"[0]:
                    candidates &= ~CANDIDATE_UTF_32_AS_BE
                if c != b"h"[0]:
                    candidates &= ~CANDIDATE_CHARSET
                if li >= 4:
                    # Check fourth byte
                    c = input[3]
                    # FF FE 00 00 is the UTF-32 BOM, not UTF-16 + U+0000
                    if input[2:4] == b"\x00\x00"[0:2]:
                        candidates &= ~CANDIDATE_UTF_16_AS_LE
                    if c != b"\x00"[0]:
                        candidates &= ~(CANDIDATE_UTF_16_LE | CANDIDATE_UTF_32_AS_LE | CANDIDATE_UTF_32_LE)
                    if c != b"\xff"[0]:
                        candidates &= ~CANDIDATE_UTF_32_AS_BE
                    if c != b"@"[0]:
                        candidates &= ~CANDIDATE_UTF_32_BE
                    if c != b"a"[0]:
                        candidates &= ~CANDIDATE_CHARSET
    if candidates == 0:
        return ("utf-8", False)
    if not (candidates & (candidates-1)):  # only one candidate remaining
        if candidates == CANDIDATE_UTF_8_SIG and li >= 3:
            return ("utf-8-sig", True)
        elif candidates == CANDIDATE_UTF_16_AS_LE and li >= 2:
            return ("utf-16", True)
        elif candidates == CANDIDATE_UTF_16_AS_BE and li >= 2:
            return ("utf-16", True)
        elif candidates == CANDIDATE_UTF_16_LE and li >= 4:
            return ("utf-16-le", False)
        elif candidates == CANDIDATE_UTF_16_BE and li >= 2:
            return ("utf-16-be", False)
        elif candidates == CANDIDATE_UTF_32_AS_LE and li >= 4:
            return ("utf-32", True)
        elif candidates == CANDIDATE_UTF_32_AS_BE and li >= 4:
            return ("utf-32", True)
        elif candidates == CANDIDATE_UTF_32_LE and li >= 4:
            return ("utf-32-le", False)
        elif candidates == CANDIDATE_UTF_32_BE and li >= 4:
            return ("utf-32-be", False)
        elif candidates == CANDIDATE_CHARSET and li >= 4:
            prefix = '@charset "'
            charsinput = chars(input)
            if charsinput[:len(prefix)] == prefix:
                pos = charsinput.find('"', len(prefix))
                if pos >= 0:
                    # ``charsinput`` is a ``str``, so the name is one too
                    return (charsinput[len(prefix):pos], True)
    # if this is the last call, and we haven't determined an encoding yet,
    # we default to UTF-8
    if final:
        return ("utf-8", False)
    return (None, False)  # don't know yet


def detectencoding_unicode(input, final=False):
    """
    Detect the encoding of the unicode string ``input``, which contains the
    beginning of a CSS file. The encoding is detected from the charset rule
    at the beginning of ``input``. If there is no charset rule, ``"utf-8"``
    will be returned.

    If the encoding can't be detected yet, ``None`` is returned. ``final``
    specifies whether more data will be available in later calls or not.

    NOTE: if ``input`` starts with ``@charset "`` but the closing quote is
    missing, ``(None, False)`` is returned even when ``final`` is true;
    callers in this module fall back to UTF-8 themselves in that case.
    """
    prefix = '@charset "'
    if input.startswith(prefix):
        pos = input.find('"', len(prefix))
        if pos >= 0:
            return (input[len(prefix):pos], True)
    elif final or not prefix.startswith(input):
        # if this is the last call, and we haven't determined an encoding yet,
        # (or the string definitely doesn't start with prefix) we default to UTF-8
        return ("utf-8", False)
    return (None, False)  # don't know yet


def _fixencoding(input, encoding, final=False):
    """
    Replace the name of the encoding in the charset rule at the beginning of
    ``input`` with ``encoding``. If ``input`` doesn't start with a charset
    rule, ``input`` will be returned unmodified.

    If the encoding can't be found yet, ``None`` is returned. ``final``
    specifies whether more data will be available in later calls or not.
    If ``final`` is true, ``_fixencoding()`` will never return ``None``.
    """
    prefix = '@charset "'
    if len(input) > len(prefix):
        if input.startswith(prefix):
            pos = input.find('"', len(prefix))
            if pos >= 0:
                # The BOM variant is never written into the rule itself
                if encoding.replace("_", "-").lower() == "utf-8-sig":
                    encoding = "utf-8"
                return prefix + encoding + input[pos:]
            # we haven't seen the end of the encoding name yet => fall through
        else:
            return input  # doesn't start with prefix, so nothing to fix
    elif not prefix.startswith(input) or final:
        # can't turn out to be a @charset rule later (or there is no "later")
        return input
    if final:
        return input
    return None  # don't know yet


def decode(input, errors="strict", encoding=None, force=True):
    """Stateless decoder of the ``css`` codec.

    ``input`` is a bytes-like object (objects providing ``tobytes()`` are
    converted first). If ``encoding`` is ``None`` the encoding is detected
    from ``input``; if ``force`` is false an explicitly detected encoding
    (BOM or charset rule) overrides ``encoding``. The charset rule in the
    decoded text is rewritten to name the encoding that was actually used.

    Returns ``(text, consumed)``. Raises ``ValueError`` if the detected
    encoding is ``"css"`` itself.
    """
    try:
        # e.g. ``memoryview`` objects handed in by the codec machinery
        input = input.tobytes()
    except AttributeError:
        pass

    if encoding is None or not force:
        (_encoding, explicit) = detectencoding_str(input, True)
        if _encoding == "css":
            raise ValueError("css not allowed as encoding name")
        if (explicit and not force) or encoding is None:  # Take the encoding from the input
            encoding = _encoding

    (input, consumed) = codecs.getdecoder(encoding)(input, errors)
    return (_fixencoding(input, str(encoding), True), consumed)


def encode(input, errors="strict", encoding=None):
    """Stateless encoder of the ``css`` codec.

    If ``encoding`` is ``None`` it is taken from the charset rule at the
    start of ``input`` (UTF-8 if there is none, or if the rule is not
    terminated); otherwise the charset rule in ``input`` is rewritten to
    name ``encoding``.

    Returns ``(data, consumed)``. Raises ``ValueError`` if the encoding is
    ``"css"`` itself.
    """
    consumed = len(input)
    if encoding is None:
        encoding = detectencoding_unicode(input, True)[0]
        if encoding is None:
            # Unterminated charset rule and no more data: default to UTF-8
            # instead of failing on ``None.replace``.
            encoding = "utf-8"
        if encoding.replace("_", "-").lower() == "utf-8-sig":
            input = _fixencoding(input, "utf-8", True)
    else:
        input = _fixencoding(input, str(encoding), True)
    if encoding == "css":
        raise ValueError("css not allowed as encoding name")
    encoder = codecs.getencoder(encoding)
    return (encoder(input, errors)[0], consumed)


def _bytes2int(bytes):
    # Helper: convert an 8 bit string into an ``int`` (big-endian).
    # Iterating ``bytes`` yields ints on Python 3; one-character strings are
    # still accepted for backwards compatibility.
    i = 0
    for byte in bytes:
        i = (i << 8) + (byte if isinstance(byte, int) else ord(byte))
    return i


def _int2bytes(i):
    # Helper: convert an ``int`` into an 8-bit string (``bytes``), the
    # inverse of ``_bytes2int()`` for data without leading zero bytes.
    return i.to_bytes((i.bit_length() + 7) // 8, "big")


# NOTE: this module is only imported on Python 3, where ``codecs`` always
# provides ``IncrementalDecoder``, ``IncrementalEncoder`` and ``CodecInfo``;
# the former ``hasattr`` guards and the Python 2.4 fallback were unreachable.

class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder of the ``css`` codec.

    Input is buffered until the encoding is known; decoded text is buffered
    until the charset rule can be rewritten (``headerfixed``).
    """

    def __init__(self, errors="strict", encoding=None, force=True):
        # ``decoder`` must exist before the base class assigns ``errors``,
        # because the ``errors`` property setter reads it.
        self.decoder = None
        self.encoding = encoding
        self.force = force
        codecs.IncrementalDecoder.__init__(self, errors)
        # Store ``errors`` somewhere else,
        # because we have to hide it in a property
        self._errors = errors
        self.buffer = b""
        self.headerfixed = False

    def iterdecode(self, input):
        """Decode the byte chunks in ``input``, yielding non-empty results."""
        for part in input:
            result = self.decode(part, False)
            if result:
                yield result
        # Flush with *bytes*: a ``str`` cannot be fed to the real decoder
        result = self.decode(b"", True)
        if result:
            yield result

    def decode(self, input, final=False):
        """Decode ``input`` (bytes) and return the text that is ready.

        Raises ``ValueError`` if the detected encoding is ``"css"``.
        """
        # We're doing basically the same as a ``BufferedIncrementalDecoder``,
        # but since the buffer is only relevant until the encoding has been
        # detected (in which case the buffer of the underlying codec might
        # kick in), we're implementing buffering ourselves to avoid some
        # overhead.
        if self.decoder is None:
            input = self.buffer + input
            # Do we have to detect the encoding from the input?
            if self.encoding is None or not self.force:
                (encoding, explicit) = detectencoding_str(input, final)
                if encoding is None:  # no encoding determined yet
                    self.buffer = input  # retry the complete input on the next call
                    return ""  # no encoding determined yet, so no output
                elif encoding == "css":
                    raise ValueError("css not allowed as encoding name")
                if (explicit and not self.force) or self.encoding is None:  # Take the encoding from the input
                    self.encoding = encoding
            self.buffer = ""  # drop buffer, as the decoder might keep its own
            decoder = codecs.getincrementaldecoder(self.encoding)
            self.decoder = decoder(self._errors)
        if self.headerfixed:
            return self.decoder.decode(input, final)
        # If we haven't fixed the header yet,
        # the content of ``self.buffer`` is a ``str`` object
        output = self.buffer + self.decoder.decode(input, final)
        encoding = self.encoding
        if encoding.replace("_", "-").lower() == "utf-8-sig":
            encoding = "utf-8"
        newoutput = _fixencoding(output, str(encoding), final)
        if newoutput is None:
            # retry fixing the @charset rule (but keep the decoded stuff)
            self.buffer = output
            return ""
        self.headerfixed = True
        return newoutput

    def reset(self):
        """Forget the detected encoding and all buffered data."""
        codecs.IncrementalDecoder.reset(self)
        self.decoder = None
        self.buffer = b""
        self.headerfixed = False

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # Setting ``errors`` must be done on the real decoder too
        if self.decoder is not None:
            self.decoder.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)

    def getstate(self):
        """Return ``(b"", state)``; ``state`` is the marshalled state as int."""
        if self.decoder is not None:
            state = (self.encoding, self.buffer, self.headerfixed, True, self.decoder.getstate())
        else:
            state = (self.encoding, self.buffer, self.headerfixed, False, None)
        # Buffered input is part of the marshalled state, so none is reported
        return (b"", _bytes2int(marshal.dumps(state)))

    def setstate(self, state):
        """Restore a state returned by ``getstate()``.

        A state number of ``0`` (never produced by ``getstate()``) resets
        the decoder.
        """
        if not state[1]:
            self.reset()
            return
        state = marshal.loads(_int2bytes(state[1]))  # ignore buffered input
        self.encoding = state[0]
        self.buffer = state[1]
        self.headerfixed = state[2]
        if state[3]:  # flag: a real decoder existed when the state was taken
            self.decoder = codecs.getincrementaldecoder(self.encoding)(self._errors)
            self.decoder.setstate(state[4])
        else:
            self.decoder = None


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder of the ``css`` codec.

    Text is buffered until the charset rule is complete, so that the
    encoding can be taken from it (or the rule rewritten to ``encoding``).
    """

    def __init__(self, errors="strict", encoding=None):
        # ``encoder`` must exist before the base class assigns ``errors``,
        # because the ``errors`` property setter reads it.
        self.encoder = None
        self.encoding = encoding
        codecs.IncrementalEncoder.__init__(self, errors)
        # Store ``errors`` somewhere else,
        # because we have to hide it in a property
        self._errors = errors
        self.buffer = ""

    def iterencode(self, input):
        """Encode the text chunks in ``input``, yielding non-empty results."""
        for part in input:
            result = self.encode(part, False)
            if result:
                yield result
        result = self.encode("", True)
        if result:
            yield result

    def encode(self, input, final=False):
        """Encode ``input`` (text) and return the bytes that are ready.

        Raises ``ValueError`` if the encoding is ``"css"`` itself.
        """
        if self.encoder is None:
            input = self.buffer + input
            if self.encoding is not None:
                # Replace encoding in the @charset rule with the specified one
                encoding = self.encoding
                if encoding.replace("_", "-").lower() == "utf-8-sig":
                    encoding = "utf-8"
                newinput = _fixencoding(input, str(encoding), final)
                if newinput is None:  # @charset rule incomplete => Retry next time
                    self.buffer = input
                    return b""
                input = newinput
            else:
                # Use encoding from the @charset declaration
                self.encoding = detectencoding_unicode(input, final)[0]
                if self.encoding is None and final:
                    # Unterminated charset rule and no more data: encode
                    # as UTF-8 rather than dropping the buffered text.
                    self.encoding = "utf-8"
            if self.encoding is not None:
                if self.encoding == "css":
                    raise ValueError("css not allowed as encoding name")
                info = codecs.lookup(self.encoding)
                if self.encoding.replace("_", "-").lower() == "utf-8-sig":
                    input = _fixencoding(input, "utf-8", True)
                self.encoder = info.incrementalencoder(self._errors)
                self.buffer = ""
            else:
                self.buffer = input
                return b""
        return self.encoder.encode(input, final)

    def reset(self):
        """Forget the real encoder and all buffered text."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None
        self.buffer = ""

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # Setting ``errors`` must be done on the real encoder too
        if self.encoder is not None:
            self.encoder.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)

    def getstate(self):
        """Return the marshalled encoder state as an ``int``."""
        if self.encoder is not None:
            state = (self.encoding, self.buffer, True, self.encoder.getstate())
        else:
            state = (self.encoding, self.buffer, False, None)
        return _bytes2int(marshal.dumps(state))

    def setstate(self, state):
        """Restore a state returned by ``getstate()``.

        A state of ``0`` (never produced by ``getstate()``) resets the
        encoder.
        """
        if not state:
            self.reset()
            return
        state = marshal.loads(_int2bytes(state))
        self.encoding = state[0]
        self.buffer = state[1]
        if state[2]:  # flag: a real encoder existed when the state was taken
            self.encoder = codecs.getincrementalencoder(self.encoding)(self._errors)
            self.encoder.setstate(state[3])
        else:
            self.encoder = None


class StreamWriter(codecs.StreamWriter):
    """Stream writer of the ``css`` codec (``header`` is accepted but unused)."""

    def __init__(self, stream, errors="strict", encoding=None, header=False):
        # Must exist before the base class assigns ``errors`` (property below)
        self.streamwriter = None
        codecs.StreamWriter.__init__(self, stream, errors)
        self.encoding = encoding
        self._errors = errors
        self.buffer = ""

    def encode(self, input, errors='strict'):
        """Encode ``input`` and return ``(data, consumed)``.

        While the charset rule is incomplete the text is buffered and
        ``(b"", 0)`` is returned. Raises ``ValueError`` if the encoding is
        ``"css"`` itself.
        """
        li = len(input)
        if self.streamwriter is None:
            input = self.buffer + input
            li = len(input)
            if self.encoding is not None:
                # Replace encoding in the @charset rule with the specified one
                encoding = self.encoding
                if encoding.replace("_", "-").lower() == "utf-8-sig":
                    encoding = "utf-8"
                newinput = _fixencoding(input, str(encoding), False)
                if newinput is None:  # @charset rule incomplete => Retry next time
                    self.buffer = input
                    return (b"", 0)
                input = newinput
            else:
                # Use encoding from the @charset declaration
                self.encoding = detectencoding_unicode(input, False)[0]
            if self.encoding is not None:
                if self.encoding == "css":
                    raise ValueError("css not allowed as encoding name")
                self.streamwriter = codecs.getwriter(self.encoding)(self.stream, self._errors)
                if self.encoding.replace("_", "-").lower() == "utf-8-sig":
                    input = _fixencoding(input, "utf-8", True)
                self.buffer = ""
            else:
                self.buffer = input
                return (b"", 0)
        return (self.streamwriter.encode(input, errors)[0], li)

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # Setting ``errors`` must be done on the streamwriter too.
        # ``getattr`` keeps this safe if a subclass runs the base
        # ``__init__`` before ``streamwriter`` has been assigned.
        streamwriter = getattr(self, "streamwriter", None)
        if streamwriter is not None:
            streamwriter.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)


class StreamReader(codecs.StreamReader):
    """Stream reader of the ``css`` codec."""

    def __init__(self, stream, errors="strict", encoding=None, force=True):
        # Must exist before the base class assigns ``errors`` (property below)
        self.streamreader = None
        codecs.StreamReader.__init__(self, stream, errors)
        self.encoding = encoding
        self.force = force
        self._errors = errors

    def decode(self, input, errors='strict'):
        """Decode ``input`` and return ``(text, consumed)``.

        ``("", 0)`` is returned while the encoding or the end of the charset
        rule is not known yet. Raises ``ValueError`` if the detected encoding
        is ``"css"`` itself.
        """
        if self.streamreader is None:
            if self.encoding is None or not self.force:
                (encoding, explicit) = detectencoding_str(input, False)
                if encoding is None:  # no encoding determined yet
                    return ("", 0)  # no encoding determined yet, so no output
                elif encoding == "css":
                    raise ValueError("css not allowed as encoding name")
                if (explicit and not self.force) or self.encoding is None:  # Take the encoding from the input
                    self.encoding = encoding
            streamreader = codecs.getreader(self.encoding)
            streamreader = streamreader(self.stream, self._errors)
            (output, consumed) = streamreader.decode(input, errors)
            encoding = self.encoding
            if encoding.replace("_", "-").lower() == "utf-8-sig":
                encoding = "utf-8"
            newoutput = _fixencoding(output, str(encoding), False)
            if newoutput is not None:
                self.streamreader = streamreader
                return (newoutput, consumed)
            return ("", 0)  # we will create a new streamreader on the next call
        return self.streamreader.decode(input, errors)

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # Setting ``errors`` must be done on the streamreader too.
        # ``getattr`` keeps this safe if a subclass runs the base
        # ``__init__`` before ``streamreader`` has been assigned.
        streamreader = getattr(self, "streamreader", None)
        if streamreader is not None:
            streamreader.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)


def search_function(name):
    """Codec search function: return the ``css`` codec, ``None`` otherwise."""
    if name == "css":
        return codecs.CodecInfo(
            name="css",
            encode=encode,
            decode=decode,
            incrementalencoder=IncrementalEncoder,
            incrementaldecoder=IncrementalDecoder,
            streamwriter=StreamWriter,
            streamreader=StreamReader,
        )


codecs.register(search_function)


# Error handler for CSS escaping

def cssescape(exc):
    """Replace unencodable characters with CSS escapes (``\\`` + 6 hex digits).

    Raises ``TypeError`` for anything but a ``UnicodeEncodeError``.
    """
    if not isinstance(exc, UnicodeEncodeError):
        raise TypeError("don't know how to handle %r" % exc)
    return ("".join("\\%06x" % ord(c) for c in exc.object[exc.start:exc.end]), exc.end)
codecs.register_error("cssescape", cssescape) css-parser-1.0.4/src/css_parser/_fetch.py0000644000175000017500000000430513407702566020621 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function from . import errorhandler import css_parser.encutils as encutils from css_parser.version import VERSION """Default URL reading functions""" __all__ = ['_defaultFetcher'] __docformat__ = 'restructuredtext' __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' import sys if sys.version_info[0] >= 3: from urllib.request import urlopen as urllib_urlopen from urllib.request import Request as urllib_Request from urllib.error import HTTPError as urllib_HTTPError from urllib.error import URLError as urllib_URLError else: from urllib2 import urlopen as urllib_urlopen from urllib2 import Request as urllib_Request from urllib2 import HTTPError as urllib_HTTPError from urllib2 import URLError as urllib_URLError log = errorhandler.ErrorHandler() def _defaultFetcher(url): """Retrieve data from ``url``. css_parser default implementation of fetch URL function. Returns ``(encoding, string)`` or ``None`` """ try: request = urllib_Request(url) request.add_header('User-agent', 'css_parser %s (http://www.cthedot.de/css_parser/)' % VERSION) res = urllib_urlopen(request) except urllib_HTTPError as e: # http error, e.g. 404, e can be raised log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg), error=e) except urllib_URLError as e: # URLError like mailto: or other IO errors, e can be raised log.warn('URLError, %s' % e.reason, error=e) except OSError as e: # e.g if file URL and not found log.warn(e, error=OSError) except ValueError as e: # invalid url, e.g. 
"1" log.warn('ValueError, %s' % e.args[0], error=ValueError) else: if res: mimeType, encoding = encutils.getHTTPInfo(res) if mimeType != 'text/css': log.error('Expected "text/css" mime type for url=%r but found: %r' % (url, mimeType), error=ValueError) content = res.read() if hasattr(res, 'close'): res.close() return encoding, content css-parser-1.0.4/src/css_parser/_fetchgae.py0000644000175000017500000000516213407702010021260 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function from . import errorhandler import cgi from google.appengine.api import urlfetch """GAE specific URL reading functions""" import sys PY3 = sys.version_info[0] >= 3 __all__ = ['_defaultFetcher'] __docformat__ = 'restructuredtext' __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' # raises ImportError of not on GAE log = errorhandler.ErrorHandler() def _defaultFetcher(url): """ uses GoogleAppEngine (GAE) fetch(url, payload=None, method=GET, headers={}, allow_truncated=False) Response content The body content of the response. content_was_truncated True if the allow_truncated parameter to fetch() was True and the response exceeded the maximum response size. In this case, the content attribute contains the truncated response. status_code The HTTP status code. headers The HTTP response headers, as a mapping of names to values. Exceptions exception InvalidURLError() The URL of the request was not a valid URL, or it used an unsupported method. Only http and https URLs are supported. exception DownloadError() There was an error retrieving the data. This exception is not raised if the server returns an HTTP error code: In that case, the response data comes back intact, including the error code. exception ResponseTooLargeError() The response data exceeded the maximum allowed size, and the allow_truncated parameter passed to fetch() was False. 
""" # from google.appengine.api import urlfetch try: r = urlfetch.fetch(url, method=urlfetch.GET) except urlfetch.Error as e: log.warn('Error opening url=%r: %s' % (url, e), error=IOError) else: if r.status_code == 200: # find mimetype and encoding mimetype = 'application/octet-stream' try: mimetype, params = cgi.parse_header(r.headers['content-type']) encoding = params['charset'] except KeyError: encoding = None if mimetype != 'text/css': log.error('Expected "text/css" mime type for url %r but found: %r' % (url, mimetype), error=ValueError) return encoding, r.content else: # TODO: 301 etc log.warn('Error opening url=%r: HTTP status %s' % (url, r.status_code), error=IOError) css-parser-1.0.4/src/css_parser/codec.py0000644000175000017500000000071713407702010020431 0ustar kovidkovid00000000000000#!/usr/bin/env python """Python codec for CSS.""" from __future__ import absolute_import __docformat__ = 'restructuredtext' __author__ = 'Walter Doerwald' __version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' import sys if sys.version_info < (3,): from ._codec2 import * # noqa # for tests from ._codec2 import _fixencoding # noqa else: from ._codec3 import * # noqa # for tests from ._codec3 import _fixencoding # noqa css-parser-1.0.4/src/css_parser/css/0000755000175000017500000000000013413156014017571 5ustar kovidkovid00000000000000css-parser-1.0.4/src/css_parser/css/__init__.py0000644000175000017500000000426513407702010021705 0ustar kovidkovid00000000000000"""Implements Document Object Model Level 2 CSS http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/css.html currently implemented - CSSStyleSheet - CSSRuleList - CSSRule - CSSComment (css_parser addon) - CSSCharsetRule - CSSFontFaceRule - CSSImportRule - CSSMediaRule - CSSNamespaceRule (WD) - CSSPageRule - CSSStyleRule - CSSUnkownRule - Selector and SelectorList - CSSStyleDeclaration - CSS2Properties - CSSValue - CSSPrimitiveValue - CSSValueList - CSSVariablesRule - CSSVariablesDeclaration todo - RGBColor, 
Rect, Counter """ from __future__ import division, absolute_import, print_function __all__ = [ 'CSSStyleSheet', 'CSSRuleList', 'CSSRule', 'CSSComment', 'CSSCharsetRule', 'CSSFontFaceRule', 'CSSImportRule', 'CSSMediaRule', 'CSSNamespaceRule', 'CSSPageRule', 'MarginRule', 'CSSStyleRule', 'CSSUnknownRule', 'CSSVariablesRule', 'CSSVariablesDeclaration', 'Selector', 'SelectorList', 'CSSStyleDeclaration', 'Property', # 'CSSValue', 'CSSPrimitiveValue', 'CSSValueList' 'PropertyValue', 'Value', 'ColorValue', 'DimensionValue', 'URIValue', 'CSSFunction', 'CSSVariable', 'MSValue', ] __docformat__ = 'restructuredtext' __version__ = '$Id$' from .cssstylesheet import CSSStyleSheet from .cssrulelist import CSSRuleList from .cssrule import CSSRule from .csscomment import CSSComment from .csscharsetrule import CSSCharsetRule from .cssfontfacerule import CSSFontFaceRule from .cssimportrule import CSSImportRule from .cssmediarule import CSSMediaRule from .cssnamespacerule import CSSNamespaceRule from .csspagerule import CSSPageRule from .marginrule import MarginRule from .cssstylerule import CSSStyleRule from .cssvariablesrule import CSSVariablesRule from .cssunknownrule import CSSUnknownRule from .selector import Selector from .selectorlist import SelectorList from .cssstyledeclaration import CSSStyleDeclaration from .cssvariablesdeclaration import CSSVariablesDeclaration from .property import Property from .value import PropertyValue, Value, ColorValue, DimensionValue, URIValue, CSSFunction, CSSVariable, MSValue css-parser-1.0.4/src/css_parser/css/colors.py0000644000175000017500000001465013407702010021446 0ustar kovidkovid00000000000000# -*- coding: utf-8 -*- """ Built from something like this: print [ ( row[2].text_content().strip(), eval(row[4].text_content().strip()) ) for row in lxml.html.parse('http://www.w3.org/TR/css3-color/') .xpath("//*[@class='colortable']//tr[position()>1]") ] by Simon Sapin """ from __future__ import unicode_literals, division, absolute_import, 
print_function COLORS = { 'transparent': (0, 0, 0, 0.0), 'black': (0, 0, 0, 1.0), 'silver': (192, 192, 192, 1.0), 'gray': (128, 128, 128, 1.0), 'white': (255, 255, 255, 1.0), 'maroon': (128, 0, 0, 1.0), 'red': (255, 0, 0, 1.0), 'purple': (128, 0, 128, 1.0), 'fuchsia': (255, 0, 255, 1.0), 'green': (0, 128, 0, 1.0), 'lime': (0, 255, 0, 1.0), 'olive': (128, 128, 0, 1.0), 'yellow': (255, 255, 0, 1.0), 'navy': (0, 0, 128, 1.0), 'blue': (0, 0, 255, 1.0), 'teal': (0, 128, 128, 1.0), 'aqua': (0, 255, 255, 1.0), 'aliceblue': (240, 248, 255, 1.0), 'antiquewhite': (250, 235, 215, 1.0), 'aqua': (0, 255, 255, 1.0), 'aquamarine': (127, 255, 212, 1.0), 'azure': (240, 255, 255, 1.0), 'beige': (245, 245, 220, 1.0), 'bisque': (255, 228, 196, 1.0), 'black': (0, 0, 0, 1.0), 'blanchedalmond': (255, 235, 205, 1.0), 'blue': (0, 0, 255, 1.0), 'blueviolet': (138, 43, 226, 1.0), 'brown': (165, 42, 42, 1.0), 'burlywood': (222, 184, 135, 1.0), 'cadetblue': (95, 158, 160, 1.0), 'chartreuse': (127, 255, 0, 1.0), 'chocolate': (210, 105, 30, 1.0), 'coral': (255, 127, 80, 1.0), 'cornflowerblue': (100, 149, 237, 1.0), 'cornsilk': (255, 248, 220, 1.0), 'crimson': (220, 20, 60, 1.0), 'cyan': (0, 255, 255, 1.0), 'darkblue': (0, 0, 139, 1.0), 'darkcyan': (0, 139, 139, 1.0), 'darkgoldenrod': (184, 134, 11, 1.0), 'darkgray': (169, 169, 169, 1.0), 'darkgreen': (0, 100, 0, 1.0), 'darkgrey': (169, 169, 169, 1.0), 'darkkhaki': (189, 183, 107, 1.0), 'darkmagenta': (139, 0, 139, 1.0), 'darkolivegreen': (85, 107, 47, 1.0), 'darkorange': (255, 140, 0, 1.0), 'darkorchid': (153, 50, 204, 1.0), 'darkred': (139, 0, 0, 1.0), 'darksalmon': (233, 150, 122, 1.0), 'darkseagreen': (143, 188, 143, 1.0), 'darkslateblue': (72, 61, 139, 1.0), 'darkslategray': (47, 79, 79, 1.0), 'darkslategrey': (47, 79, 79, 1.0), 'darkturquoise': (0, 206, 209, 1.0), 'darkviolet': (148, 0, 211, 1.0), 'deeppink': (255, 20, 147, 1.0), 'deepskyblue': (0, 191, 255, 1.0), 'dimgray': (105, 105, 105, 1.0), 'dimgrey': (105, 105, 105, 1.0), 
'dodgerblue': (30, 144, 255, 1.0), 'firebrick': (178, 34, 34, 1.0), 'floralwhite': (255, 250, 240, 1.0), 'forestgreen': (34, 139, 34, 1.0), 'fuchsia': (255, 0, 255, 1.0), 'gainsboro': (220, 220, 220, 1.0), 'ghostwhite': (248, 248, 255, 1.0), 'gold': (255, 215, 0, 1.0), 'goldenrod': (218, 165, 32, 1.0), 'gray': (128, 128, 128, 1.0), 'green': (0, 128, 0, 1.0), 'greenyellow': (173, 255, 47, 1.0), 'grey': (128, 128, 128, 1.0), 'honeydew': (240, 255, 240, 1.0), 'hotpink': (255, 105, 180, 1.0), 'indianred': (205, 92, 92, 1.0), 'indigo': (75, 0, 130, 1.0), 'ivory': (255, 255, 240, 1.0), 'khaki': (240, 230, 140, 1.0), 'lavender': (230, 230, 250, 1.0), 'lavenderblush': (255, 240, 245, 1.0), 'lawngreen': (124, 252, 0, 1.0), 'lemonchiffon': (255, 250, 205, 1.0), 'lightblue': (173, 216, 230, 1.0), 'lightcoral': (240, 128, 128, 1.0), 'lightcyan': (224, 255, 255, 1.0), 'lightgoldenrodyellow': (250, 250, 210, 1.0), 'lightgray': (211, 211, 211, 1.0), 'lightgreen': (144, 238, 144, 1.0), 'lightgrey': (211, 211, 211, 1.0), 'lightpink': (255, 182, 193, 1.0), 'lightsalmon': (255, 160, 122, 1.0), 'lightseagreen': (32, 178, 170, 1.0), 'lightskyblue': (135, 206, 250, 1.0), 'lightslategray': (119, 136, 153, 1.0), 'lightslategrey': (119, 136, 153, 1.0), 'lightsteelblue': (176, 196, 222, 1.0), 'lightyellow': (255, 255, 224, 1.0), 'lime': (0, 255, 0, 1.0), 'limegreen': (50, 205, 50, 1.0), 'linen': (250, 240, 230, 1.0), 'magenta': (255, 0, 255, 1.0), 'maroon': (128, 0, 0, 1.0), 'mediumaquamarine': (102, 205, 170, 1.0), 'mediumblue': (0, 0, 205, 1.0), 'mediumorchid': (186, 85, 211, 1.0), 'mediumpurple': (147, 112, 219, 1.0), 'mediumseagreen': (60, 179, 113, 1.0), 'mediumslateblue': (123, 104, 238, 1.0), 'mediumspringgreen': (0, 250, 154, 1.0), 'mediumturquoise': (72, 209, 204, 1.0), 'mediumvioletred': (199, 21, 133, 1.0), 'midnightblue': (25, 25, 112, 1.0), 'mintcream': (245, 255, 250, 1.0), 'mistyrose': (255, 228, 225, 1.0), 'moccasin': (255, 228, 181, 1.0), 'navajowhite': (255, 222, 173, 
1.0), 'navy': (0, 0, 128, 1.0), 'oldlace': (253, 245, 230, 1.0), 'olive': (128, 128, 0, 1.0), 'olivedrab': (107, 142, 35, 1.0), 'orange': (255, 165, 0, 1.0), 'orangered': (255, 69, 0, 1.0), 'orchid': (218, 112, 214, 1.0), 'palegoldenrod': (238, 232, 170, 1.0), 'palegreen': (152, 251, 152, 1.0), 'paleturquoise': (175, 238, 238, 1.0), 'palevioletred': (219, 112, 147, 1.0), 'papayawhip': (255, 239, 213, 1.0), 'peachpuff': (255, 218, 185, 1.0), 'peru': (205, 133, 63, 1.0), 'pink': (255, 192, 203, 1.0), 'plum': (221, 160, 221, 1.0), 'powderblue': (176, 224, 230, 1.0), 'purple': (128, 0, 128, 1.0), 'red': (255, 0, 0, 1.0), 'rosybrown': (188, 143, 143, 1.0), 'royalblue': (65, 105, 225, 1.0), 'saddlebrown': (139, 69, 19, 1.0), 'salmon': (250, 128, 114, 1.0), 'sandybrown': (244, 164, 96, 1.0), 'seagreen': (46, 139, 87, 1.0), 'seashell': (255, 245, 238, 1.0), 'sienna': (160, 82, 45, 1.0), 'silver': (192, 192, 192, 1.0), 'skyblue': (135, 206, 235, 1.0), 'slateblue': (106, 90, 205, 1.0), 'slategray': (112, 128, 144, 1.0), 'slategrey': (112, 128, 144, 1.0), 'snow': (255, 250, 250, 1.0), 'springgreen': (0, 255, 127, 1.0), 'steelblue': (70, 130, 180, 1.0), 'tan': (210, 180, 140, 1.0), 'teal': (0, 128, 128, 1.0), 'thistle': (216, 191, 216, 1.0), 'tomato': (255, 99, 71, 1.0), 'turquoise': (64, 224, 208, 1.0), 'violet': (238, 130, 238, 1.0), 'wheat': (245, 222, 179, 1.0), 'white': (255, 255, 255, 1.0), 'whitesmoke': (245, 245, 245, 1.0), 'yellow': (255, 255, 0, 1.0), 'yellowgreen': (154, 205, 50, 1.0), } css-parser-1.0.4/src/css_parser/css/csscharsetrule.py0000644000175000017500000001376313407702010023203 0ustar kovidkovid00000000000000"""CSSCharsetRule implements DOM Level 2 CSS CSSCharsetRule.""" from __future__ import unicode_literals, division, absolute_import, print_function __all__ = ['CSSCharsetRule'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import codecs from . 
class CSSCharsetRule(cssrule.CSSRule):
    """
    The CSSCharsetRule interface represents an @charset rule in a CSS style
    sheet. The value of the encoding attribute does not affect the encoding
    of text data in the DOM objects; this encoding is always UTF-16
    (also in Python?). After a stylesheet is loaded, the value of the
    encoding attribute is the value found in the @charset rule. If there
    was no @charset in the original document, then no CSSCharsetRule is
    created. The value of the encoding attribute may also be used as a hint
    for the encoding used on serialization of the style sheet.

    The value of the @charset rule (and therefore of the CSSCharsetRule)
    may not correspond to the encoding the document actually came in;
    character encoding information e.g. in an HTTP header, has priority
    (see CSS document representation) but this is not reflected in the
    CSSCharsetRule.

    This rule is not really needed anymore as setting
    :attr:`CSSStyleSheet.encoding` is much easier.

    Format::

        charsetrule:
            CHARSET_SYM S* STRING S* ';'

    BUT: Only valid format is (single space, double quotes!)::

        @charset "ENCODING";
    """

    def __init__(self, encoding=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        :param encoding:
            a valid character encoding
        :param parentRule:
            passed through to the base rule constructor
        :param parentStyleSheet:
            passed through to the base rule constructor
        :param readonly:
            defaults to False, not used yet
        """
        super(CSSCharsetRule, self).__init__(parentRule=parentRule,
                                             parentStyleSheet=parentStyleSheet)
        self._atkeyword = '@charset'

        # Always create the attribute first: the ``encoding`` setter only
        # assigns ``_encoding`` when the value is accepted, so a rejected
        # value (with a logger that does not raise) would otherwise leave
        # the instance without ``_encoding`` and break ``encoding``,
        # ``wellformed``, ``__repr__`` and ``__str__``.
        self._encoding = None
        if encoding:
            self.encoding = encoding

        self._readonly = readonly

    def __repr__(self):
        return "css_parser.css.%s(encoding=%r)" % (
            self.__class__.__name__,
            self.encoding)

    def __str__(self):
        # One conversion per tuple element; an empty template would raise
        # "TypeError: not all arguments converted during string formatting".
        return "<css_parser.css.%s object encoding=%r at 0x%x>" % (
            self.__class__.__name__,
            self.encoding,
            id(self))

    def _getCssText(self):
        """The parsable textual representation."""
        return css_parser.ser.do_CSSCharsetRule(self)

    def _setCssText(self, cssText):
        """
        :param cssText:
            A parsable DOMString.
        :exceptions:
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - :exc:`~xml.dom.InvalidModificationErr`:
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - :exc:`~xml.dom.HierarchyRequestErr`:
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if the rule is readonly.
        """
        super(CSSCharsetRule, self)._setCssText(cssText)

        wellformed = True
        tokenizer = self._tokenize2(cssText)

        # 1st token: the @charset keyword
        if self._type(self._nexttoken(tokenizer)) != self._prods.CHARSET_SYM:
            wellformed = False
            self._log.error('CSSCharsetRule must start with "@charset "',
                            error=xml.dom.InvalidModificationErr)

        # 2nd token: the encoding, which must be a non-empty STRING
        encodingtoken = self._nexttoken(tokenizer)
        encodingtype = self._type(encodingtoken)
        encoding = self._stringtokenvalue(encodingtoken)
        if self._prods.STRING != encodingtype or not encoding:
            wellformed = False
            self._log.error('CSSCharsetRule: no encoding found; %r.' %
                            self._valuestr(cssText))

        # 3rd token must be ";" and nothing but EOF may follow it
        semicolon = self._tokenvalue(self._nexttoken(tokenizer))
        EOFtype = self._type(self._nexttoken(tokenizer))
        if ';' != semicolon or EOFtype not in ('EOF', None):
            wellformed = False
            self._log.error('CSSCharsetRule: Syntax Error: %r.' %
                            self._valuestr(cssText))

        # only a completely wellformed rule replaces the current encoding
        if wellformed:
            self.encoding = encoding

    cssText = property(fget=_getCssText, fset=_setCssText,
                       doc="(DOM) The parsable textual representation.")

    def _setEncoding(self, encoding):
        """
        :param encoding:
            a valid encoding to be used. Currently only valid Python
            encodings are allowed.
        :exceptions:
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if this encoding rule is readonly.
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified encoding value has a syntax error and
              is unparsable.
        """
        self._checkReadonly()
        tokenizer = self._tokenize2(encoding)
        encodingtoken = self._nexttoken(tokenizer)
        unexpected = self._nexttoken(tokenizer)

        # the value must tokenize to exactly one IDENT token
        if not encodingtoken or unexpected or\
           self._prods.IDENT != self._type(encodingtoken):
            self._log.error('CSSCharsetRule: Syntax Error in encoding value '
                            '%r.' % encoding)
        else:
            try:
                codecs.lookup(encoding)
            except LookupError:
                self._log.error('CSSCharsetRule: Unknown (Python) encoding %r.'
                                % encoding)
            else:
                # stored lower-cased
                self._encoding = encoding.lower()

    encoding = property(lambda self: self._encoding, _setEncoding,
                        doc="(DOM)The encoding information used in this "
                            "@charset rule.")

    type = property(lambda self: self.CHARSET_RULE,
                    doc="The type of this rule, as defined by a CSSRule "
                        "type constant.")

    # wellformed as soon as an encoding has been accepted
    wellformed = property(lambda self: bool(self.encoding))
class CSSComment(cssrule.CSSRule):
    """
    Represents a CSS comment (css_parser only).

    Format::

        /*...*/
    """

    def __init__(self, cssText=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        :param cssText:
            optional initial comment text, parsed via :meth:`_setCssText`
        :param parentRule:
            passed through to the base rule constructor
        :param parentStyleSheet:
            passed through to the base rule constructor
        :param readonly:
            stored as ``_readonly`` after the initial text has been set
        """
        super(CSSComment, self).__init__(parentRule=parentRule,
                                         parentStyleSheet=parentStyleSheet)

        self._cssText = None
        if cssText:
            self._setCssText(cssText)

        self._readonly = readonly

    def __repr__(self):
        return "css_parser.css.%s(cssText=%r)" % (
            self.__class__.__name__,
            self.cssText)

    def __str__(self):
        # One conversion per tuple element; an empty template would raise
        # "TypeError: not all arguments converted during string formatting".
        return "<css_parser.css.%s object cssText=%r at 0x%x>" % (
            self.__class__.__name__,
            self.cssText,
            id(self))

    def _getCssText(self):
        """Return serialized property cssText."""
        return css_parser.ser.do_CSSComment(self)

    def _setCssText(self, cssText):
        """
        :param cssText:
            textual text to set or tokenlist which is not tokenized
            anymore. May also be a single token for this rule

        :exceptions:
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - :exc:`~xml.dom.InvalidModificationErr`:
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if the rule is readonly.
        """
        super(CSSComment, self)._setCssText(cssText)
        tokenizer = self._tokenize2(cssText)

        commenttoken = self._nexttoken(tokenizer)
        unexpected = self._nexttoken(tokenizer)

        # exactly one COMMENT token is accepted, nothing before or after it
        if not commenttoken or\
           self._type(commenttoken) != self._prods.COMMENT or\
           unexpected:
            self._log.error('CSSComment: Not a CSSComment: %r' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)
        else:
            self._cssText = self._tokenvalue(commenttoken)

    cssText = property(_getCssText, _setCssText,
                       doc="The parsable textual representation of this rule.")

    type = property(lambda self: self.COMMENT,
                    doc="The type of this rule, as defined by a CSSRule "
                        "type constant.")

    # constant but needed:
    wellformed = property(lambda self: True)
""" def __init__(self, style=None, parentRule=None, parentStyleSheet=None, readonly=False): """ If readonly allows setting of properties in constructor only. :param style: CSSStyleDeclaration used to hold any font descriptions for this CSSFontFaceRule """ super(CSSFontFaceRule, self).__init__(parentRule=parentRule, parentStyleSheet=parentStyleSheet) self._atkeyword = '@font-face' if style: self.style = style else: self.style = CSSStyleDeclaration() self._readonly = readonly def __repr__(self): return "css_parser.css.%s(style=%r)" % ( self.__class__.__name__, self.style.cssText) def __str__(self): return "" % ( self.__class__.__name__, self.style.cssText, self.valid, id(self)) def _getCssText(self): """Return serialized property cssText.""" return css_parser.ser.do_CSSFontFaceRule(self) def _setCssText(self, cssText): """ :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.InvalidModificationErr`: Raised if the specified CSS string value represents a different type of rule than the current one. - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at this point in the style sheet. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if the rule is readonly. 
""" super(CSSFontFaceRule, self)._setCssText(cssText) tokenizer = self._tokenize2(cssText) attoken = self._nexttoken(tokenizer, None) if self._type(attoken) != self._prods.FONT_FACE_SYM: self._log.error('CSSFontFaceRule: No CSSFontFaceRule found: %s' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: newStyle = CSSStyleDeclaration(parentRule=self) ok = True beforetokens, brace = self._tokensupto2(tokenizer, blockstartonly=True, separateEnd=True) if self._tokenvalue(brace) != '{': ok = False self._log.error('CSSFontFaceRule: No start { of style ' 'declaration found: %r' % self._valuestr(cssText), brace) # parse stuff before { which should be comments and S only new = {'wellformed': True} newseq = self._tempSeq() beforewellformed, expected = self._parse(expected=':', seq=newseq, tokenizer=self._tokenize2(beforetokens), productions={}) ok = ok and beforewellformed and new['wellformed'] styletokens, braceorEOFtoken = self._tokensupto2(tokenizer, blockendonly=True, separateEnd=True) val, type_ = self._tokenvalue(braceorEOFtoken),\ self._type(braceorEOFtoken) if val != '}' and type_ != 'EOF': ok = False self._log.error('CSSFontFaceRule: No "}" after style ' 'declaration found: %r' % self._valuestr(cssText)) nonetoken = self._nexttoken(tokenizer) if nonetoken: ok = False self._log.error('CSSFontFaceRule: Trailing content found.', token=nonetoken) if 'EOF' == type_: # add again as style needs it styletokens.append(braceorEOFtoken) # SET, may raise: newStyle.cssText = styletokens if ok: # contains probably comments only (upto ``{``) self._setSeq(newseq) self.style = newStyle cssText = property(_getCssText, _setCssText, doc="(DOM) The parsable textual representation of this " "rule.") def _setStyle(self, style): """ :param style: a CSSStyleDeclaration or string """ self._checkReadonly() # under Python 2.X this was basestring but given unicode literals ... 
if isinstance(style, string_type): self._style = CSSStyleDeclaration(cssText=style, parentRule=self) else: style._parentRule = self self._style = style style = property(lambda self: self._style, _setStyle, doc="(DOM) The declaration-block of this rule set, " "a :class:`~css_parser.css.CSSStyleDeclaration`.") type = property(lambda self: self.FONT_FACE_RULE, doc="The type of this rule, as defined by a CSSRule " "type constant.") def _getValid(self): needed = ['font-family', 'src'] for p in self.style.getProperties(all=True): if not p.valid: return False try: needed.remove(p.name) except ValueError: pass return not bool(needed) valid = property(_getValid, doc="CSSFontFace is valid if properties `font-family` " "and `src` are set and all properties are valid.") # constant but needed: wellformed = property(lambda self: True) css-parser-1.0.4/src/css_parser/css/cssimportrule.py0000644000175000017500000003567213407702010023067 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function import xml.dom """CSSImportRule implements DOM Level 2 CSS CSSImportRule plus the ``name`` property from http://www.w3.org/TR/css3-cascade/#cascading.""" __all__ = ['CSSImportRule'] __docformat__ = 'restructuredtext' __version__ = '$Id$' from . import cssrule import css_parser import os import sys if sys.version_info[0] >= 3: string_type = str from urllib.parse import urljoin as urllib_urljoin else: string_type = basestring from urlparse import urljoin as urllib_urljoin class CSSImportRule(cssrule.CSSRule): """ Represents an @import rule within a CSS style sheet. The @import rule is used to import style rules from other style sheets. Format:: import : IMPORT_SYM S* [STRING|URI] S* [ medium [ COMMA S* medium]* ]? S* STRING? 
class CSSImportRule(cssrule.CSSRule):
    """
    Represents an @import rule within a CSS style sheet.  The @import rule
    is used to import style rules from other style sheets.

    Format::

        import
          : IMPORT_SYM S*
          [STRING|URI] S* [ medium [ COMMA S* medium]* ]? S* STRING? S* ';' S*
          ;
    """

    def __init__(self, href=None, mediaText=None, name=None,
                 parentRule=None, parentStyleSheet=None, readonly=False):
        """
        If readonly allows setting of properties in constructor only

        :param href:
            location of the style sheet to be imported.
        :param mediaText:
            A list of media types for which this style sheet may be used
            as a string
        :param name:
            Additional name of imported style sheet
        """
        super(CSSImportRule, self).__init__(parentRule=parentRule,
                                            parentStyleSheet=parentStyleSheet)
        self._atkeyword = '@import'
        self._styleSheet = None

        # string or uri used for reserialization
        self.hreftype = None

        # prepare seq; no 'media' placeholder is added here because
        # _setMedia inserts the media item itself, right after 'href'
        seq = self._tempSeq()
        seq.append(None, 'href')
        seq.append(None, 'name')
        self._setSeq(seq)

        # 1. media
        if mediaText:
            self.media = mediaText
        else:
            # must be all for @import
            self.media = css_parser.stylesheets.MediaList(mediaText='all')
        # 2. name
        self.name = name
        # 3. href and styleSheet
        self.href = href

        self._readonly = readonly

    def __repr__(self):
        if self._usemedia:
            mediaText = self.media.mediaText
        else:
            mediaText = None
        return "css_parser.css.%s(href=%r, mediaText=%r, name=%r)" % (
            self.__class__.__name__,
            self.href,
            mediaText,
            self.name)

    def __str__(self):
        if self._usemedia:
            mediaText = self.media.mediaText
        else:
            mediaText = None
        # One conversion per tuple element; an empty template would raise
        # "TypeError: not all arguments converted during string formatting".
        return "<css_parser.css.%s object href=%r mediaText=%r name=%r " \
               "at 0x%x>" % (
                   self.__class__.__name__,
                   self.href,
                   mediaText,
                   self.name,
                   id(self))

    _usemedia = property(lambda self: self.media.mediaText not in ('', 'all'),
                         doc="if self.media is used (or simply empty)")

    def _getCssText(self):
        """Return serialized property cssText."""
        return css_parser.ser.do_CSSImportRule(self)

    def _setCssText(self, cssText):
        """
        :exceptions:
            - :exc:`~xml.dom.HierarchyRequestErr`:
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - :exc:`~xml.dom.InvalidModificationErr`:
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if the rule is readonly.
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
        """
        super(CSSImportRule, self)._setCssText(cssText)
        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if self._type(attoken) != self._prods.IMPORT_SYM:
            self._log.error('CSSImportRule: No CSSImportRule found: %s' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)
        else:
            # for closures: must be a mutable
            new = {'keyword': self._tokenvalue(attoken),
                   'href': None,
                   'hreftype': None,
                   'media': None,
                   'name': None,
                   'wellformed': True
                   }

            # Each production callback receives the current ``expected``
            # state and returns the next one.

            def __doname(seq, token):
                # called by _string or _ident
                new['name'] = self._stringtokenvalue(token)
                seq.append(new['name'], 'name')
                return ';'

            def _string(expected, seq, token, tokenizer=None):
                if 'href' == expected:
                    # href
                    new['href'] = self._stringtokenvalue(token)
                    new['hreftype'] = 'string'
                    seq.append(new['href'], 'href')
                    return 'media name ;'
                elif 'name' in expected:
                    # name
                    return __doname(seq, token)
                else:
                    new['wellformed'] = False
                    self._log.error(
                        'CSSImportRule: Unexpected string.', token)
                    return expected

            def _uri(expected, seq, token, tokenizer=None):
                # href
                if 'href' == expected:
                    uri = self._uritokenvalue(token)
                    new['hreftype'] = 'uri'
                    new['href'] = uri
                    seq.append(new['href'], 'href')
                    return 'media name ;'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        'CSSImportRule: Unexpected URI.', token)
                    return expected

            def _ident(expected, seq, token, tokenizer=None):
                # medialist ending with ; which is checked upon too
                if expected.startswith('media'):
                    mediatokens = self._tokensupto2(
                        tokenizer, importmediaqueryendonly=True)
                    mediatokens.insert(0, token)  # push found token

                    last = mediatokens.pop()  # retrieve ;
                    lastval, lasttyp = self._tokenvalue(last), self._type(last)
                    if lastval != ';' and lasttyp not in ('EOF',
                                                           self._prods.STRING):
                        new['wellformed'] = False
                        self._log.error('CSSImportRule: No ";" found: %s' %
                                        self._valuestr(cssText), token=token)

                    newMedia = css_parser.stylesheets.MediaList(
                        parentRule=self)
                    newMedia.mediaText = mediatokens
                    if newMedia.wellformed:
                        new['media'] = newMedia
                        seq.append(newMedia, 'media')
                    else:
                        new['wellformed'] = False
                        self._log.error(
                            'CSSImportRule: Invalid MediaList: %s' %
                            self._valuestr(cssText), token=token)

                    if lasttyp == self._prods.STRING:
                        # name
                        return __doname(seq, last)
                    else:
                        return 'EOF'  # ';' is token "last"
                else:
                    new['wellformed'] = False
                    self._log.error('CSSImportRule: Unexpected ident.', token)
                    return expected

            def _char(expected, seq, token, tokenizer=None):
                # final ;
                val = self._tokenvalue(token)
                if expected.endswith(';') and ';' == val:
                    return 'EOF'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        'CSSImportRule: Unexpected char.', token)
                    return expected

            # import : IMPORT_SYM S* [STRING|URI]
            #            S* [ medium [ ',' S* medium]* ]? ';' S*
            #         STRING? # see http://www.w3.org/TR/css3-cascade/#cascading
            #        ;
            newseq = self._tempSeq()
            wellformed, expected = self._parse(expected='href',
                                               seq=newseq,
                                               tokenizer=tokenizer,
                                               productions={'STRING': _string,
                                                            'URI': _uri,
                                                            'IDENT': _ident,
                                                            'CHAR': _char},
                                               new=new)

            # wellformed set by parse
            ok = wellformed and new['wellformed']

            # post conditions
            if not new['href']:
                ok = False
                self._log.error('CSSImportRule: No href found: %s' %
                                self._valuestr(cssText))

            if expected != 'EOF':
                ok = False
                self._log.error('CSSImportRule: No ";" found: %s' %
                                self._valuestr(cssText))

            # set all
            if ok:
                self._setSeq(newseq)

                self.atkeyword = new['keyword']
                self.hreftype = new['hreftype']
                self.name = new['name']

                if new['media']:
                    self.media = new['media']
                else:
                    # must be all for @import
                    self.media = css_parser.stylesheets.MediaList(
                        mediaText='all')

                # needs new self.media
                self.href = new['href']

    cssText = property(fget=_getCssText, fset=_setCssText,
                       doc="(DOM) The parsable textual representation of "
                           "this rule.")

    def _setHref(self, href):
        """Store ``href``, update the seq and (re)build the imported sheet.

        Loading problems (``OSError``, ``IOError``, ``ValueError``) are only
        logged as warnings; ``hrefFound`` tells whether loading succeeded.
        """
        # set new href
        self._href = href
        # update seq
        for i, item in enumerate(self.seq):
            type_ = item.type
            if 'href' == type_:
                self._seq[i] = (href, type_, item.line, item.col)
                break

        importedSheet = css_parser.css.CSSStyleSheet(media=self.media,
                                                     ownerRule=self,
                                                     title=self.name)
        self.hrefFound = False
        # set styleSheet
        if href and self.parentStyleSheet:
            # loading errors are all caught!

            # relative href
            parentHref = self.parentStyleSheet.href
            if parentHref is None:
                # use cwd instead
                parentHref = css_parser.helper.path2url(os.getcwd()) + '/'

            fullhref = urllib_urljoin(parentHref, self.href)

            # all possible exceptions are ignored
            try:
                usedEncoding, enctype, cssText = \
                    self.parentStyleSheet._resolveImport(fullhref)

                if cssText is None:
                    # caught in next except below!
                    raise IOError('Cannot read Stylesheet.')

                # contentEncoding with parentStyleSheet.overrideEncoding,
                # HTTP or parent
                encodingOverride, encoding = None, None

                if enctype == 0:
                    encodingOverride = usedEncoding
                elif 0 < enctype < 5:
                    encoding = usedEncoding

                # inherit fetcher for @imports in styleSheet
                importedSheet._href = fullhref
                importedSheet._setFetcher(self.parentStyleSheet._fetcher)
                importedSheet._setCssTextWithEncodingOverride(
                    cssText,
                    encodingOverride=encodingOverride,
                    encoding=encoding)

            except (OSError, IOError, ValueError) as e:
                self._log.warn('CSSImportRule: While processing imported '
                               'style sheet href=%s: %r'
                               % (self.href, e), neverraise=True)

            else:
                # used by resolveImports if to keep unprocessed href
                self.hrefFound = True

        self._styleSheet = importedSheet

    _href = None  # needs to be set
    href = property(lambda self: self._href, _setHref,
                    doc="Location of the style sheet to be imported.")

    def _setMedia(self, media):
        """
        :param media:
            a :class:`~css_parser.stylesheets.MediaList` or string
        """
        self._checkReadonly()
        # Under Python 2.X this was basestring but given unicode literals ...
        if isinstance(media, string_type):
            self._media = css_parser.stylesheets.MediaList(mediaText=media,
                                                           parentRule=self)
        else:
            media._parentRule = self
            self._media = media

        # update seq
        ihref = 0
        for i, item in enumerate(self.seq):
            if item.type == 'href':
                ihref = i
            elif item.type == 'media':
                self.seq[i] = (self._media, 'media', None, None)
                break
        else:
            # if no media until now add after href
            self.seq.insert(ihref+1,
                            self._media, 'media', None, None)

    media = property(lambda self: self._media, _setMedia,
                     doc="(DOM) A list of media types for this rule "
                         "of type :class:`~css_parser.stylesheets.MediaList`.")

    def _setName(self, name=''):
        """Raises xml.dom.SyntaxErr if name is not a string."""
        # Under Python 2.X this was basestring but given unicode literals ...
        if name is None or isinstance(name, string_type):
            # "" or '' handled as None
            if not name:
                name = None

            # save name
            self._name = name

            # update seq
            for i, item in enumerate(self.seq):
                typ = item.type
                if 'name' == typ:
                    self._seq[i] = (name, typ, item.line, item.col)
                    break

            # set title of imported sheet
            if self.styleSheet:
                self.styleSheet.title = name

        else:
            self._log.error('CSSImportRule: Not a valid name: %s' % name)

    name = property(lambda self: self._name, _setName,
                    doc="An optional name for the imported sheet.")

    styleSheet = property(lambda self: self._styleSheet,
                          doc="(readonly) The style sheet referred to by this "
                              "rule.")

    type = property(lambda self: self.IMPORT_RULE,
                    doc="The type of this rule, as defined by a CSSRule "
                        "type constant.")

    def _getWellformed(self):
        "Depending on if media is used at all."
        if self._usemedia:
            return bool(self.href and self.media.wellformed)
        else:
            return bool(self.href)

    wellformed = property(_getWellformed)
class CSSMediaRule(cssrule.CSSRuleRules):
    """
    Objects implementing the CSSMediaRule interface can be identified by the
    MEDIA_RULE constant. On these objects the type attribute must return the
    value of that constant.

    Format::

      : MEDIA_SYM S* medium [ COMMA S* medium ]*

          STRING? # the name

      LBRACE S* ruleset* '}' S*;

    ``cssRules``
        All Rules in this media rule, a :class:`~css_parser.css.CSSRuleList`.
    """

    def __init__(self, mediaText='all', name=None,
                 parentRule=None, parentStyleSheet=None, readonly=False):
        """
        :param mediaText:
            media for this rule, handed to the ``media`` setter; a falsy
            value results in an empty MediaList
        :param name:
            optional name of this media rule
        :param parentRule:
            passed through to the base rule constructor
        :param parentStyleSheet:
            passed through to the base rule constructor
        :param readonly:
            stored as ``_readonly`` after media and name have been set
        """
        super(CSSMediaRule, self).__init__(parentRule=parentRule,
                                           parentStyleSheet=parentStyleSheet)
        self._atkeyword = '@media'

        # 1. media
        if mediaText:
            self.media = mediaText
        else:
            self.media = css_parser.stylesheets.MediaList()

        self.name = name
        self._readonly = readonly

    def __repr__(self):
        return "css_parser.css.%s(mediaText=%r)" % (
            self.__class__.__name__,
            self.media.mediaText)

    def __str__(self):
        # One conversion per tuple element; an empty template would raise
        # "TypeError: not all arguments converted during string formatting".
        return "<css_parser.css.%s object mediaText=%r at 0x%x>" % (
            self.__class__.__name__,
            self.media.mediaText,
            id(self))

    def _getCssText(self):
        """Return serialized property cssText."""
        return css_parser.ser.do_CSSMediaRule(self)

    def _setCssText(self, cssText):
        """
        :param cssText:
            a parseable string or a tuple of (cssText, dict-of-namespaces)
        :Exceptions:
            - :exc:`~xml.dom.NamespaceErr`:
              Raised if a specified selector uses an unknown namespace
              prefix.
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - :exc:`~xml.dom.InvalidModificationErr`:
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - :exc:`~xml.dom.HierarchyRequestErr`:
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if the rule is readonly.
        """
        # media "name"? { cssRules }
        super(CSSMediaRule, self)._setCssText(cssText)

        # might be (cssText, namespaces)
        cssText, namespaces = self._splitNamespacesOff(cssText)

        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if self._type(attoken) != self._prods.MEDIA_SYM:
            self._log.error('CSSMediaRule: No CSSMediaRule found: %s' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)

        else:
            # save if parse goes wrong
            oldMedia = self._media
            oldCssRules = self._cssRules

            ok = True

            # media
            mediatokens, end = self._tokensupto2(tokenizer,
                                                 mediaqueryendonly=True,
                                                 separateEnd=True)
            if '{' == self._tokenvalue(end)\
               or self._prods.STRING == self._type(end):
                self.media = css_parser.stylesheets.MediaList(parentRule=self)
                # TODO: remove special case
                self.media.mediaText = mediatokens
                ok = ok and self.media.wellformed
            else:
                ok = False

            # name (optional)
            name = None
            nameseq = self._tempSeq()
            if self._prods.STRING == self._type(end):
                name = self._stringtokenvalue(end)
                # TODO: for now comments are lost after name
                nametokens, end = self._tokensupto2(tokenizer,
                                                    blockstartonly=True,
                                                    separateEnd=True)
                wellformed, expected = self._parse(None,
                                                   nameseq,
                                                   nametokens,
                                                   {})
                if not wellformed:
                    ok = False
                    self._log.error('CSSMediaRule: Syntax Error: %s' %
                                    self._valuestr(cssText))

            # check for {
            if '{' != self._tokenvalue(end):
                self._log.error('CSSMediaRule: No "{" found: %s' %
                                self._valuestr(cssText))
                # NOTE(review): this early return skips the restore of
                # oldMedia done at the end of this branch, although
                # ``self.media`` may already have been replaced above when a
                # name STRING was found - confirm whether that is intended.
                return

            # cssRules
            cssrulestokens, braceOrEOF = self._tokensupto2(tokenizer,
                                                           mediaendonly=True,
                                                           separateEnd=True)
            nonetoken = self._nexttoken(tokenizer, None)
            if 'EOF' == self._type(braceOrEOF):
                # HACK!!!
                # TODO: Not complete, add EOF to rule and } to @media
                cssrulestokens.append(braceOrEOF)
                braceOrEOF = ('CHAR', '}', 0, 0)
                self._log.debug('CSSMediaRule: Incomplete, adding "}".',
                                token=braceOrEOF, neverraise=True)

            if '}' != self._tokenvalue(braceOrEOF):
                self._log.error('CSSMediaRule: No "}" found.',
                                token=braceOrEOF)
            elif nonetoken:
                self._log.error('CSSMediaRule: Trailing content found.',
                                token=nonetoken)
            else:
                # for closures: must be a mutable
                new = {'wellformed': True}

                def COMMENT(expected, seq, token, tokenizer=None):
                    self.insertRule(css_parser.css.CSSComment(
                        [token],
                        parentRule=self,
                        parentStyleSheet=self.parentStyleSheet))
                    return expected

                def ruleset(expected, seq, token, tokenizer):
                    rule = css_parser.css.CSSStyleRule(
                        parentRule=self,
                        parentStyleSheet=self.parentStyleSheet)
                    rule.cssText = self._tokensupto2(tokenizer, token)
                    if rule.wellformed:
                        self.insertRule(rule)
                    return expected

                def atrule(expected, seq, token, tokenizer):
                    # TODO: get complete rule!
                    tokens = self._tokensupto2(tokenizer, token)
                    atval = self._tokenvalue(token)
                    # at-rules that get their own rule class in @media
                    factories = {
                        '@page': css_parser.css.CSSPageRule,
                        '@media': CSSMediaRule,
                    }
                    if atval in ('@charset ', '@font-face', '@import',
                                 '@namespace', '@variables'):
                        self._log.error('CSSMediaRule: This rule is not '
                                        'allowed in CSSMediaRule - ignored: '
                                        '%s.' % self._valuestr(tokens),
                                        token=token,
                                        error=xml.dom.HierarchyRequestErr)
                    elif atval in factories:
                        rule = factories[atval](
                            parentRule=self,
                            parentStyleSheet=self.parentStyleSheet)
                        rule.cssText = tokens
                        if rule.wellformed:
                            self.insertRule(rule)
                    else:
                        rule = css_parser.css.CSSUnknownRule(
                            tokens,
                            parentRule=self,
                            parentStyleSheet=self.parentStyleSheet)
                        if rule.wellformed:
                            self.insertRule(rule)
                    return expected

                # save for possible reset
                oldCssRules = self.cssRules

                self.cssRules = css_parser.css.CSSRuleList()
                seq = []  # not used really

                tokenizer = iter(cssrulestokens)
                wellformed, expected = self._parse(braceOrEOF,
                                                   seq,
                                                   tokenizer, {
                                                       'COMMENT': COMMENT,
                                                       'CHARSET_SYM': atrule,
                                                       'FONT_FACE_SYM': atrule,
                                                       'IMPORT_SYM': atrule,
                                                       'NAMESPACE_SYM': atrule,
                                                       'PAGE_SYM': atrule,
                                                       'MEDIA_SYM': atrule,
                                                       'ATKEYWORD': atrule
                                                   },
                                                   default=ruleset,
                                                   new=new)
                ok = ok and wellformed

            if ok:
                self.name = name
                self._setSeq(nameseq)
            else:
                # roll back to the state saved before parsing
                self._media = oldMedia
                self._cssRules = oldCssRules

    cssText = property(_getCssText, _setCssText,
                       doc="(DOM) The parsable textual representation of this "
                           "rule.")

    def _setName(self, name):
        """Set the optional name; ``""`` is stored as ``None``.

        Anything that is neither a string nor ``None`` is reported through
        the log and leaves the current name untouched.
        """
        # Under Python 2.x this was basestring but given unicode literals ...
        if isinstance(name, string_type) or name is None:
            # "" or ''
            if not name:
                name = None

            self._name = name
        else:
            # report under this class' own name (was 'CSSImportRule')
            self._log.error('CSSMediaRule: Not a valid name: %s' % name)

    name = property(lambda self: self._name, _setName,
                    doc="An optional name for this media rule.")

    def _setMedia(self, media):
        """
        :param media:
            a :class:`~css_parser.stylesheets.MediaList` or string
        """
        self._checkReadonly()
        # Under Python 2.x this was basestring but given unicode literals ...
        if isinstance(media, string_type):
            self._media = css_parser.stylesheets.MediaList(
                mediaText=media, parentRule=self)
        else:
            media._parentRule = self
            self._media = media

        # NOTE: media is not recorded in this rule's seq here.

    media = property(lambda self: self._media, _setMedia,
                     doc="(DOM) A list of media types for this rule "
                         "of type :class:`~css_parser.stylesheets.MediaList`.")

    def insertRule(self, rule, index=None):
        """Implements base ``insertRule``."""
        rule, index = self._prepareInsertRule(rule, index)

        if rule is False or rule is True:
            # done or error
            return

        # check hierarchy
        if isinstance(rule, css_parser.css.CSSCharsetRule) or \
           isinstance(rule, css_parser.css.CSSFontFaceRule) or \
           isinstance(rule, css_parser.css.CSSImportRule) or \
           isinstance(rule, css_parser.css.CSSNamespaceRule) or \
           isinstance(rule, css_parser.css.MarginRule):
            self._log.error('%s: This type of rule is not allowed here: %s'
                            % (self.__class__.__name__, rule.cssText),
                            error=xml.dom.HierarchyRequestErr)
            return

        return self._finishInsertRule(rule, index)

    type = property(lambda self: self.MEDIA_RULE,
                    doc="The type of this rule, as defined by a CSSRule "
                        "type constant.")

    wellformed = property(lambda self: self.media.wellformed)
This namespace prefix can then be used in namespace-qualified names such as those described in the Selectors Module [SELECT] or the Values and Units module [CSS3VAL]. Dealing with these rules directly is not needed anymore, easier is the use of :attr:`css_parser.css.CSSStyleSheet.namespaces`. Format:: namespace : NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S* ; namespace_prefix : IDENT ; """ def __init__(self, namespaceURI=None, prefix=None, cssText=None, parentRule=None, parentStyleSheet=None, readonly=False): """ :Parameters: namespaceURI The namespace URI (a simple string!) which is bound to the given prefix. If no prefix is set (``CSSNamespaceRule.prefix==''``) the namespace defined by namespaceURI is set as the default namespace prefix The prefix used in the stylesheet for the given ``CSSNamespaceRule.uri``. cssText if no namespaceURI is given cssText must be given to set a namespaceURI as this is readonly later on parentStyleSheet sheet where this rule belongs to Do not use as positional but as keyword parameters only! If readonly allows setting of properties in constructor only format namespace:: namespace : NAMESPACE_SYM S* [namespace_prefix S*]? 
[STRING|URI] S* ';' S* ; namespace_prefix : IDENT ; """ super(CSSNamespaceRule, self).__init__(parentRule=parentRule, parentStyleSheet=parentStyleSheet) self._atkeyword = '@namespace' self._prefix = '' self._namespaceURI = None if namespaceURI: self.namespaceURI = namespaceURI self.prefix = prefix tempseq = self._tempSeq() tempseq.append(self.prefix, 'prefix') tempseq.append(self.namespaceURI, 'namespaceURI') self._setSeq(tempseq) elif cssText is not None: self.cssText = cssText if parentStyleSheet: self._parentStyleSheet = parentStyleSheet self._readonly = readonly def __repr__(self): return "css_parser.css.%s(namespaceURI=%r, prefix=%r)" % ( self.__class__.__name__, self.namespaceURI, self.prefix) def __str__(self): return "" % ( self.__class__.__name__, self.namespaceURI, self.prefix, id(self)) def _getCssText(self): """Return serialized property cssText""" return css_parser.ser.do_CSSNamespaceRule(self) def _setCssText(self, cssText): """ :param cssText: initial value for this rules cssText which is parsed :exceptions: - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at this point in the style sheet. - :exc:`~xml.dom.InvalidModificationErr`: Raised if the specified CSS string value represents a different type of rule than the current one. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if the rule is readonly. - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. 
""" super(CSSNamespaceRule, self)._setCssText(cssText) tokenizer = self._tokenize2(cssText) attoken = self._nexttoken(tokenizer, None) if self._type(attoken) != self._prods.NAMESPACE_SYM: self._log.error('CSSNamespaceRule: No CSSNamespaceRule found: %s' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: # for closures: must be a mutable new = {'keyword': self._tokenvalue(attoken), 'prefix': '', 'uri': None, 'wellformed': True } def _ident(expected, seq, token, tokenizer=None): # the namespace prefix, optional if 'prefix or uri' == expected: new['prefix'] = self._tokenvalue(token) seq.append(new['prefix'], 'prefix') return 'uri' else: new['wellformed'] = False self._log.error( 'CSSNamespaceRule: Unexpected ident.', token) return expected def _string(expected, seq, token, tokenizer=None): # the namespace URI as a STRING if expected.endswith('uri'): new['uri'] = self._stringtokenvalue(token) seq.append(new['uri'], 'namespaceURI') return ';' else: new['wellformed'] = False self._log.error( 'CSSNamespaceRule: Unexpected string.', token) return expected def _uri(expected, seq, token, tokenizer=None): # the namespace URI as URI which is DEPRECATED if expected.endswith('uri'): uri = self._uritokenvalue(token) new['uri'] = uri seq.append(new['uri'], 'namespaceURI') return ';' else: new['wellformed'] = False self._log.error( 'CSSNamespaceRule: Unexpected URI.', token) return expected def _char(expected, seq, token, tokenizer=None): # final ; val = self._tokenvalue(token) if ';' == expected and ';' == val: return 'EOF' else: new['wellformed'] = False self._log.error( 'CSSNamespaceRule: Unexpected char.', token) return expected # "NAMESPACE_SYM S* [namespace_prefix S*]? 
[STRING|URI] S* ';' S*" newseq = self._tempSeq() wellformed, expected = self._parse(expected='prefix or uri', seq=newseq, tokenizer=tokenizer, productions={'IDENT': _ident, 'STRING': _string, 'URI': _uri, 'CHAR': _char}, new=new) # wellformed set by parse wellformed = wellformed and new['wellformed'] # post conditions if new['uri'] is None: wellformed = False self._log.error('CSSNamespaceRule: No namespace URI found: %s' % self._valuestr(cssText)) if expected != 'EOF': wellformed = False self._log.error('CSSNamespaceRule: No ";" found: %s' % self._valuestr(cssText)) # set all if wellformed: self.atkeyword = new['keyword'] self._prefix = new['prefix'] self.namespaceURI = new['uri'] self._setSeq(newseq) cssText = property(fget=_getCssText, fset=_setCssText, doc="(DOM) The parsable textual representation of this " "rule.") def _setNamespaceURI(self, namespaceURI): """ :param namespaceURI: the initial value for this rules namespaceURI :exceptions: - :exc:`~xml.dom.NoModificationAllowedErr`: (CSSRule) Raised if this rule is readonly or a namespaceURI is already set in this rule. """ self._checkReadonly() if not self._namespaceURI: # initial setting self._namespaceURI = namespaceURI tempseq = self._tempSeq() tempseq.append(namespaceURI, 'namespaceURI') self._setSeq(tempseq) # makes seq readonly! elif self._namespaceURI != namespaceURI: self._log.error('CSSNamespaceRule: namespaceURI is readonly.', error=xml.dom.NoModificationAllowedErr) namespaceURI = property(lambda self: self._namespaceURI, _setNamespaceURI, doc="URI (handled as simple string) of the defined namespace.") def _replaceNamespaceURI(self, namespaceURI): """Used during parse of new sheet only! 
:param namespaceURI: the new value for this rules namespaceURI """ self._namespaceURI = namespaceURI for i, x in enumerate(self._seq): if 'namespaceURI' == x.type: self._seq._readonly = False self._seq.replace(i, namespaceURI, 'namespaceURI') self._seq._readonly = True break def _setPrefix(self, prefix=None): """ :param prefix: the new prefix :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this rule is readonly. """ self._checkReadonly() if not prefix: prefix = '' else: tokenizer = self._tokenize2(prefix) prefixtoken = self._nexttoken(tokenizer, None) if not prefixtoken or self._type(prefixtoken) != self._prods.IDENT: self._log.error('CSSNamespaceRule: No valid prefix "%s".' % self._valuestr(prefix), error=xml.dom.SyntaxErr) return else: prefix = self._tokenvalue(prefixtoken) # update seq for i, x in enumerate(self._seq): if x == self._prefix: self._seq[i] = (prefix, 'prefix', None, None) break else: # put prefix at the beginning! self._seq[0] = (prefix, 'prefix', None, None) # set new prefix self._prefix = prefix prefix = property(lambda self: self._prefix, _setPrefix, doc="Prefix used for the defined namespace.") type = property(lambda self: self.NAMESPACE_RULE, doc="The type of this rule, as defined by a CSSRule " "type constant.") wellformed = property(lambda self: self.namespaceURI is not None) css-parser-1.0.4/src/css_parser/css/csspagerule.py0000644000175000017500000003762113407702010022465 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function import xml.dom import css_parser from . 
import cssrule from .marginrule import MarginRule from .cssstyledeclaration import CSSStyleDeclaration from itertools import chain """CSSPageRule implements DOM Level 2 CSS CSSPageRule.""" __all__ = ['CSSPageRule'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import sys if sys.version_info[0] >= 3: string_type = str else: string_type = basestring def as_list(p): if isinstance(p, list): return p return list(p) class CSSPageRule(cssrule.CSSRuleRules): """ The CSSPageRule interface represents a @page rule within a CSS style sheet. The @page rule is used to specify the dimensions, orientation, margins, etc. of a page box for paged media. Format:: page : PAGE_SYM S* IDENT? pseudo_page? S* '{' S* [ declaration | margin ]? [ ';' S* [ declaration | margin ]? ]* '}' S* ; pseudo_page : ':' [ "left" | "right" | "first" ] ; margin : margin_sym S* '{' declaration [ ';' S* declaration? ]* '}' S* ; margin_sym : TOPLEFTCORNER_SYM | TOPLEFT_SYM | TOPCENTER_SYM | TOPRIGHT_SYM | TOPRIGHTCORNER_SYM | BOTTOMLEFTCORNER_SYM | BOTTOMLEFT_SYM | BOTTOMCENTER_SYM | BOTTOMRIGHT_SYM | BOTTOMRIGHTCORNER_SYM | LEFTTOP_SYM | LEFTMIDDLE_SYM | LEFTBOTTOM_SYM | RIGHTTOP_SYM | RIGHTMIDDLE_SYM | RIGHTBOTTOM_SYM ; `cssRules` contains a list of `MarginRule` objects. """ def __init__(self, selectorText=None, style=None, parentRule=None, parentStyleSheet=None, readonly=False): """ If readonly allows setting of properties in constructor only. 
:param selectorText: type string :param style: CSSStyleDeclaration for this CSSStyleRule """ super(CSSPageRule, self).__init__(parentRule=parentRule, parentStyleSheet=parentStyleSheet) self._atkeyword = '@page' self._specificity = (0, 0, 0) tempseq = self._tempSeq() if selectorText: self.selectorText = selectorText tempseq.append(self.selectorText, 'selectorText') else: self._selectorText = self._tempSeq() if style: self.style = style else: self.style = CSSStyleDeclaration() tempseq.append(self.style, 'style') self._setSeq(tempseq) self._readonly = readonly def __repr__(self): return "css_parser.css.%s(selectorText=%r, style=%r)" % ( self.__class__.__name__, self.selectorText, self.style.cssText) def __str__(self): return ("") % ( self.__class__.__name__, self.selectorText, self.specificity, self.style.cssText, len(self.cssRules), id(self)) def __contains__(self, margin): """Check if margin is set in the rule.""" return margin in as_list(self.keys()) def keys(self): "Return list of all set margins (MarginRule)." return as_list(r.margin for r in self.cssRules) def __getitem__(self, margin): """Retrieve the style (of MarginRule) for `margin` (which must be normalized). """ for r in self.cssRules: if r.margin == margin: return r.style def __setitem__(self, margin, style): """Set the style (of MarginRule) for `margin` (which must be normalized). """ for i, r in enumerate(self.cssRules): if r.margin == margin: r.style = style return i else: return self.add(MarginRule(margin, style)) def __delitem__(self, margin): """Delete the style (the MarginRule) for `margin` (which must be normalized). """ for r in self.cssRules: if r.margin == margin: self.deleteRule(r) def __parseSelectorText(self, selectorText): """ Parse `selectorText` which may also be a list of tokens and returns (selectorText, seq). 
see _setSelectorText for details """ # for closures: must be a mutable new = {'wellformed': True, 'last-S': False, 'name': 0, 'first': 0, 'lr': 0} def _char(expected, seq, token, tokenizer=None): # pseudo_page, :left, :right or :first val = self._tokenvalue(token) if not new['last-S'] and expected in ['page', ': or EOF']\ and ':' == val: try: identtoken = next(tokenizer) except StopIteration: self._log.error( 'CSSPageRule selectorText: No IDENT found.', token) else: ival, ityp = self._tokenvalue(identtoken),\ self._type(identtoken) if self._prods.IDENT != ityp: self._log.error('CSSPageRule selectorText: Expected ' 'IDENT but found: %r' % ival, token) else: if ival not in ('first', 'left', 'right'): self._log.warn('CSSPageRule: Unknown @page ' 'selector: %r' % (':'+ival,), neverraise=True) if ival == 'first': new['first'] = 1 else: new['lr'] = 1 seq.append(val + ival, 'pseudo') return 'EOF' return expected else: new['wellformed'] = False self._log.error('CSSPageRule selectorText: Unexpected CHAR: %r' % val, token) return expected def S(expected, seq, token, tokenizer=None): "Does not raise if EOF is found." if expected == ': or EOF': # pseudo must directly follow IDENT if given new['last-S'] = True return expected def IDENT(expected, seq, token, tokenizer=None): "" val = self._tokenvalue(token) if 'page' == expected: if self._normalize(val) == 'auto': self._log.error('CSSPageRule selectorText: Invalid pagename.', token) else: new['name'] = 1 seq.append(val, 'IDENT') return ': or EOF' else: new['wellformed'] = False self._log.error('CSSPageRule selectorText: Unexpected IDENT: ' '%r' % val, token) return expected def COMMENT(expected, seq, token, tokenizer=None): "Does not raise if EOF is found." 
seq.append(css_parser.css.CSSComment([token]), 'COMMENT') return expected newseq = self._tempSeq() wellformed, expected = self._parse(expected='page', seq=newseq, tokenizer=self._tokenize2(selectorText), productions={'CHAR': _char, 'IDENT': IDENT, 'COMMENT': COMMENT, 'S': S}, new=new) wellformed = wellformed and new['wellformed'] # post conditions if expected == 'ident': self._log.error( 'CSSPageRule selectorText: No valid selector: %r' % self._valuestr(selectorText)) return wellformed, newseq, (new['name'], new['first'], new['lr']) def __parseMarginAndStyle(self, tokens): "tokens is a list, no generator (yet)" g = iter(tokens) styletokens = [] # new rules until parse done cssRules = [] for token in g: if token[0] == 'ATKEYWORD' and \ self._normalize(token[1]) in MarginRule.margins: # MarginRule m = MarginRule(parentRule=self, parentStyleSheet=self.parentStyleSheet) m.cssText = chain([token], g) # merge if margin set more than once for r in cssRules: if r.margin == m.margin: for p in m.style: r.style.setProperty(p, replace=False) break else: cssRules.append(m) continue # TODO: Properties? styletokens.append(token) return cssRules, styletokens def _getCssText(self): """Return serialized property cssText.""" return css_parser.ser.do_CSSPageRule(self) def _setCssText(self, cssText): """ :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.InvalidModificationErr`: Raised if the specified CSS string value represents a different type of rule than the current one. - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at this point in the style sheet. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if the rule is readonly. 
""" super(CSSPageRule, self)._setCssText(cssText) tokenizer = self._tokenize2(cssText) if self._type(self._nexttoken(tokenizer)) != self._prods.PAGE_SYM: self._log.error('CSSPageRule: No CSSPageRule found: %s' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: newStyle = CSSStyleDeclaration(parentRule=self) ok = True selectortokens, startbrace = self._tokensupto2(tokenizer, blockstartonly=True, separateEnd=True) styletokens, braceorEOFtoken = self._tokensupto2(tokenizer, blockendonly=True, separateEnd=True) nonetoken = self._nexttoken(tokenizer) if self._tokenvalue(startbrace) != '{': ok = False self._log.error('CSSPageRule: No start { of style declaration ' 'found: %r' % self._valuestr(cssText), startbrace) elif nonetoken: ok = False self._log.error('CSSPageRule: Trailing content found.', token=nonetoken) selok, newselseq, specificity = self.__parseSelectorText(selectortokens) ok = ok and selok val, type_ = self._tokenvalue(braceorEOFtoken),\ self._type(braceorEOFtoken) if val != '}' and type_ != 'EOF': ok = False self._log.error( 'CSSPageRule: No "}" after style declaration found: %r' % self._valuestr(cssText)) else: if 'EOF' == type_: # add again as style needs it styletokens.append(braceorEOFtoken) # filter pagemargin rules out first cssRules, styletokens = self.__parseMarginAndStyle(styletokens) # SET, may raise: newStyle.cssText = styletokens if ok: self._selectorText = newselseq self._specificity = specificity self.style = newStyle self.cssRules = css_parser.css.CSSRuleList() for r in cssRules: self.cssRules.append(r) cssText = property(_getCssText, _setCssText, doc="(DOM) The parsable textual representation of this rule.") def _getSelectorText(self): """Wrapper for css_parser Selector object.""" return css_parser.ser.do_CSSPageRuleSelector(self._selectorText) def _setSelectorText(self, selectorText): """Wrapper for css_parser Selector object. 
:param selectorText: DOM String, in CSS 2.1 one of - :first - :left - :right - empty :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this rule is readonly. """ self._checkReadonly() # may raise SYNTAX_ERR wellformed, newseq, specificity = self.__parseSelectorText(selectorText) if wellformed: self._selectorText = newseq self._specificity = specificity selectorText = property(_getSelectorText, _setSelectorText, doc="(DOM) The parsable textual representation of " "the page selector for the rule.") def _setStyle(self, style): """ :param style: a CSSStyleDeclaration or string """ self._checkReadonly() # Under Python2.x this was basestring but given unicode literals ... if isinstance(style, string_type): self._style = CSSStyleDeclaration(cssText=style, parentRule=self) else: style._parentRule = self self._style = style style = property(lambda self: self._style, _setStyle, doc="(DOM) The declaration-block of this rule set, " "a :class:`~css_parser.css.CSSStyleDeclaration`.") def insertRule(self, rule, index=None): """Implements base ``insertRule``.""" rule, index = self._prepareInsertRule(rule, index) if rule is False or rule is True: # done or error return # check hierarchy if isinstance(rule, css_parser.css.CSSCharsetRule) or \ isinstance(rule, css_parser.css.CSSFontFaceRule) or \ isinstance(rule, css_parser.css.CSSImportRule) or \ isinstance(rule, css_parser.css.CSSNamespaceRule) or \ isinstance(rule, CSSPageRule) or \ isinstance(rule, css_parser.css.CSSMediaRule): self._log.error('%s: This type of rule is not allowed here: %s' % (self.__class__.__name__, rule.cssText), error=xml.dom.HierarchyRequestErr) return return self._finishInsertRule(rule, index) specificity = property(lambda self: self._specificity, doc="""Specificity of this page rule (READONLY). 
Tuple of (f, g, h) where: - if the page selector has a named page, f=1; else f=0 - if the page selector has a ':first' pseudo-class, g=1; else g=0 - if the page selector has a ':left' or ':right' pseudo-class, h=1; else h=0 """) type = property(lambda self: self.PAGE_RULE, doc="The type of this rule, as defined by a CSSRule " "type constant.") # constant but needed: wellformed = property(lambda self: True) css-parser-1.0.4/src/css_parser/css/cssproperties.py0000644000175000017500000001212513407702010023045 0ustar kovidkovid00000000000000"""CSS2Properties (partly!) implements DOM Level 2 CSS CSS2Properties used by CSSStyleDeclaration TODO: CSS2Properties If an implementation does implement this interface, it is expected to understand the specific syntax of the shorthand properties, and apply their semantics; when the margin property is set, for example, the marginTop, marginRight, marginBottom and marginLeft properties are actually being set by the underlying implementation. When dealing with CSS "shorthand" properties, the shorthand properties should be decomposed into their component longhand properties as appropriate, and when querying for their value, the form returned should be the shortest form exactly equivalent to the declarations made in the ruleset. However, if there is no shorthand declaration that could be added to the ruleset without changing in any way the rules already declared in the ruleset (i.e., by adding longhand rules that were previously not declared in the ruleset), then the empty string should be returned for the shorthand property. For example, querying for the font property should not return "normal normal normal 14pt/normal Arial, sans-serif", when "14pt Arial, sans-serif" suffices. (The normals are initial values, and are implied by use of the longhand property.) If the values for all the longhand properties that compose a particular string are the initial values, then a string consisting of all the initial values should be returned (e.g. 
a border-width value of "medium" should be returned as such, not as ""). For some shorthand properties that take missing values from other sides, such as the margin, padding, and border-[width|style|color] properties, the minimum number of sides possible should be used; i.e., "0px 10px" will be returned instead of "0px 10px 0px 10px". If the value of a shorthand property can not be decomposed into its component longhand properties, as is the case for the font property with a value of "menu", querying for the values of the component longhand properties should return the empty string. TODO: CSS2Properties DOMImplementation The interface found within this section are not mandatory. A DOM application can use the hasFeature method of the DOMImplementation interface to determine whether it is supported or not. The feature string for this extended interface listed in this section is "CSS2" and the version is "2.0". """ __all__ = ['CSS2Properties'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import css_parser.profiles import re class CSS2Properties(object): """The CSS2Properties interface represents a convenience mechanism for retrieving and setting properties within a CSSStyleDeclaration. The attributes of this interface correspond to all the properties specified in CSS2. Getting an attribute of this interface is equivalent to calling the getPropertyValue method of the CSSStyleDeclaration interface. Setting an attribute of this interface is equivalent to calling the setProperty method of the CSSStyleDeclaration interface. css_parser actually also allows usage of ``del`` to remove a CSS property from a CSSStyleDeclaration. This is an abstract class, the following functions need to be present in inheriting class: - ``_getP`` - ``_setP`` - ``_delP`` """ # actual properties are set after the class definition! 
def _getP(self, CSSname): pass def _setP(self, CSSname, value): pass def _delP(self, CSSname): pass _reCSStoDOMname = re.compile('-[a-z]', re.I) def _toDOMname(CSSname): """Returns DOMname for given CSSname e.g. for CSSname 'font-style' returns 'fontStyle'. """ def _doCSStoDOMname2(m): return m.group(0)[1].capitalize() return _reCSStoDOMname.sub(_doCSStoDOMname2, CSSname) _reDOMtoCSSname = re.compile('([A-Z])[a-z]+') def _toCSSname(DOMname): """Return CSSname for given DOMname e.g. for DOMname 'fontStyle' returns 'font-style'. """ def _doDOMtoCSSname2(m): return '-' + m.group(0).lower() return _reDOMtoCSSname.sub(_doDOMtoCSSname2, DOMname) # add list of DOMname properties to CSS2Properties # used for CSSStyleDeclaration to check if allowed properties # but somehow doubled, any better way? CSS2Properties._properties = [] for group in css_parser.profiles.properties: for name in css_parser.profiles.properties[group]: CSS2Properties._properties.append(_toDOMname(name)) # add CSS2Properties to CSSStyleDeclaration: def __named_property_def(DOMname): """ Closure to keep name known in each properties accessor function DOMname is converted to CSSname here, so actual calls use CSSname. 
""" CSSname = _toCSSname(DOMname) def _get(self): return self._getP(CSSname) def _set(self, value): self._setP(CSSname, value) def _del(self): self._delP(CSSname) return _get, _set, _del # add all CSS2Properties to CSSStyleDeclaration for DOMname in CSS2Properties._properties: setattr(CSS2Properties, DOMname, property(*__named_property_def(DOMname))) css-parser-1.0.4/src/css_parser/css/cssrule.py0000644000175000017500000003011113407702010021613 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function import xml.dom import css_parser """CSSRule implements DOM Level 2 CSS CSSRule.""" __all__ = ['CSSRule'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import sys if sys.version_info[0] >= 3: string_type = str else: string_type = basestring class CSSRule(css_parser.util.Base2): """Abstract base interface for any type of CSS statement. This includes both rule sets and at-rules. An implementation is expected to preserve all rules specified in a CSS style sheet, even if the rule is not recognized by the parser. Unrecognized rules are represented using the :class:`CSSUnknownRule` interface. """ """ CSSRule type constants. An integer indicating which type of rule this is. 
""" UNKNOWN_RULE = 0 ":class:`css_parser.css.CSSUnknownRule` (not used in CSSOM anymore)" STYLE_RULE = 1 ":class:`css_parser.css.CSSStyleRule`" CHARSET_RULE = 2 ":class:`css_parser.css.CSSCharsetRule` (not used in CSSOM anymore)" IMPORT_RULE = 3 ":class:`css_parser.css.CSSImportRule`" MEDIA_RULE = 4 ":class:`css_parser.css.CSSMediaRule`" FONT_FACE_RULE = 5 ":class:`css_parser.css.CSSFontFaceRule`" PAGE_RULE = 6 ":class:`css_parser.css.CSSPageRule`" NAMESPACE_RULE = 10 """:class:`css_parser.css.CSSNamespaceRule`, Value has changed in 0.9.7a3 due to a change in the CSSOM spec.""" COMMENT = 1001 # was -1, css_parser only """:class:`css_parser.css.CSSComment` - not in the offical spec, Value has changed in 0.9.7a3""" VARIABLES_RULE = 1008 """:class:`css_parser.css.CSSVariablesRule` - experimental rule not in the offical spec""" MARGIN_RULE = 1006 """:class:`css_parser.css.MarginRule` - experimental rule not in the offical spec""" _typestrings = {UNKNOWN_RULE: 'UNKNOWN_RULE', STYLE_RULE: 'STYLE_RULE', CHARSET_RULE: 'CHARSET_RULE', IMPORT_RULE: 'IMPORT_RULE', MEDIA_RULE: 'MEDIA_RULE', FONT_FACE_RULE: 'FONT_FACE_RULE', PAGE_RULE: 'PAGE_RULE', NAMESPACE_RULE: 'NAMESPACE_RULE', COMMENT: 'COMMENT', VARIABLES_RULE: 'VARIABLES_RULE', MARGIN_RULE: 'MARGIN_RULE' } def __init__(self, parentRule=None, parentStyleSheet=None, readonly=False): """Set common attributes for all rules.""" super(CSSRule, self).__init__() self._parent = parentRule self._parentRule = parentRule self._parentStyleSheet = parentStyleSheet self._setSeq(self._tempSeq()) # self._atkeyword = None # must be set after initialization of #inheriting rule is done self._readonly = False def _setAtkeyword(self, keyword): """Check if new keyword fits the rule it is used for.""" atkeyword = self._normalize(keyword) if not self.atkeyword or (self.atkeyword == atkeyword): self._atkeyword = atkeyword self._keyword = keyword else: self._log.error('%s: Invalid atkeyword for this rule: %r' % (self.atkeyword, keyword), 
error=xml.dom.InvalidModificationErr) atkeyword = property(lambda self: self._atkeyword, _setAtkeyword, doc="Normalized keyword of an @rule (e.g. ``@import``).") def _setCssText(self, cssText): """ :param cssText: A parsable DOMString. :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.InvalidModificationErr`: Raised if the specified CSS string value represents a different type of rule than the current one. - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at this point in the style sheet. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if the rule is readonly. """ self._checkReadonly() cssText = property(lambda self: '', _setCssText, doc="(DOM) The parsable textual representation of the " "rule. This reflects the current state of the rule " "and not its initial value.") parent = property(lambda self: self._parent, doc="The Parent Node of this CSSRule or None.") parentRule = property(lambda self: self._parentRule, doc="If this rule is contained inside another rule " "(e.g. a style rule inside an @media block), this " "is the containing rule. If this rule is not nested " "inside any other rules, this returns None.") def _getParentStyleSheet(self): # rules contained in other rules (@media) use that rules parent if (self.parentRule): return self.parentRule._parentStyleSheet else: return self._parentStyleSheet parentStyleSheet = property(_getParentStyleSheet, doc="The style sheet that contains this rule.") type = property(lambda self: self.UNKNOWN_RULE, doc="The type of this rule, as defined by a CSSRule " "type constant.") typeString = property(lambda self: CSSRule._typestrings[self.type], doc="Descriptive name of this rule's type.") wellformed = property(lambda self: False, doc="If the rule is wellformed.") class CSSRuleRules(CSSRule): """Abstract base interface for rules that contain other rules like @media or @page. 
Methods may be overwritten if a rule has specific stuff to do like checking the order of insertion like @media does. """ def __init__(self, parentRule=None, parentStyleSheet=None): super(CSSRuleRules, self).__init__(parentRule=parentRule, parentStyleSheet=parentStyleSheet) self.cssRules = css_parser.css.CSSRuleList() def __iter__(self): """Generator iterating over these rule's cssRules.""" for rule in self._cssRules: yield rule def _setCssRules(self, cssRules): "Set new cssRules and update contained rules refs." cssRules.append = self.insertRule cssRules.extend = self.insertRule cssRules.__delitem__ == self.deleteRule for rule in cssRules: rule._parentRule = self rule._parentStyleSheet = None self._cssRules = cssRules cssRules = property(lambda self: self._cssRules, _setCssRules, "All Rules in this style sheet, a " ":class:`~css_parser.css.CSSRuleList`.") def deleteRule(self, index): """ Delete the rule at `index` from rules ``cssRules``. :param index: The `index` of the rule to be removed from the rules cssRules list. For an `index` < 0 **no** :exc:`~xml.dom.IndexSizeErr` is raised but rules for normal Python lists are used. E.g. ``deleteRule(-1)`` removes the last rule in cssRules. `index` may also be a CSSRule object which will then be removed. :Exceptions: - :exc:`~xml.dom.IndexSizeErr`: Raised if the specified index does not correspond to a rule in the media rule list. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this media rule is readonly. 
""" self._checkReadonly() if isinstance(index, CSSRule): for i, r in enumerate(self.cssRules): if index == r: index = i break else: raise xml.dom.IndexSizeErr("%s: Not a rule in " "this rule'a cssRules list: %s" % (self.__class__.__name__, index)) try: # detach self._cssRules[index]._parentRule = None del self._cssRules[index] except IndexError: raise xml.dom.IndexSizeErr('%s: %s is not a valid index ' 'in the rulelist of length %i' % (self.__class__.__name__, index, self._cssRules.length)) def _prepareInsertRule(self, rule, index=None): "return checked `index` and optional parsed `rule`" self._checkReadonly() # check index if index is None: index = len(self._cssRules) elif index < 0 or index > self._cssRules.length: raise xml.dom.IndexSizeErr('%s: Invalid index %s for ' 'CSSRuleList with a length of %s.' % (self.__class__.__name__, index, self._cssRules.length)) # check and optionally parse rule # Under Python 2.x this was basestring but ... if isinstance(rule, string_type): tempsheet = css_parser.css.CSSStyleSheet() tempsheet.cssText = rule if len(tempsheet.cssRules) != 1 or (tempsheet.cssRules and not isinstance(tempsheet.cssRules[0], css_parser.css.CSSRule)): self._log.error('%s: Invalid Rule: %s' % (self.__class__.__name__, rule)) return False, False rule = tempsheet.cssRules[0] elif isinstance(rule, css_parser.css.CSSRuleList): # insert all rules for i, r in enumerate(rule): self.insertRule(r, index + i) return True, True elif not isinstance(rule, css_parser.css.CSSRule): self._log.error('%s: Not a CSSRule: %s' % (rule, self.__class__.__name__)) return False, False return rule, index def _finishInsertRule(self, rule, index): "add `rule` at `index`" rule._parentRule = self rule._parentStyleSheet = None self._cssRules.insert(index, rule) return index def add(self, rule): """Add `rule` to page rule. Same as ``insertRule(rule)``.""" return self.insertRule(rule) def insertRule(self, rule, index=None): """ Insert `rule` into the rules ``cssRules``. 
class CSSRuleList(list):
    """Ordered collection of CSS rules (DOM Level 2 CSS ``CSSRuleList``).

    Rules are reachable through an integral index starting at 0. The class
    derives from the builtin ``list``, so the reading part of the list API
    is available as usual.

    The mutating entry points ``append``, ``extend``, ``__setitem__`` and
    ``__setslice__`` deliberately raise ``NotImplementedError`` here; the
    object that owns a rule list is expected to put working replacements
    on the instance (``CSSStyleSheet`` for example supplies ``append``).
    """

    def __init__(self, *ignored):
        """Accept and discard all arguments; the list always starts empty."""

    def __notimplemented(self, *ignored):
        """Stand-in for the mutators which the owning object has to supply."""
        raise NotImplementedError(
            'Must be implemented by class using an instance of this class.')

    append = __notimplemented
    extend = __notimplemented
    __setitem__ = __notimplemented
    __setslice__ = __notimplemented

    def item(self, index):
        """(DOM) Return the rule at ordinal `index`, or ``None``.

        The order of the collection is the order of the rules in the
        style sheet. Any `index` for which list indexing raises
        ``IndexError`` yields ``None`` instead of an exception.
        """
        try:
            found = self[index]
        except IndexError:
            found = None
        return found

    @property
    def length(self):
        """(DOM) The number of CSSRules in the list."""
        return len(self)

    def rulesOfType(self, type):
        """Yield, in list order, only those rules whose ``type`` equals
        `type`, one of the constants defined in
        :class:`css_parser.css.CSSRule`."""
        for candidate in self:
            wanted = candidate.type == type
            if wanted:
                yield candidate
For example:: IMG { float: left } /* correct CSS2 */ IMG { float: left here } /* "here" is not a value of 'float' */ IMG { background: "red" } /* keywords cannot be quoted in CSS2 */ IMG { border-width: 3 } /* a unit must be specified for length values */ A CSS2 parser would honor the first rule and ignore the rest, as if the style sheet had been:: IMG { float: left } IMG { } IMG { } IMG { } css_parser again will issue a message (WARNING in this case) about invalid CSS2 property values. TODO: This interface is also used to provide a read-only access to the computed values of an element. See also the ViewCSS interface. - return computed values and not literal values - simplify unit pairs/triples/quadruples 2px 2px 2px 2px -> 2px for border/padding... - normalize compound properties like: background: no-repeat left url() #fff -> background: #fff url() no-repeat left """ from __future__ import unicode_literals, division, absolute_import, print_function __all__ = ['CSSStyleDeclaration', 'Property'] __docformat__ = 'restructuredtext' __version__ = '$Id$' from .cssproperties import CSS2Properties from .property import Property import css_parser class CSSStyleDeclaration(CSS2Properties, css_parser.util.Base2): """The CSSStyleDeclaration class represents a single CSS declaration block. This class may be used to determine the style properties currently set in a block or to set style properties explicitly within the block. While an implementation may not recognize all CSS properties within a CSS declaration block, it is expected to provide access to all specified properties in the style sheet through the CSSStyleDeclaration interface. Furthermore, implementations that support a specific level of CSS should correctly handle CSS shorthand properties for that level. For a further discussion of shorthand properties, see the CSS2Properties interface. Additionally the CSS2Properties interface is implemented. 
def __contains__(self, nameOrProperty):
    """Tell whether a property is set in this declaration.

    :param nameOrProperty:
        either a :class:`Property`, whose ``name`` is taken as is, or a
        property name string, which is passed through ``self._normalize``
        first. The literal (unnormalized) name is never looked at.
    :returns:
        ``True`` if the resulting name is one of the names currently set.
    """
    given_property = isinstance(nameOrProperty, Property)
    wanted = (nameOrProperty.name if given_property
              else self._normalize(nameOrProperty))
    return wanted in self.__nnames()
def __str__(self):
    """Return a short human readable description of this declaration.

    The text contains the class name, ``self.length`` (number of
    distinct property names), the number of all properties as reported
    by ``self.getProperties(all=True)`` and the ``id`` of the object.
    """
    # The format string needs one placeholder per argument; an empty
    # string would raise "TypeError: not all arguments converted".
    return "<css_parser.css.%s object length=%r (all: %r) at 0x%x>" % (
        self.__class__.__name__,
        self.length,
        len(self.getProperties(all=True)),
        id(self))
def _setCssText(self, cssText):
    """Parse `cssText` and replace the content of this declaration.

    Setting this attribute will result in the parsing of the new value
    and resetting of all the properties in the declaration block
    including the removal or addition of properties.

    :param cssText:
        whatever ``self._tokenize2`` accepts (helper defined elsewhere)

    :exceptions:
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this declaration is readonly or a property is readonly.
        - :exc:`~xml.dom.SyntaxErr`:
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
    """
    self._checkReadonly()
    tokenizer = self._tokenize2(cssText)

    # The three callbacks below are handed to ``self._parse`` as
    # productions. Each of them returns ``expected`` unchanged.

    def ident(expected, seq, token, tokenizer=None):
        # a property: collect its tokens (``semicolon=True`` is passed to
        # the token collector) and drop a trailing ";" before the tokens
        # are given to a new Property
        tokens = self._tokensupto2(tokenizer, starttoken=token,
                                   semicolon=True)
        if self._tokenvalue(tokens[-1]) == ';':
            tokens.pop()
        property = Property(parent=self)
        property.cssText = tokens
        if property.wellformed:
            seq.append(property, 'Property')
        else:
            # a property which is not wellformed is logged and left out
            self._log.error('CSSStyleDeclaration: Syntax Error in '
                            'Property: %s' % self._valuestr(tokens))
        # does not matter in this case
        return expected

    def unexpected(expected, seq, token, tokenizer=None):
        # error, find next ; or } to omit upto next property
        ignored = self._tokenvalue(token) + self._valuestr(
            self._tokensupto2(tokenizer, propertyvalueendonly=True))
        self._log.error('CSSStyleDeclaration: Unexpected token, ignoring '
                        'upto %r.' % ignored, token)
        # does not matter in this case
        return expected

    def char(expected, seq, token, tokenizer=None):
        # a standalone ; or error...
        if self._tokenvalue(token) == ';':
            self._log.info('CSSStyleDeclaration: Stripped standalone semicolon'
                           ': %s' % self._valuestr([token]), neverraise=True)
            return expected
        else:
            return unexpected(expected, seq, token, tokenizer)

    # [Property: Value;]* Property: Value?
    # Everything is parsed into a temporary sequence first; it only
    # replaces the current one in the last statement of this method.
    newseq = self._tempSeq()
    wellformed, expected = self._parse(expected=None,
                                       seq=newseq, tokenizer=tokenizer,
                                       productions={'IDENT': ident,
                                                    'CHAR': char},
                                       default=unexpected)
    # wellformed set by parse

    # point every parsed item back at this declaration
    for item in newseq:
        item.value._parent = self

    # do not check wellformed as invalid things are removed anyway
    self._setSeq(newseq)
def getProperties(self, name=None, all=False):
    """Return the :class:`~css_parser.css.Property` objects set in this
    declaration as a list.

    :param name:
        optional name of the requested properties, **always normalized**.
        Without a name the properties of every set name are returned.
    :param all:
        if false (DEFAULT) only effective properties are returned: a list
        with at most one entry if `name` is given, otherwise one entry
        per distinct name. If true every matching property is returned,
        including those set several times, in the order of ``self.seq``.
    :returns:
        a (possibly empty) list of properties.
    """
    if all:
        # all properties, or all carrying the normalized name
        wanted = self._normalize(name)
        values = (entry.value for entry in self.seq)
        return [value for value in values
                if isinstance(value, Property)
                and (not wanted or value.name == wanted)]

    if not name:
        # effective property of every name which is set
        return [self.getProperty(each) for each in self.__nnames()]

    # a single effective property, still wrapped in a list
    effective = self.getProperty(name)
    return [effective] if effective else []
def getPropertyValue(self, name, normalize=True):
    r"""Return the value of the effective property called `name`.

    :param name:
        of the CSS property, always lowercase (even if not normalized)
    :param normalize:
        handed on to :meth:`getProperty`. If ``True`` (DEFAULT) the name
        is normalized (lowercase, no simple escapes) so "color", "COLOR"
        or "C\olor" are all equivalent. If ``False`` the result may
        **NOT** be the effective value but the effective one for the
        unnormalized name.
    :returns:
        the ``value`` of the property found by :meth:`getProperty`, or
        the empty string if there is no such property.
    """
    effective = self.getProperty(name, normalize)
    return effective.value if effective else ''
def setProperty(self, name, value=None, priority='',
                normalize=True, replace=True):
    r"""(DOM) Set a property value and priority within this declaration
    block.

    :param name:
        of the CSS property to set (in W3C DOM the parameter is called
        "propertyName"), always lowercase (even if not normalized).
        If a property with this `name` is present it will be reset.

        `name` may also be a :class:`~css_parser.css.Property` object;
        `value` and `priority` are not used to build a property then.
    :param value:
        the new value of the property. An empty value (``''`` or
        ``None``) removes the property through :meth:`removeProperty`
        and returns that call's result.
    :param priority:
        the optional priority of the property (e.g. "important").
    :param normalize:
        if True (DEFAULT) `name` will be normalized (lowercase, no simple
        escapes) so "color", "COLOR" or "C\olor" will all be equivalent
    :param replace:
        if True (DEFAULT) a property which is already present is updated
        in place. If False the new property is always appended, so the
        same name may be set several times, useful for e.g. stuff like::

            background: red;
            background: rgba(255, 0, 0, 0.5);

        which defines the same property but only capable UAs use the last
        property value, older ones use the first value.

    :exceptions:
        - :exc:`~xml.dom.SyntaxErr`:
          Raised if the specified value has a syntax error and is
          unparsable.
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this declaration is readonly or the property is
          readonly.
    """
    self._checkReadonly()

    if isinstance(name, Property):
        newp = name
        name = newp.literalname
    elif not value:
        # empty string or None effectively removed property
        # NOTE(review): `normalize` is not forwarded here, so removal
        # always uses removeProperty's default -- confirm this is intended
        return self.removeProperty(name)
    else:
        newp = Property(name, value, priority, parent=self)

    if not newp.wellformed:
        self._log.warn('Invalid Property: %s: %s %s'
                       % (name, value, priority))
        return

    if replace:
        # update the last matching property in place if there is one
        nname = self._normalize(name)
        candidates = self.getProperties(name, all=(not normalize))
        for existing in reversed(candidates):
            if ((normalize and existing.name == nname)
                    or existing.literalname == name):
                existing.propertyValue = newp.propertyValue.cssText
                existing.priority = newp.priority
                return

    # not yet set or forced omit replace
    newp.parent = self
    self.seq._readonly = False
    try:
        self.seq.append(newp, 'Property')
    finally:
        # restore the flag even if appending fails
        self.seq._readonly = True
def _getValidating(self):
    """Return the validation setting which applies to this declaration.

    The ``validating`` value of ``self.parentRule.parentStyleSheet`` is
    used whenever that attribute chain can be resolved. If it cannot
    (``AttributeError``), the declaration's own ``_validating`` is
    returned unless it is ``None``, in which case the default ``True``
    applies.
    """
    try:
        # CSSParser.parseX() sets validating of stylesheet
        sheet_setting = self.parentRule.parentStyleSheet.validating
    except AttributeError:
        # CSSParser.parseStyle() sets validating of declaration
        own_setting = self._validating
        return True if own_setting is None else own_setting
    return sheet_setting
def _getValid(self):
    """Return ``True`` unless one of the properties reported by
    ``self.getProperties()`` has a false ``valid`` attribute; checking
    stops at the first such property."""
    for prop in self.getProperties():
        if not prop.valid:
            return False
    return True
def __str__(self):
    """Return a short human readable description of this style rule.

    The text contains the class name, ``selectorText``, the ``cssText``
    of ``style``, ``_namespaces`` and the ``id`` of the object.
    """
    # The format string needs one placeholder per argument; an empty
    # string would raise "TypeError: not all arguments converted".
    return ("<css_parser.css.%s object selectorText=%r style=%r "
            "_namespaces=%r at 0x%x>" % (
                self.__class__.__name__,
                self.selectorText,
                self.style.cssText,
                self._namespaces,
                id(self)))
""" super(CSSStyleRule, self)._setCssText(cssText) # might be (cssText, namespaces) cssText, namespaces = self._splitNamespacesOff(cssText) try: # use parent style sheet ones if available namespaces = self.parentStyleSheet.namespaces except AttributeError: pass tokenizer = self._tokenize2(cssText) selectortokens = self._tokensupto2(tokenizer, blockstartonly=True) styletokens = self._tokensupto2(tokenizer, blockendonly=True) trail = self._nexttoken(tokenizer) if trail: self._log.error('CSSStyleRule: Trailing content: %s' % self._valuestr(cssText), token=trail) elif not selectortokens: self._log.error('CSSStyleRule: No selector found: %r' % self._valuestr(cssText)) elif self._tokenvalue(selectortokens[0]).startswith('@'): self._log.error('CSSStyleRule: No style rule: %r' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: newSelectorList = SelectorList(parentRule=self) newStyle = CSSStyleDeclaration(parentRule=self) ok = True bracetoken = selectortokens.pop() if self._tokenvalue(bracetoken) != '{': ok = False self._log.error( 'CSSStyleRule: No start { of style declaration found: %r' % self._valuestr(cssText), bracetoken) elif not selectortokens: ok = False self._log.error('CSSStyleRule: No selector found: %r.' 
def __getNamespaces(self):
    """Return the namespaces which apply to this rule.

    These are ``self.parentStyleSheet.namespaces`` if that attribute
    chain can be resolved, otherwise (``AttributeError``) the
    ``_namespaces`` of this rule's own ``selectorList``.
    """
    try:
        sheet_namespaces = self.parentStyleSheet.namespaces
    except AttributeError:
        # not attached to a sheet: fall back to the selector list
        return self.selectorList._namespaces
    return sheet_namespaces
def _getValid(self):
    """Return whether the style declaration of this rule is valid,
    i.e. the ``valid`` attribute of ``self.style``."""
    return self.style.valid


# read-only: only a getter is registered
valid = property(_getValid,
                 doc='``True`` when the style declaration is valid.')
def as_list(p):
    """Return `p` itself if it already is a ``list`` (no copy is made),
    otherwise a new list built from the items of `p`."""
    return p if isinstance(p, list) else list(p)
def __str__(self):
    """Return a short human readable description of this style sheet.

    The text contains the class name, ``encoding``, ``href``, the
    ``mediaText`` of ``media`` (``None`` if ``media`` is false),
    ``title``, ``namespaces.namespaces`` and the ``id`` of the object.
    """
    if self.media:
        mediaText = self.media.mediaText
    else:
        mediaText = None
    # The format string needs one placeholder per argument; an empty
    # string would raise "TypeError: not all arguments converted".
    return ("<css_parser.css.%s object encoding=%r href=%r "
            "media=%r title=%r namespaces=%r at 0x%x>" % (
                self.__class__.__name__,
                self.encoding,
                self.href,
                mediaText,
                self.title,
                self.namespaces.namespaces,
                id(self)))
- :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. """ self._checkReadonly() cssText, namespaces = self._splitNamespacesOff(cssText) tokenizer = self._tokenize2(cssText) def S(expected, seq, token, tokenizer=None): # @charset must be at absolute beginning of style sheet # or 0 for py3 return max(1, expected or 0) def COMMENT(expected, seq, token, tokenizer=None): "special: sets parent*" self.insertRule(css_parser.css.CSSComment([token], parentStyleSheet=self)) # or 0 for py3 return max(1, expected or 0) def charsetrule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSCharsetRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if expected > 0: self._log.error('CSSStylesheet: CSSCharsetRule only allowed ' 'at beginning of stylesheet.', token, xml.dom.HierarchyRequestErr) return expected elif rule.wellformed: self.insertRule(rule) return 1 def importrule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSImportRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if expected > 1: self._log.error('CSSStylesheet: CSSImportRule not allowed ' 'here.', token, xml.dom.HierarchyRequestErr) return expected elif rule.wellformed: self.insertRule(rule) return 1 def namespacerule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSNamespaceRule( cssText=self._tokensupto2(tokenizer, token), parentStyleSheet=self) if expected > 2: self._log.error('CSSStylesheet: CSSNamespaceRule not allowed ' 'here.', token, xml.dom.HierarchyRequestErr) return expected elif rule.wellformed: if rule.prefix not in self.namespaces: # add new if not same prefix self.insertRule(rule, _clean=False) else: # same prefix => replace namespaceURI for r in self.cssRules.rulesOfType(rule.NAMESPACE_RULE): if r.prefix == rule.prefix: 
r._replaceNamespaceURI(rule.namespaceURI) self._namespaces[rule.prefix] = rule.namespaceURI return 2 def variablesrule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSVariablesRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if expected > 2: self._log.error('CSSStylesheet: CSSVariablesRule not allowed ' 'here.', token, xml.dom.HierarchyRequestErr) return expected elif rule.wellformed: self.insertRule(rule) self._updateVariables() return 2 def fontfacerule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSFontFaceRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if rule.wellformed: self.insertRule(rule) return 3 def mediarule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSMediaRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if rule.wellformed: self.insertRule(rule) return 3 def pagerule(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = css_parser.css.CSSPageRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if rule.wellformed: self.insertRule(rule) return 3 def unknownrule(expected, seq, token, tokenizer): # parse and consume tokens in any case if token[1] in css_parser.css.MarginRule.margins: self._log.error('CSSStylesheet: MarginRule out CSSPageRule.', token, neverraise=True) rule = css_parser.css.MarginRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) else: self._log.warn('CSSStylesheet: Unknown @rule found.', token, neverraise=True) rule = css_parser.css.CSSUnknownRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if rule.wellformed: self.insertRule(rule) # or 0 for py3 return max(1, expected or 0) def ruleset(expected, seq, token, tokenizer): # parse and consume tokens in any case rule = 
css_parser.css.CSSStyleRule(parentStyleSheet=self) rule.cssText = self._tokensupto2(tokenizer, token) if rule.wellformed: self.insertRule(rule) return 3 # save for possible reset oldCssRules = self.cssRules oldNamespaces = self._namespaces self.cssRules = css_parser.css.CSSRuleList() # simple during parse self._namespaces = namespaces self._variables = CSSVariablesDeclaration() # not used?! newseq = [] # ['CHARSET', 'IMPORT', ('VAR', NAMESPACE'), ('PAGE', 'MEDIA', ruleset)] wellformed, expected = self._parse(0, newseq, tokenizer, {'S': S, 'COMMENT': COMMENT, 'CDO': lambda *ignored: None, 'CDC': lambda *ignored: None, 'CHARSET_SYM': charsetrule, 'FONT_FACE_SYM': fontfacerule, 'IMPORT_SYM': importrule, 'NAMESPACE_SYM': namespacerule, 'PAGE_SYM': pagerule, 'MEDIA_SYM': mediarule, 'VARIABLES_SYM': variablesrule, 'ATKEYWORD': unknownrule }, default=ruleset) if wellformed: # use proper namespace object self._namespaces = _Namespaces(parentStyleSheet=self, log=self._log) self._cleanNamespaces() else: # reset self._cssRules = oldCssRules self._namespaces = oldNamespaces self._updateVariables() self._cleanNamespaces() cssText = property(_getCssText, _setCssText, "Textual representation of the stylesheet (a byte string)") def _resolveImport(self, url): """Read (encoding, enctype, decodedContent) from `url` for @import sheets.""" try: # only available during parsing of a complete sheet parentEncoding = self.__newEncoding except AttributeError: try: # explicit @charset parentEncoding = self._cssRules[0].encoding except (IndexError, AttributeError): # default not UTF-8 but None! parentEncoding = None return _readUrl(url, fetcher=self._fetcher, overrideEncoding=self.__encodingOverride, parentEncoding=parentEncoding) def _setCssTextWithEncodingOverride(self, cssText, encodingOverride=None, encoding=None): """Set `cssText` but use `encodingOverride` to overwrite detected encoding. This is used by parse and @import during setting of cssText. 
If `encoding` is given use this but do not save as `encodingOverride`. """ if encodingOverride: # encoding during resolving of @import self.__encodingOverride = encodingOverride if encoding: # save for nested @import self.__newEncoding = encoding self.cssText = cssText if encodingOverride: # set encodingOverride explicit again! self.encoding = self.__encodingOverride # del? self.__encodingOverride = None elif encoding: # may e.g. be httpEncoding self.encoding = encoding try: del self.__newEncoding except AttributeError: pass def _setFetcher(self, fetcher=None): """Set @import URL loader, if None the default is used.""" self._fetcher = fetcher def _getEncoding(self): """Encoding set in :class:`~css_parser.css.CSSCharsetRule` or if ``None`` resulting in default ``utf-8`` encoding being used.""" try: return self._cssRules[0].encoding except (IndexError, AttributeError): return 'utf-8' def _setEncoding(self, encoding): """Set `encoding` of charset rule if present in sheet or insert a new :class:`~css_parser.css.CSSCharsetRule` with given `encoding`. If `encoding` is None removes charsetrule if present resulting in default encoding of utf-8. """ try: rule = self._cssRules[0] except IndexError: rule = None if rule and rule.CHARSET_RULE == rule.type: if encoding: rule.encoding = encoding else: self.deleteRule(0) elif encoding: self.insertRule(css_parser.css.CSSCharsetRule(encoding=encoding), 0) encoding = property(_getEncoding, _setEncoding, "(css_parser) Reflect encoding of an @charset rule or 'utf-8' " "(default) if set to ``None``") namespaces = property(lambda self: self._namespaces, doc="All Namespaces used in this CSSStyleSheet.") def _updateVariables(self): """Updates self._variables, called when @import or @variables rules is added to sheet. 
""" for r in self.cssRules.rulesOfType(CSSRule.IMPORT_RULE): s = r.styleSheet if s: for var in s.variables: self._variables.setVariable(var, s.variables[var]) # for r in self.cssRules.rulesOfType(CSSRule.IMPORT_RULE): # for vr in r.styleSheet.cssRules.rulesOfType(CSSRule.VARIABLES_RULE): # for var in vr.variables: # self._variables.setVariable(var, vr.variables[var]) for vr in self.cssRules.rulesOfType(CSSRule.VARIABLES_RULE): for var in vr.variables: self._variables.setVariable(var, vr.variables[var]) variables = property(lambda self: self._variables, doc="A :class:`css_parser.css.CSSVariablesDeclaration` " "containing all available variables in this " "CSSStyleSheet including the ones defined in " "imported sheets.") def add(self, rule): """Add `rule` to style sheet at appropriate position. Same as ``insertRule(rule, inOrder=True)``. """ return self.insertRule(rule, index=None, inOrder=True) def deleteRule(self, index): """Delete rule at `index` from the style sheet. :param index: The `index` of the rule to be removed from the StyleSheet's rule list. For an `index` < 0 **no** :exc:`~xml.dom.IndexSizeErr` is raised but rules for normal Python lists are used. E.g. ``deleteRule(-1)`` removes the last rule in cssRules. `index` may also be a CSSRule object which will then be removed from the StyleSheet. :exceptions: - :exc:`~xml.dom.IndexSizeErr`: Raised if the specified index does not correspond to a rule in the style sheet's rule list. - :exc:`~xml.dom.NamespaceErr`: Raised if removing this rule would result in an invalid StyleSheet - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this style sheet is readonly. 
""" self._checkReadonly() if isinstance(index, CSSRule): for i, r in enumerate(self.cssRules): if index == r: index = i break else: raise xml.dom.IndexSizeErr("CSSStyleSheet: Not a rule in" " this sheets'a cssRules list: %s" % index) try: rule = self._cssRules[index] except IndexError: raise xml.dom.IndexSizeErr( 'CSSStyleSheet: %s is not a valid index in the rulelist of ' 'length %i' % (index, self._cssRules.length)) else: if rule.type == rule.NAMESPACE_RULE: # check all namespacerules if used uris = [r.namespaceURI for r in self if r.type == r.NAMESPACE_RULE] useduris = self._getUsedURIs() if rule.namespaceURI in useduris and\ uris.count(rule.namespaceURI) == 1: raise xml.dom.NoModificationAllowedErr( 'CSSStyleSheet: NamespaceURI defined in this rule is ' 'used, cannot remove.') return rule._parentStyleSheet = None # detach del self._cssRules[index] # delete from StyleSheet def insertRule(self, rule, index=None, inOrder=False, _clean=True): """ Used to insert a new rule into the style sheet. The new rule now becomes part of the cascade. :param rule: a parsable DOMString, in css_parser also a :class:`~css_parser.css.CSSRule` or :class:`~css_parser.css.CSSRuleList` :param index: of the rule before the new rule will be inserted. If the specified `index` is equal to the length of the StyleSheet's rule collection, the rule will be added to the end of the style sheet. If `index` is not given or ``None`` rule will be appended to rule list. :param inOrder: if ``True`` the rule will be put to a proper location while ignoring `index` and without raising :exc:`~xml.dom.HierarchyRequestErr`. The resulting index is returned nevertheless. :returns: The index within the style sheet's rule collection :Exceptions: - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at the specified `index` e.g. if an @import rule is inserted after a standard rule set or other at-rule. 
- :exc:`~xml.dom.IndexSizeErr`: Raised if the specified `index` is not a valid insertion point. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this style sheet is readonly. - :exc:`~xml.dom.SyntaxErr`: Raised if the specified rule has a syntax error and is unparsable. """ self._checkReadonly() # check position if index is None: index = len(self._cssRules) elif index < 0 or index > self._cssRules.length: raise xml.dom.IndexSizeErr( 'CSSStyleSheet: Invalid index %s for CSSRuleList with a ' 'length of %s.' % (index, self._cssRules.length)) return if isinstance(rule, string_type): # init a temp sheet which has the same properties as self tempsheet = CSSStyleSheet(href=self.href, media=self.media, title=self.title, parentStyleSheet=self.parentStyleSheet, ownerRule=self.ownerRule) tempsheet._ownerNode = self.ownerNode tempsheet._fetcher = self._fetcher # prepend encoding if in this sheet to be able to use it in # @import rules encoding resolution # do not add if new rule startswith "@charset" (which is exact!) if not rule.startswith('@charset') and (self._cssRules and self._cssRules[0].type == self._cssRules[0].CHARSET_RULE): # rule 0 is @charset! newrulescount, newruleindex = 2, 1 rule = self._cssRules[0].cssText + rule else: newrulescount, newruleindex = 1, 0 # parse the new rule(s) tempsheet.cssText = (rule, self._namespaces) if len(tempsheet.cssRules) != newrulescount or (not isinstance( tempsheet.cssRules[newruleindex], css_parser.css.CSSRule)): self._log.error('CSSStyleSheet: Not a CSSRule: %s' % rule) return rule = tempsheet.cssRules[newruleindex] rule._parentStyleSheet = None # done later? # TODO: # tempsheet._namespaces = self._namespaces # variables? 
elif isinstance(rule, css_parser.css.CSSRuleList): # insert all rules for i, r in enumerate(rule): self.insertRule(r, index + i) return index if not rule.wellformed: self._log.error('CSSStyleSheet: Invalid rules cannot be added.') return # CHECK HIERARCHY # @charset if rule.type == rule.CHARSET_RULE: if inOrder: index = 0 # always first and only if (self._cssRules and self._cssRules[0].type == rule.CHARSET_RULE): self._cssRules[0].encoding = rule.encoding else: self._cssRules.insert(0, rule) elif index != 0 or (self._cssRules and self._cssRules[0].type == rule.CHARSET_RULE): self._log.error( 'CSSStylesheet: @charset only allowed once at the' ' beginning of a stylesheet.', error=xml.dom.HierarchyRequestErr) return else: self._cssRules.insert(index, rule) # @unknown or comment elif rule.type in (rule.UNKNOWN_RULE, rule.COMMENT) and not inOrder: if index == 0 and self._cssRules and\ self._cssRules[0].type == rule.CHARSET_RULE: self._log.error( 'CSSStylesheet: @charset must be the first rule.', error=xml.dom.HierarchyRequestErr) return else: self._cssRules.insert(index, rule) # @import elif rule.type == rule.IMPORT_RULE: if inOrder: # automatic order if rule.type in (r.type for r in self): # find last of this type for i, r in enumerate(reversed(self._cssRules)): if r.type == rule.type: index = len(self._cssRules) - i break else: # find first point to insert if self._cssRules and\ self._cssRules[0].type in (rule.CHARSET_RULE, rule.COMMENT): index = 1 else: index = 0 else: # after @charset if index == 0 and self._cssRules and\ self._cssRules[0].type == rule.CHARSET_RULE: self._log.error( 'CSSStylesheet: Found @charset at index 0.', error=xml.dom.HierarchyRequestErr) return # before @namespace @variables @page @font-face @media stylerule for r in self._cssRules[:index]: if r.type in (r.NAMESPACE_RULE, r.VARIABLES_RULE, r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, r.FONT_FACE_RULE): self._log.error( 'CSSStylesheet: Cannot insert @import here,' ' found @namespace, @variables, 
@media, @page or' ' CSSStyleRule before index %s.' % index, error=xml.dom.HierarchyRequestErr) return self._cssRules.insert(index, rule) self._updateVariables() # @namespace elif rule.type == rule.NAMESPACE_RULE: if inOrder: if rule.type in (r.type for r in self): # find last of this type for i, r in enumerate(reversed(self._cssRules)): if r.type == rule.type: index = len(self._cssRules) - i break else: # find first point to insert for i, r in enumerate(self._cssRules): if r.type in (r.VARIABLES_RULE, r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, r.FONT_FACE_RULE, r.UNKNOWN_RULE, r.COMMENT): index = i # before these break else: # after @charset and @import for r in self._cssRules[index:]: if r.type in (r.CHARSET_RULE, r.IMPORT_RULE): self._log.error( 'CSSStylesheet: Cannot insert @namespace here,' ' found @charset or @import after index %s.' % index, error=xml.dom.HierarchyRequestErr) return # before @variables @media @page @font-face and stylerule for r in self._cssRules[:index]: if r.type in (r.VARIABLES_RULE, r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, r.FONT_FACE_RULE): self._log.error( 'CSSStylesheet: Cannot insert @namespace here,' ' found @variables, @media, @page or CSSStyleRule' ' before index %s.' 
% index, error=xml.dom.HierarchyRequestErr) return if not (rule.prefix in self.namespaces and self.namespaces[rule.prefix] == rule.namespaceURI): # no doublettes self._cssRules.insert(index, rule) if _clean: self._cleanNamespaces() # @variables elif rule.type == rule.VARIABLES_RULE: if inOrder: if rule.type in (r.type for r in self): # find last of this type for i, r in enumerate(reversed(self._cssRules)): if r.type == rule.type: index = len(self._cssRules) - i break else: # find first point to insert for i, r in enumerate(self._cssRules): if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, r.FONT_FACE_RULE, r.UNKNOWN_RULE, r.COMMENT): index = i # before these break else: # after @charset @import @namespace for r in self._cssRules[index:]: if r.type in (r.CHARSET_RULE, r.IMPORT_RULE, r.NAMESPACE_RULE): self._log.error( 'CSSStylesheet: Cannot insert @variables here,' ' found @charset, @import or @namespace after' ' index %s.' % index, error=xml.dom.HierarchyRequestErr) return # before @media @page @font-face and stylerule for r in self._cssRules[:index]: if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, r.FONT_FACE_RULE): self._log.error( 'CSSStylesheet: Cannot insert @variables here,' ' found @media, @page or CSSStyleRule' ' before index %s.' % index, error=xml.dom.HierarchyRequestErr) return self._cssRules.insert(index, rule) self._updateVariables() # all other where order is not important else: if inOrder: # simply add to end as no specific order self._cssRules.append(rule) index = len(self._cssRules) - 1 else: for r in self._cssRules[index:]: if r.type in (r.CHARSET_RULE, r.IMPORT_RULE, r.NAMESPACE_RULE): self._log.error( 'CSSStylesheet: Cannot insert rule here, found ' '@charset, @import or @namespace before index %s.' 
% index, error=xml.dom.HierarchyRequestErr) return self._cssRules.insert(index, rule) # post settings rule._parentStyleSheet = self if rule.IMPORT_RULE == rule.type and not rule.hrefFound: # try loading the imported sheet which has new relative href now rule.href = rule.href return index ownerRule = property(lambda self: self._ownerRule, doc='A ref to an @import rule if it is imported, ' 'else ``None``.') def _getValid(self): """Check if each contained rule is valid.""" for rule in self.cssRules: # Not all rules can be checked for validity if hasattr(rule, 'valid') and not rule.valid: return False return True valid = property(_getValid, doc='``True`` if all contained rules are valid') @Deprecated('Use ``css_parser.setSerializer(serializer)`` instead.') def setSerializer(self, cssserializer): """Set the css_parser global Serializer used for all output.""" if isinstance(cssserializer, css_parser.CSSSerializer): css_parser.ser = cssserializer else: raise ValueError('Serializer must be an instance of ' 'css_parser.CSSSerializer.') @Deprecated('Set pref in ``css_parser.ser.prefs`` instead.') def setSerializerPref(self, pref, value): """Set a Preference of CSSSerializer used for output. See :class:`css_parser.serialize.Preferences` for possible preferences to be set. """ css_parser.ser.prefs.__setattr__(pref, value) css-parser-1.0.4/src/css_parser/css/cssunknownrule.py0000644000175000017500000002104213407702010023236 0ustar kovidkovid00000000000000"""CSSUnknownRule implements DOM Level 2 CSS CSSUnknownRule.""" from __future__ import unicode_literals, division, absolute_import, print_function __all__ = ['CSSUnknownRule'] __docformat__ = 'restructuredtext' __version__ = '$Id$' from . import cssrule import css_parser import xml.dom class CSSUnknownRule(cssrule.CSSRule): """ Represents an at-rule not supported by this user agent, so in effect all other at-rules not defined in css_parser. 
Format:: @xxx until ';' or block {...} """ def __init__(self, cssText='', parentRule=None, parentStyleSheet=None, readonly=False): """ :param cssText: of type string """ super(CSSUnknownRule, self).__init__(parentRule=parentRule, parentStyleSheet=parentStyleSheet) self._atkeyword = None if cssText: self.cssText = cssText self._readonly = readonly def __repr__(self): return "css_parser.css.%s(cssText=%r)" % ( self.__class__.__name__, self.cssText) def __str__(self): return "" % ( self.__class__.__name__, self.cssText, id(self)) def _getCssText(self): """Return serialized property cssText.""" return css_parser.ser.do_CSSUnknownRule(self) def _setCssText(self, cssText): """ :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.InvalidModificationErr`: Raised if the specified CSS string value represents a different type of rule than the current one. - :exc:`~xml.dom.HierarchyRequestErr`: Raised if the rule cannot be inserted at this point in the style sheet. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if the rule is readonly. 
""" super(CSSUnknownRule, self)._setCssText(cssText) tokenizer = self._tokenize2(cssText) attoken = self._nexttoken(tokenizer, None) if not attoken or self._type(attoken) != self._prods.ATKEYWORD: self._log.error('CSSUnknownRule: No CSSUnknownRule found: %s' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: # for closures: must be a mutable new = {'nesting': [], # {} [] or () 'wellformed': True } def CHAR(expected, seq, token, tokenizer=None): type_, val, line, col = token if expected != 'EOF': if val in '{[(': new['nesting'].append(val) elif val in '}])': opening = {'}': '{', ']': '[', ')': '('}[val] try: if new['nesting'][-1] == opening: new['nesting'].pop() else: raise IndexError() except IndexError: new['wellformed'] = False self._log.error('CSSUnknownRule: Wrong nesting of ' '{, [ or (.', token=token) if val in '};' and not new['nesting']: expected = 'EOF' seq.append(val, type_, line=line, col=col) return expected else: new['wellformed'] = False self._log.error('CSSUnknownRule: Expected end of rule.', token=token) return expected def FUNCTION(expected, seq, token, tokenizer=None): # handled as opening ( type_, val, line, col = token val = self._tokenvalue(token) if expected != 'EOF': new['nesting'].append('(') seq.append(val, type_, line=line, col=col) return expected else: new['wellformed'] = False self._log.error('CSSUnknownRule: Expected end of rule.', token=token) return expected def EOF(expected, seq, token, tokenizer=None): "close all blocks and return 'EOF'" for x in reversed(new['nesting']): closing = {'{': '}', '[': ']', '(': ')'}[x] seq.append(closing, closing) new['nesting'] = [] return 'EOF' def INVALID(expected, seq, token, tokenizer=None): # makes rule invalid self._log.error('CSSUnknownRule: Bad syntax.', token=token, error=xml.dom.SyntaxErr) new['wellformed'] = False return expected def STRING(expected, seq, token, tokenizer=None): type_, val, line, col = token val = self._stringtokenvalue(token) if expected != 'EOF': 
seq.append(val, type_, line=line, col=col) return expected else: new['wellformed'] = False self._log.error('CSSUnknownRule: Expected end of rule.', token=token) return expected def URI(expected, seq, token, tokenizer=None): type_, val, line, col = token val = self._uritokenvalue(token) if expected != 'EOF': seq.append(val, type_, line=line, col=col) return expected else: new['wellformed'] = False self._log.error('CSSUnknownRule: Expected end of rule.', token=token) return expected def default(expected, seq, token, tokenizer=None): type_, val, line, col = token if expected != 'EOF': seq.append(val, type_, line=line, col=col) return expected else: new['wellformed'] = False self._log.error('CSSUnknownRule: Expected end of rule.', token=token) return expected # unknown : ATKEYWORD S* ... ; | } newseq = self._tempSeq() wellformed, expected = self._parse(expected=None, seq=newseq, tokenizer=tokenizer, productions={'CHAR': CHAR, 'EOF': EOF, 'FUNCTION': FUNCTION, 'INVALID': INVALID, 'STRING': STRING, 'URI': URI, 'S': default # overwrite default default! 
}, default=default, new=new) # wellformed set by parse wellformed = wellformed and new['wellformed'] # post conditions if expected != 'EOF': wellformed = False self._log.error('CSSUnknownRule: No ending ";" or "}" found: ' '%r' % self._valuestr(cssText)) elif new['nesting']: wellformed = False self._log.error('CSSUnknownRule: Unclosed "{", "[" or "(": %r' % self._valuestr(cssText)) # set all if wellformed: self.atkeyword = self._tokenvalue(attoken) self._setSeq(newseq) cssText = property(fget=_getCssText, fset=_setCssText, doc="(DOM) The parsable textual representation.") type = property(lambda self: self.UNKNOWN_RULE, doc="The type of this rule, as defined by a CSSRule " "type constant.") wellformed = property(lambda self: bool(self.atkeyword)) css-parser-1.0.4/src/css_parser/css/cssvalue.py0000644000175000017500000014667313407702010022005 0ustar kovidkovid00000000000000from __future__ import unicode_literals, division, absolute_import, print_function import xml.dom import re import math import css_parser.helper import css_parser from css_parser.prodparser import Choice, Sequence, PreDef, Prod, ProdParser """CSSValue related classes - CSSValue implements DOM Level 2 CSS CSSValue - CSSPrimitiveValue implements DOM Level 2 CSS CSSPrimitiveValue - CSSValueList implements DOM Level 2 CSS CSSValueList """ __all__ = ['CSSValue', 'CSSPrimitiveValue', 'CSSValueList', 'RGBColor', 'CSSVariable'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import sys if sys.version_info[0] >= 3: text_type = str string_type = str else: text_type = unicode string_type = basestring class CSSValue(css_parser.util._NewBase): """The CSSValue interface represents a simple or a complex value. A CSSValue object only occurs in a context of a CSS property. """ # The value is inherited and the cssText contains "inherit". CSS_INHERIT = 0 # The value is a CSSPrimitiveValue. CSS_PRIMITIVE_VALUE = 1 # The value is a CSSValueList. CSS_VALUE_LIST = 2 # The value is a custom value. 
CSS_CUSTOM = 3 # The value is a CSSVariable. CSS_VARIABLE = 4 _typestrings = {0: 'CSS_INHERIT', 1: 'CSS_PRIMITIVE_VALUE', 2: 'CSS_VALUE_LIST', 3: 'CSS_CUSTOM', 4: 'CSS_VARIABLE'} def __init__(self, cssText=None, parent=None, readonly=False): """ :param cssText: the parsable cssText of the value :param readonly: defaults to False """ super(CSSValue, self).__init__() self._cssValueType = None self.wellformed = False self.parent = parent if cssText is not None: # may be 0 if isinstance(cssText, int): cssText = text_type(cssText) # if it is an integer elif isinstance(cssText, float): cssText = '%f' % cssText # if it is a floating point number self.cssText = cssText self._readonly = readonly def __repr__(self): return "css_parser.css.%s(%r)" % ( self.__class__.__name__, self.cssText) def __str__(self): return "" % (self.__class__.__name__, self.cssValueTypeString, self.cssText, id(self)) def _setCssText(self, cssText): """ Format:: unary_operator : '-' | '+' ; operator : '/' S* | ',' S* | /* empty */ ; expr : term [ operator term ]* ; term : unary_operator? [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | TIME S* | FREQ S* ] | STRING S* | IDENT S* | URI S* | hexcolor | function | UNICODE-RANGE S* ; function : FUNCTION S* expr ')' S* ; /* * There is a constraint on the color that it must * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F]) * after the "#"; e.g., "#000" is OK, but "#abcd" is not. */ hexcolor : HASH S* ; :exceptions: - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error (according to the attached property) or is unparsable. - :exc:`~xml.dom.InvalidModificationErr`: TODO: Raised if the specified CSS string value represents a different type of values than the values allowed by the CSS property. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this value is readonly. 
""" self._checkReadonly() # used as operator is , / or S nextSor = ',/' term = Choice(Sequence(PreDef.unary(), Choice(PreDef.number(nextSor=nextSor), PreDef.percentage(nextSor=nextSor), PreDef.dimension(nextSor=nextSor))), PreDef.string(nextSor=nextSor), PreDef.ident(nextSor=nextSor), PreDef.uri(nextSor=nextSor), PreDef.hexcolor(nextSor=nextSor), PreDef.unicode_range(nextSor=nextSor), # special case IE only expression Prod(name='expression', match=lambda t, v: t == self._prods.FUNCTION and ( css_parser.helper.normalize(v) in ( 'expression(', 'alpha(', 'blur(', 'chroma(', 'dropshadow(', 'fliph(', 'flipv(', 'glow(', 'gray(', 'invert(', 'mask(', 'shadow(', 'wave(', 'xray(') or v.startswith('progid:DXImageTransform.Microsoft.') ), nextSor=nextSor, toSeq=lambda t, tokens: (ExpressionValue._functionName, ExpressionValue( css_parser.helper.pushtoken(t, tokens), parent=self) ) ), # CSS Variable var( PreDef.variable(nextSor=nextSor, toSeq=lambda t, tokens: ('CSSVariable', CSSVariable( css_parser.helper.pushtoken(t, tokens), parent=self) ) ), # calc( PreDef.calc(nextSor=nextSor, toSeq=lambda t, tokens: (CalcValue._functionName, CalcValue( css_parser.helper.pushtoken(t, tokens), parent=self) ) ), # TODO: # # rgb/rgba( # Prod(name='RGBColor', # match=lambda t, v: t == self._prods.FUNCTION and ( # css_parser.helper.normalize(v) in (u'rgb(', # u'rgba(' # ) # ), # nextSor=nextSor, # toSeq=lambda t, tokens: (RGBColor._functionName, # RGBColor( # css_parser.helper.pushtoken(t, tokens), # parent=self) # ) # ), # other functions like rgb( etc PreDef.function(nextSor=nextSor, toSeq=lambda t, tokens: ('FUNCTION', CSSFunction( css_parser.helper.pushtoken(t, tokens), parent=self) ) ) ) operator = Choice(PreDef.S(), PreDef.char('comma', ',', toSeq=lambda t, tokens: ('operator', t[1])), PreDef.char('slash', '/', toSeq=lambda t, tokens: ('operator', t[1])), optional=True) # CSSValue PRODUCTIONS valueprods = Sequence(term, Sequence(operator, # mayEnd this Sequence if whitespace # TODO: only 
when setting via other class # used by variabledeclaration currently PreDef.char('END', ';', stopAndKeep=True, optional=True), term, minmax=lambda: (0, None))) # parse wellformed, seq, store, notused = ProdParser().parse(cssText, 'CSSValue', valueprods, keepS=True) if wellformed: # - count actual values and set firstvalue which is used later on # - combine comma separated list, e.g. font-family to a single item # - remove S which should be an operator but is no needed count, firstvalue = 0, () newseq = self._tempSeq() i, end = 0, len(seq) while i < end: item = seq[i] if item.type == self._prods.S: pass elif (item.value, item.type) == (',', 'operator'): # , separared counts as a single STRING for now # URI or STRING value might be a single CHAR too! newseq.appendItem(item) count -= 1 if firstvalue: # list of IDENTs is handled as STRING! if firstvalue[1] == self._prods.IDENT: firstvalue = firstvalue[0], 'STRING' elif item.value == '/': # / separated items count as one newseq.appendItem(item) elif item.value == '-' or item.value == '+': # combine +- and following number or other i += 1 try: next = seq[i] except IndexError: firstvalue = () # raised later break newval = item.value + next.value newseq.append(newval, next.type, item.line, item.col) if not firstvalue: firstvalue = (newval, next.type) count += 1 elif item.type != css_parser.css.CSSComment: newseq.appendItem(item) if not firstvalue: firstvalue = (item.value, item.type) count += 1 else: newseq.appendItem(item) i += 1 if not firstvalue: self._log.error( 'CSSValue: Unknown syntax or no value: %r.' % self._valuestr(cssText)) else: # ok and set self._setSeq(newseq) self.wellformed = wellformed if hasattr(self, '_value'): # only in case of CSSPrimitiveValue, else remove! del self._value if count == 1: # inherit, primitive or variable # Under Pythn 2.x this was basestring but ... 
if isinstance(firstvalue[0], string_type) and\ 'inherit' == css_parser.helper.normalize(firstvalue[0]): self.__class__ = CSSValue self._cssValueType = CSSValue.CSS_INHERIT elif 'CSSVariable' == firstvalue[1]: self.__class__ = CSSVariable self._value = firstvalue # TODO: remove major hack! self._name = firstvalue[0]._name else: self.__class__ = CSSPrimitiveValue self._value = firstvalue elif count > 1: # valuelist self.__class__ = CSSValueList # change items in list to specific type (primitive etc) newseq = self._tempSeq() commalist = [] nexttocommalist = False def itemValue(item): "Reserialized simple item.value" if self._prods.STRING == item.type: return css_parser.helper.string(item.value) elif self._prods.URI == item.type: return css_parser.helper.uri(item.value) elif self._prods.FUNCTION == item.type or\ 'CSSVariable' == item.type: return item.value.cssText else: return item.value def saveifcommalist(commalist, newseq): """ saves items in commalist to seq and items if anything in there """ if commalist: newseq.replace(-1, CSSPrimitiveValue(cssText=''.join( commalist)), CSSPrimitiveValue, newseq[-1].line, newseq[-1].col) del commalist[:] for i, item in enumerate(self._seq): if issubclass(type(item.value), CSSValue): # set parent of CSSValueList items to the lists # parent item.value.parent = self.parent if item.type in (self._prods.DIMENSION, self._prods.FUNCTION, self._prods.HASH, self._prods.IDENT, self._prods.NUMBER, self._prods.PERCENTAGE, self._prods.STRING, self._prods.URI, self._prods.UNICODE_RANGE, 'CSSVariable'): if nexttocommalist: # wait until complete commalist.append(itemValue(item)) else: saveifcommalist(commalist, newseq) # append new item if hasattr(item.value, 'cssText'): newseq.append(item.value, item.value.__class__, item.line, item.col) else: newseq.append(CSSPrimitiveValue( itemValue(item)), CSSPrimitiveValue, item.line, item.col) nexttocommalist = False elif ',' == item.value: if not commalist: # save last item to commalist 
commalist.append(itemValue(self._seq[i - 1])) commalist.append(',') nexttocommalist = True else: if nexttocommalist: commalist.append(item.value.cssText) else: newseq.appendItem(item) saveifcommalist(commalist, newseq) self._setSeq(newseq) else: # should not happen... self.__class__ = CSSValue self._cssValueType = CSSValue.CSS_CUSTOM cssText = property(lambda self: css_parser.ser.do_css_CSSValue(self), _setCssText, doc="A string representation of the current value.") cssValueType = property(lambda self: self._cssValueType, doc="A (readonly) code defining the type of the value.") cssValueTypeString = property( lambda self: CSSValue._typestrings.get(self.cssValueType, None), doc="(readonly) Name of cssValueType.") class CSSPrimitiveValue(CSSValue): """Represents a single CSS Value. May be used to determine the value of a specific style property currently set in a block or to set a specific style property explicitly within the block. Might be obtained from the getPropertyCSSValue method of CSSStyleDeclaration. Conversions are allowed between absolute values (from millimeters to centimeters, from degrees to radians, and so on) but not between relative values. (For example, a pixel value cannot be converted to a centimeter value.) Percentage values can't be converted since they are relative to the parent value (or another property value). There is one exception for color percentage values: since a color percentage value is relative to the range 0-255, a color percentage value can be converted to a number; (see also the RGBColor interface). """ # constant: type of this CSSValue class cssValueType = CSSValue.CSS_PRIMITIVE_VALUE __types = css_parser.cssproductions.CSSProductions # An integer indicating which type of unit applies to the value. 
# -- members of class CSSPrimitiveValue: unit codes, unit groups, converters --

CSS_UNKNOWN = 0  # only obtainable via cssText
CSS_NUMBER = 1
CSS_PERCENTAGE = 2
CSS_EMS = 3
CSS_EXS = 4
CSS_PX = 5
CSS_CM = 6
CSS_MM = 7
CSS_IN = 8
CSS_PT = 9
CSS_PC = 10
CSS_DEG = 11
CSS_RAD = 12
CSS_GRAD = 13
CSS_MS = 14
CSS_S = 15
CSS_HZ = 16
CSS_KHZ = 17
CSS_DIMENSION = 18
CSS_STRING = 19
CSS_URI = 20
CSS_IDENT = 21
CSS_ATTR = 22
CSS_COUNTER = 23
CSS_RECT = 24
CSS_RGBCOLOR = 25
# NOT OFFICIAL:
CSS_RGBACOLOR = 26
CSS_UNICODE_RANGE = 27

# Groups of unit codes accepted by the typed getters/setters.
_floattypes = (CSS_NUMBER, CSS_PERCENTAGE, CSS_EMS, CSS_EXS,
               CSS_PX, CSS_CM, CSS_MM, CSS_IN, CSS_PT, CSS_PC,
               CSS_DEG, CSS_RAD, CSS_GRAD, CSS_MS, CSS_S,
               CSS_HZ, CSS_KHZ, CSS_DIMENSION)
_stringtypes = (CSS_ATTR, CSS_IDENT, CSS_STRING, CSS_URI)
_countertypes = (CSS_COUNTER,)
_recttypes = (CSS_RECT,)
_rbgtypes = (CSS_RGBCOLOR, CSS_RGBACOLOR)
_lengthtypes = (CSS_NUMBER, CSS_EMS, CSS_EXS,
                CSS_PX, CSS_CM, CSS_MM, CSS_IN, CSS_PT, CSS_PC)

# (oldType, newType): function converting a number expressed in oldType
# into the equivalent number expressed in newType.
#
# 1in == 2.54cm == 25.4mm == 72pt == 6pc, 1pc == 12pt.
# Float literals keep the divisions exact under Python 2 as well.
_converter = {
    # cm <-> mm
    (CSS_CM, CSS_MM): lambda x: x * 10,
    (CSS_MM, CSS_CM): lambda x: x / 10.0,

    # pt <-> pc (12 points make one pica)
    (CSS_PT, CSS_PC): lambda x: x / 12.0,
    (CSS_PC, CSS_PT): lambda x: x * 12,

    # cm, mm <-> in
    (CSS_CM, CSS_IN): lambda x: x / 2.54,
    (CSS_IN, CSS_CM): lambda x: x * 2.54,
    (CSS_MM, CSS_IN): lambda x: x / 25.4,
    (CSS_IN, CSS_MM): lambda x: x * 25.4,

    # in <-> pt (72 points make one inch)
    (CSS_IN, CSS_PT): lambda x: x * 72,
    (CSS_PT, CSS_IN): lambda x: x / 72.0,

    # cm, mm <-> pt (via inches)
    (CSS_CM, CSS_PT): lambda x: x / 2.54 * 72,
    (CSS_PT, CSS_CM): lambda x: x / 72.0 * 2.54,
    (CSS_MM, CSS_PT): lambda x: x / 25.4 * 72,
    (CSS_PT, CSS_MM): lambda x: x / 72.0 * 25.4,

    # in <-> pc (6 picas make one inch)
    (CSS_IN, CSS_PC): lambda x: x * 6,
    (CSS_PC, CSS_IN): lambda x: x / 6.0,

    # cm, mm <-> pc (via inches)
    (CSS_CM, CSS_PC): lambda x: x / 2.54 * 6,
    (CSS_PC, CSS_CM): lambda x: x / 6.0 * 2.54,
    (CSS_MM, CSS_PC): lambda x: x / 25.4 * 6,
    (CSS_PC, CSS_MM): lambda x: x / 6.0 * 25.4,

    # hz <-> khz
    (CSS_KHZ, CSS_HZ): lambda x: x * 1000,
    (CSS_HZ, CSS_KHZ): lambda x: x / 1000.0,

    # s <-> ms
    (CSS_S, CSS_MS): lambda x: x * 1000,
    (CSS_MS, CSS_S): lambda x: x / 1000.0,

    # rad <-> deg
    (CSS_RAD, CSS_DEG): lambda x: math.degrees(x),
    (CSS_DEG, CSS_RAD): lambda x: math.radians(x),

    # TODO: grad <-> deg and grad <-> rad are not implemented yet
}


def __init__(self, cssText=None, parent=None, readonly=False):
    """See CSSValue.__init__()"""
    super(CSSPrimitiveValue, self).__init__(cssText=cssText,
                                            parent=parent,
                                            readonly=readonly)


def __str__(self):
    # The format string must consume all four arguments; an empty string
    # here raises "not all arguments converted".
    return "<css_parser.css.%s object primitiveType=%s cssText=%r at 0x%x>" % (
        self.__class__.__name__,
        self.primitiveTypeString,
        self.cssText,
        id(self))


# Names of the unit codes; the list index equals the numeric code.
_unitnames = ['CSS_UNKNOWN',
              'CSS_NUMBER', 'CSS_PERCENTAGE',
              'CSS_EMS', 'CSS_EXS',
              'CSS_PX',
              'CSS_CM', 'CSS_MM',
              'CSS_IN',
              'CSS_PT', 'CSS_PC',
              'CSS_DEG', 'CSS_RAD', 'CSS_GRAD',
              'CSS_MS', 'CSS_S',
              'CSS_HZ', 'CSS_KHZ',
              'CSS_DIMENSION',
              'CSS_STRING', 'CSS_URI', 'CSS_IDENT',
              'CSS_ATTR', 'CSS_COUNTER', 'CSS_RECT',
              'CSS_RGBCOLOR', 'CSS_RGBACOLOR',
              'CSS_UNICODE_RANGE'
              ]
# -- members of class CSSPrimitiveValue: primitive-type detection --

# number part (optionally signed) followed by whatever unit text remains
_reNumDim = re.compile(r'([+-]?\d*\.\d+|[+-]?\d+)(.*)$', re.I | re.U | re.X)


def _unitDIMENSION(value):
    """Check val for dimension name."""
    units = {'em': 'CSS_EMS', 'ex': 'CSS_EXS',
             'px': 'CSS_PX',
             'cm': 'CSS_CM', 'mm': 'CSS_MM',
             'in': 'CSS_IN',
             'pt': 'CSS_PT', 'pc': 'CSS_PC',
             'deg': 'CSS_DEG', 'rad': 'CSS_RAD', 'grad': 'CSS_GRAD',
             'ms': 'CSS_MS', 's': 'CSS_S',
             'hz': 'CSS_HZ', 'khz': 'CSS_KHZ'
             }
    # only the unit part matters here, the number is discarded
    _, dim = CSSPrimitiveValue._reNumDim.findall(
        css_parser.helper.normalize(value))[0]
    return units.get(dim, 'CSS_DIMENSION')


def _unitFUNCTION(value):
    """Check val for function name."""
    units = {'attr(': 'CSS_ATTR',
             'counter(': 'CSS_COUNTER',
             'rect(': 'CSS_RECT',
             'rgb(': 'CSS_RGBCOLOR',
             'rgba(': 'CSS_RGBACOLOR',
             }
    return units.get(re.findall(r'^(.*?\()',
                                css_parser.helper.normalize(value.cssText),
                                re.U)[0],
                     'CSS_UNKNOWN')


# token type -> unit name, or a callable that inspects the value as well
__unitbytype = {
    __types.NUMBER: 'CSS_NUMBER',
    __types.PERCENTAGE: 'CSS_PERCENTAGE',
    __types.STRING: 'CSS_STRING',
    __types.UNICODE_RANGE: 'CSS_UNICODE_RANGE',
    __types.URI: 'CSS_URI',
    __types.IDENT: 'CSS_IDENT',
    __types.HASH: 'CSS_RGBCOLOR',
    __types.DIMENSION: _unitDIMENSION,
    __types.FUNCTION: _unitFUNCTION
}


def __set_primitiveType(self):
    """Derive ``_primitiveType`` from the ``(value, type)`` pair in ``_value``."""
    # TODO: check unary and font-family STRING a, b, "c"
    val, type_ = self._value
    # try get by type_
    pt = self.__unitbytype.get(type_, 'CSS_UNKNOWN')
    if callable(pt):
        # multiple options, check value too
        pt = pt(val)
    self._primitiveType = getattr(self, pt)


def _getPrimitiveType(self):
    # The type is recomputed on every access.  The previous guard tested
    # hasattr(self, '_primitivetype') - a misspelled name that is never
    # set - so it recomputed every time as well.  Do not turn this into a
    # real cache unless whatever replaces ``_value`` also invalidates
    # ``_primitiveType``; otherwise the type goes stale after cssText is set.
    self.__set_primitiveType()
    return self._primitiveType


primitiveType = property(_getPrimitiveType,
                         doc="(readonly) The type of the value as defined "
                             "by the constants in this class.")


def _getPrimitiveTypeString(self):
    return self._unitnames[self.primitiveType]


primitiveTypeString = property(_getPrimitiveTypeString,
                               doc="Name of primitive type of this value.")
# -- members of class CSSPrimitiveValue: float access --

def _getNumDim(self, value=None):
    """Split a value into its numerical and its dimension part.

    :param value: text to split; defaults to the normalized ``self._value[0]``
    :returns: ``(number, dimension)``; the number is an int when integral
    :exceptions:
        - :exc:`~xml.dom.InvalidAccessErr`:
          Raised if the numerical part is not a number.
    """
    if value is None:
        value = css_parser.helper.normalize(self._value[0])

    try:
        val, dim = CSSPrimitiveValue._reNumDim.findall(value)[0]
    except IndexError:
        # no leading number at all: let float() below reject it
        val, dim = value, ''
    try:
        val = float(val)
        if val == int(val):
            val = int(val)
    except ValueError:
        raise xml.dom.InvalidAccessErr(
            'CSSPrimitiveValue: No float value %r' % self._value[0])

    return val, dim


def getFloatValue(self, unitType=None):
    """(DOM) This method is used to get a float value in a
    specified unit. If this CSS value doesn't contain a float value
    or can't be converted into the specified unit, a DOMException
    is raised.

    :param unitType:
        to get the float value. The unit code can only be a float unit type
        (i.e. CSS_NUMBER, CSS_PERCENTAGE, CSS_EMS, CSS_EXS, CSS_PX, CSS_CM,
        CSS_MM, CSS_IN, CSS_PT, CSS_PC, CSS_DEG, CSS_RAD, CSS_GRAD, CSS_MS,
        CSS_S, CSS_HZ, CSS_KHZ, CSS_DIMENSION) or None in which case
        the current dimension is used.

    :returns:
        not necessarily a float but some cases just an integer
        e.g. if the value is ``1px`` it return ``1`` and **not** ``1.0``

        Conversions might return strange values like 1.000000000001
    """
    if unitType is not None and unitType not in self._floattypes:
        raise xml.dom.InvalidAccessErr(
            'unitType Parameter is not a float type')

    val, dim = self._getNumDim()

    if unitType is not None and self.primitiveType != unitType:
        # convert if needed
        try:
            val = self._converter[self.primitiveType, unitType](val)
        except KeyError:
            raise xml.dom.InvalidAccessErr(
                'CSSPrimitiveValue: Cannot coerce primitiveType %r to %r'
                % (self.primitiveTypeString,
                   self._getCSSPrimitiveTypeString(unitType)))

    if val == int(val):
        val = int(val)

    return val


def setFloatValue(self, unitType, floatValue):
    """(DOM) A method to set the float value with a specified unit.
    If the property attached with this value can not accept the
    specified unit or the float value, the value will be unchanged
    and a DOMException will be raised.

    :param unitType:
        a unit code as defined above. The unit code can only be a float
        unit type
    :param floatValue:
        the new float value which does not have to be a float value but
        may simple be an int e.g. if setting::

            setFloatValue(CSS_PX, 1)

    :exceptions:
        - :exc:`~xml.dom.InvalidAccessErr`:
          Raised if the attached property doesn't
          support the float value or the unit type.
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this property is readonly.
    """
    self._checkReadonly()
    if unitType not in self._floattypes:
        raise xml.dom.InvalidAccessErr(
            'CSSPrimitiveValue: unitType %r is not a float type' %
            self._getCSSPrimitiveTypeString(unitType))
    try:
        val = float(floatValue)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects
        raise xml.dom.InvalidAccessErr(
            'CSSPrimitiveValue: floatValue %r is not a float' %
            floatValue)
    if val != val or val in (float('inf'), float('-inf')):
        # nan/inf cannot be written as CSS and would make int(val) fail below
        raise xml.dom.InvalidAccessErr(
            'CSSPrimitiveValue: floatValue %r is not a finite number' %
            floatValue)

    # only the current dimension is needed, the old number is replaced
    _, dim = self._getNumDim()

    if self.primitiveType != unitType:
        # convert the given number into the unit this value already uses
        try:
            val = self._converter[unitType, self.primitiveType](val)
        except KeyError:
            raise xml.dom.InvalidAccessErr(
                'CSSPrimitiveValue: Cannot coerce primitiveType %r to %r'
                % (self.primitiveTypeString,
                   self._getCSSPrimitiveTypeString(unitType)))

    if val == int(val):
        val = int(val)

    self.cssText = '%s%s' % (val, dim)
all the following return the actual value ``a``: url(a), attr(a), "a", 'a' """ if self.primitiveType not in self._stringtypes: raise xml.dom.InvalidAccessErr( 'CSSPrimitiveValue %r is not a string type' % self.primitiveTypeString) if CSSPrimitiveValue.CSS_ATTR == self.primitiveType: return self._value[0].cssText[5:-1] else: return self._value[0] def setStringValue(self, stringType, stringValue): """(DOM) A method to set the string value with the specified unit. If the property attached to this value can't accept the specified unit or the string value, the value will be unchanged and a DOMException will be raised. :param stringType: a string code as defined above. The string code can only be a string unit type (i.e. CSS_STRING, CSS_URI, CSS_IDENT, and CSS_ATTR). :param stringValue: the new string value Only the actual value is expected so for (CSS_URI, "a") the new value will be ``url(a)``. For (CSS_STRING, "'a'") the new value will be ``"\\'a\\'"`` as the surrounding ``'`` are not part of the string value :exceptions: - :exc:`~xml.dom.InvalidAccessErr`: Raised if the CSS value doesn't contain a string value or if the string value can't be converted into the specified unit. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this property is readonly. 
""" self._checkReadonly() # self not stringType if self.primitiveType not in self._stringtypes: raise xml.dom.InvalidAccessErr( 'CSSPrimitiveValue %r is not a string type' % self.primitiveTypeString) # given stringType is no StringType if stringType not in self._stringtypes: raise xml.dom.InvalidAccessErr( 'CSSPrimitiveValue: stringType %s is not a string type' % self._getCSSPrimitiveTypeString(stringType)) if self._primitiveType != stringType: raise xml.dom.InvalidAccessErr( 'CSSPrimitiveValue: Cannot coerce primitiveType %r to %r' % (self.primitiveTypeString, self._getCSSPrimitiveTypeString(stringType))) if CSSPrimitiveValue.CSS_STRING == self._primitiveType: self.cssText = css_parser.helper.string(stringValue) elif CSSPrimitiveValue.CSS_URI == self._primitiveType: self.cssText = css_parser.helper.uri(stringValue) elif CSSPrimitiveValue.CSS_ATTR == self._primitiveType: self.cssText = 'attr(%s)' % stringValue else: self.cssText = stringValue self._primitiveType = stringType def getCounterValue(self): """(DOM) This method is used to get the Counter value. If this CSS value doesn't contain a counter value, a DOMException is raised. Modification to the corresponding style property can be achieved using the Counter interface. **Not implemented.** """ if not self.CSS_COUNTER == self.primitiveType: raise xml.dom.InvalidAccessErr('Value is not a counter type') # TODO: use Counter class raise NotImplementedError() def getRGBColorValue(self): """(DOM) This method is used to get the RGB color. If this CSS value doesn't contain a RGB color value, a DOMException is raised. Modification to the corresponding style property can be achieved using the RGBColor interface. """ if self.primitiveType not in self._rbgtypes: raise xml.dom.InvalidAccessErr('Value is not a RGBColor value') return RGBColor(self._value[0]) def getRectValue(self): """(DOM) This method is used to get the Rect value. If this CSS value doesn't contain a rect value, a DOMException is raised. 
class CSSValueList(CSSValue):
    """The CSSValueList interface provides the abstraction of an ordered
    collection of CSS values.

    Some properties allow an empty list into their syntax. In that case,
    these properties take the none identifier. So, an empty list means
    that the property has the value none.

    The items in the CSSValueList are accessible via an integral index,
    starting from 0.
    """
    cssValueType = CSSValue.CSS_VALUE_LIST

    def __init__(self, cssText=None, parent=None, readonly=False):
        """Init a new CSSValueList"""
        super(CSSValueList, self).__init__(cssText=cssText,
                                           parent=parent,
                                           readonly=readonly)
        self._items = []

    def __iter__(self):
        "CSSValueList is iterable."
        for item in self.__items():
            yield item.value

    def __str__(self):
        # The format string must consume all five arguments; an empty
        # string here raises "not all arguments converted".
        return ("<css_parser.css.%s object cssValueType=%r cssText=%r "
                "length=%r at 0x%x>" % (
                    self.__class__.__name__,
                    self.cssValueTypeString,
                    self.cssText,
                    self.length,
                    id(self)))

    def __items(self):
        # operators, comments etc. live in the same sequence; only
        # CSSValue objects count as list items
        return [item for item in self._seq
                if isinstance(item.value, CSSValue)]

    def item(self, index):
        """(DOM) Retrieve a CSSValue by ordinal `index`. The
        order in this collection represents the order of the values in the
        CSS style property. If `index` is greater than or equal to the number
        of values in the list, this returns ``None``.
        """
        try:
            return self.__items()[index].value
        except IndexError:
            return None

    length = property(lambda self: len(self.__items()),
                      doc="(DOM attribute) The number of CSSValues in the "
                          "list.")
class RGBColor(CSSFunction):
    """A CSS color like RGB, RGBA or a simple value like `#000` or `red`."""
    _functionName = 'Function rgb()'

    def __init__(self, cssText=None, parent=None, readonly=False):
        """
        Init a new RGBColor

        :param cssText:
            the parsable cssText of the value, or an object exposing
            a ``cssText`` attribute
        :param readonly:
            defaults to False
        """
        # Run CSSFunction.__init__ as well (it was skipped before) so that
        # ``_funcType`` exists and the inherited ``funcType`` property works.
        super(RGBColor, self).__init__(parent=parent)
        self._colorType = None
        self.valid = False
        self.wellformed = False
        if cssText is not None:
            try:
                # if it is a Function object
                cssText = cssText.cssText
            except AttributeError:
                pass
            self.cssText = cssText

        self._readonly = readonly

    def __repr__(self):
        return "css_parser.css.%s(%r)" % (
            self.__class__.__name__, self.cssText)

    def __str__(self):
        # The format string must consume all four arguments; an empty
        # string here raises "not all arguments converted".
        return "<css_parser.css.%s object colorType=%r cssText=%r at 0x%x>" % (
            self.__class__.__name__,
            self.colorType,
            self.cssText,
            id(self))

    def _setCssText(self, cssText):
        """Parse `cssText` as a functional, hex or named color."""
        self._checkReadonly()
        types = self._prods  # rename!
        valueProd = Prod(name='value',
                         match=lambda t, v: t in (types.NUMBER,
                                                  types.PERCENTAGE),
                         toSeq=lambda t, v: (CSSPrimitiveValue,
                                             CSSPrimitiveValue(v)),
                         toStore='parts'
                         )
        # COLOR PRODUCTION
        funccolor = Sequence(Prod(name='FUNC',
                                  match=lambda t, v: t == types.FUNCTION and
                                  css_parser.helper.normalize(v) in (
                                      'rgb(', 'rgba(', 'hsl(', 'hsla('),
                                  # the name is stored as written, not normalized
                                  toSeq=lambda t, v: (t, v),
                                  toStore='colorType'),
                             PreDef.unary(),
                             valueProd,
                             # 2 or 3 more values starting with Comma
                             Sequence(PreDef.comma(),
                                      PreDef.unary(),
                                      valueProd,
                                      minmax=lambda: (2, 3)),
                             PreDef.funcEnd()
                             )
        colorprods = Choice(funccolor,
                            PreDef.hexcolor('colorType'),
                            Prod(name='named color',
                                 match=lambda t, v: t == types.IDENT,
                                 toStore='colorType'
                                 )
                            )
        # store: colorType, parts
        wellformed, seq, store, unusedtokens = ProdParser().parse(
            cssText,
            'RGBColor',
            colorprods,
            keepS=True,
            store={'parts': []})

        if wellformed:
            self.wellformed = True
            if store['colorType'].type == self._prods.HASH:
                self._colorType = 'HEX'
            elif store['colorType'].type == self._prods.IDENT:
                self._colorType = 'Named Color'
            else:
                # function token without its trailing "("
                self._colorType = store['colorType'].value[:-1]

            self._setSeq(seq)

    cssText = property(lambda self: css_parser.ser.do_css_RGBColor(self),
                       _setCssText)

    colorType = property(lambda self: self._colorType)
return t[0], t[1] funcProds = Sequence(Prod(name='calc', match=lambda t, v: t == types.FUNCTION, toSeq=toSeq ), Sequence(Choice(Prod(name='nested function', match=lambda t, v: t == self._prods.FUNCTION, toSeq=lambda t, tokens: ( CSSFunction._functionName, CSSFunction( css_parser.helper.pushtoken(t, tokens))) ), Prod(name='part', match=lambda t, v: v != ')', toSeq=lambda t, tokens: (t[0], t[1])), ), minmax=lambda: (0, None)), PreDef.funcEnd(stop=True)) return funcProds def _getCssText(self): return css_parser.ser.do_css_CalcValue(self) def _setCssText(self, cssText): return super(CalcValue, self)._setCssText(cssText) cssText = property(_getCssText, _setCssText, doc="A string representation of the current value.") class ExpressionValue(CSSFunction): """Special IE only CSSFunction which may contain *anything*. Used for expressions and ``alpha(opacity=100)`` currently.""" _functionName = 'Expression (IE only)' def _productiondefinition(self): """Return defintion used for parsing.""" types = self._prods # rename! def toSeq(t, tokens): "Do not normalize function name!" 
class CSSVariable(CSSValue):
    """The CSSVariable represents a call to CSS Variable."""

    def __init__(self, cssText=None, parent=None, readonly=False):
        """Init a new CSSVariable.

        :param cssText:
            the parsable cssText of the value, e.g. ``var(x)``
        :param readonly:
            defaults to False
        """
        # must exist before the base initializer assigns cssText
        self._name = None
        super(CSSVariable, self).__init__(cssText=cssText,
                                          parent=parent,
                                          readonly=readonly)

    def __repr__(self):
        return "css_parser.css.%s(%r)" % (self.__class__.__name__,
                                          self.cssText)

    def __str__(self):
        # The format string must consume all four arguments; an empty
        # string here raises "not all arguments converted".
        return "<css_parser.css.%s object name=%r value=%r at 0x%x>" % (
            self.__class__.__name__,
            self.name,
            self.value,
            id(self))

    def _setCssText(self, cssText):
        """Parse `cssText`, expected in the form ``var(name)``."""
        self._checkReadonly()

        types = self._prods  # rename!

        funcProds = Sequence(Prod(name='var',
                                  match=lambda t, v: t == types.FUNCTION
                                  ),
                             PreDef.ident(toStore='ident'),
                             PreDef.funcEnd(stop=True))

        # the returned store holds the variable name under 'ident'
        wellformed, seq, store, unusedtokens = ProdParser().parse(
            cssText,
            'CSSVariable',
            funcProds,
            keepS=True)
        if wellformed:
            self._name = store['ident'].value
            self._setSeq(seq)
            self.wellformed = True

    cssText = property(lambda self: css_parser.ser.do_css_CSSVariable(self),
                       _setCssText,
                       doc="A string representation of the current variable.")

    cssValueType = CSSValue.CSS_VARIABLE

    # TODO: writable? check if var (value) available?
    name = property(lambda self: self._name)

    def _getValue(self):
        "Find contained sheet and @variables there"
        try:
            variables = self.parent.parent.parentRule.parentStyleSheet.variables
        except AttributeError:
            return None
        else:
            try:
                return variables[self.name]
            except KeyError:
                return None

    value = property(_getValue)
""" def __init__(self, cssText='', parentRule=None, readonly=False): """ :param cssText: Shortcut, sets CSSVariablesDeclaration.cssText :param parentRule: The CSS rule that contains this declaration block or None if this CSSVariablesDeclaration is not attached to a CSSRule. :param readonly: defaults to False Format:: variableset : vardeclaration [ ';' S* vardeclaration ]* S* ; vardeclaration : varname ':' S* term ; varname : IDENT S* ; """ super(CSSVariablesDeclaration, self).__init__() self._parentRule = parentRule self._vars = {} if cssText: self.cssText = cssText self._readonly = readonly def __repr__(self): return "css_parser.css.%s(cssText=%r)" % (self.__class__.__name__, self.cssText) def __str__(self): return "" % ( self.__class__.__name__, self.length, id(self)) def __contains__(self, variableName): """Check if a variable is in variable declaration block. :param variableName: a string """ return normalize(variableName) in as_list(self.keys()) def __getitem__(self, variableName): """Retrieve the value of variable ``variableName`` from this declaration. """ return self.getVariableValue(variableName) def __setitem__(self, variableName, value): self.setVariable(variableName, value) def __delitem__(self, variableName): return self.removeVariable(variableName) def __iter__(self): """Iterator of names of set variables.""" for name in as_list(self.keys()): yield name def keys(self): """Analoguous to standard dict returns variable names which are set in this declaration.""" return as_list(self._vars.keys()) def _getCssText(self): """Return serialized property cssText.""" return css_parser.ser.do_css_CSSVariablesDeclaration(self) def _setCssText(self, cssText): """Setting this attribute will result in the parsing of the new value and resetting of all the properties in the declaration block including the removal or addition of properties. :exceptions: - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this declaration is readonly or a property is readonly. 
- :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. Format:: variableset : vardeclaration [ ';' S* vardeclaration ]* ; vardeclaration : varname ':' S* term ; varname : IDENT S* ; expr : [ VARCALL | term ] [ operator [ VARCALL | term ] ]* ; """ self._checkReadonly() vardeclaration = Sequence( PreDef.ident(), PreDef.char(':', ':', toSeq=False, optional=True), # PreDef.S(toSeq=False, optional=True), Prod(name='term', match=lambda t, v: True, toSeq=lambda t, tokens: ('value', PropertyValue(itertools.chain([t], tokens), parent=self) ) ) ) prods = Sequence(vardeclaration, Sequence(PreDef.S(optional=True), PreDef.char(';', ';', toSeq=False, optional=True), PreDef.S(optional=True), vardeclaration, minmax=lambda: (0, None)), PreDef.S(optional=True), PreDef.char(';', ';', toSeq=False, optional=True) ) # parse wellformed, seq, store, notused = \ ProdParser().parse(cssText, 'CSSVariableDeclaration', prods, emptyOk=True) if wellformed: newseq = self._tempSeq() newvars = {} # seq contains only name: value pairs plus comments etc nameitem = None for item in seq: if 'IDENT' == item.type: nameitem = item elif 'value' == item.type: nname = normalize(nameitem.value) if nname in newvars: # replace var with same name for i, it in enumerate(newseq): if normalize(it.value[0]) == nname: newseq.replace(i, (nameitem.value, item.value), 'var', nameitem.line, nameitem.col) else: # saved non normalized name for reserialization newseq.append((nameitem.value, item.value), 'var', nameitem.line, nameitem.col) # newseq.append((nameitem.value, item.value), # 'var', # nameitem.line, nameitem.col) newvars[nname] = item.value else: newseq.appendItem(item) self._setSeq(newseq) self._vars = newvars self.wellformed = True cssText = property(_getCssText, _setCssText, doc="(DOM) A parsable textual representation of the declaration " "block excluding the surrounding curly braces.") def _setParentRule(self, parentRule): self._parentRule = parentRule 
# -- members of class CSSVariablesDeclaration: variable access --

parentRule = property(lambda self: self._parentRule, _setParentRule,
                      doc="(DOM) The CSS rule that contains this"
                          " declaration block or None if this block"
                          " is not attached to a CSSRule.")


def getVariableValue(self, variableName):
    """Used to retrieve the value of a variable if it has been explicitly
    set within this variable declaration block.

    :param variableName:
        The name of the variable.
    :returns:
        the value of the variable if it has been explicitly set in this
        variable declaration block. Returns the empty string if the
        variable has not been set.
    """
    try:
        return self._vars[normalize(variableName)].cssText
    except KeyError:
        return ''


def removeVariable(self, variableName):
    """Used to remove a variable if it has been explicitly set within this
    variable declaration block.

    :param variableName:
        The name of the variable; it is normalized like in all other
        accessors.
    :returns:
        the value of the variable if it has been explicitly set for this
        variable declaration block. Returns the empty string if the
        variable has not been set.

    :exceptions:
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this declaration is readonly.
    """
    self._checkReadonly()
    # variables are stored under their normalized name
    normalname = normalize(variableName)
    try:
        removed = self._vars[normalname]
    except KeyError:
        return ''

    # seq entries may carry the name as originally written, so compare
    # normalized; collect first, then delete back to front so the
    # remaining indexes stay valid
    hits = [i for i, x in enumerate(self.seq)
            if x.type == 'var' and normalize(x.value[0]) == normalname]
    self.seq._readonly = False
    try:
        for i in reversed(hits):
            del self.seq[i]
    finally:
        self.seq._readonly = True
    del self._vars[normalname]

    return removed.cssText


def setVariable(self, variableName, value):
    """Used to set a variable value within this variable declaration block.

    :param variableName:
        The name of the CSS variable.
    :param value:
        The new value of the variable, may also be a PropertyValue object.

    :exceptions:
        - :exc:`~xml.dom.SyntaxErr`:
          Raised if the specified value has a syntax error and is
          unparsable.
        - :exc:`~xml.dom.NoModificationAllowedErr`:
          Raised if this declaration is readonly or the property is
          readonly.
    """
    self._checkReadonly()

    # check name
    wellformed, seq, store, unused = \
        ProdParser().parse(normalize(variableName),
                           'variableName',
                           Sequence(PreDef.ident()))
    if not wellformed:
        self._log.error('Invalid variableName: %r: %r'
                        % (variableName, value))
        return

    # check value
    if isinstance(value, PropertyValue):
        v = value
    else:
        v = PropertyValue(cssText=value, parent=self)

    if not v.wellformed:
        self._log.error('Invalid variable value: %r: %r'
                        % (variableName, value))
        return

    # update seq
    variableName = normalize(variableName)
    replaced = False
    self.seq._readonly = False
    try:
        if variableName in self._vars:
            for i, x in enumerate(self.seq):
                # parsed entries keep the name as written, so compare
                # normalized
                if x.type == 'var' and normalize(x.value[0]) == variableName:
                    self.seq.replace(i,
                                     [variableName, v],
                                     x.type,
                                     x.line,
                                     x.col)
                    replaced = True
                    break
        if not replaced:
            self.seq.append([variableName, v], 'var')
    finally:
        self.seq._readonly = True
    self._vars[variableName] = v


def item(self, index):
    """Used to retrieve the variables that have been explicitly set in
    this variable declaration block. The order of the variables
    retrieved using this method does not have to be the order in which
    they were set. This method can be used to iterate over all variables
    in this variable declaration block.

    :param index:
        of the variable name to retrieve, negative values behave like
        negative indexes on Python lists, so -1 is the last element

    :returns:
        The name of the variable at this ordinal position. The empty
        string if no variable exists at this position.
    """
    try:
        return as_list(self.keys())[index]
    except IndexError:
        return ''


length = property(lambda self: len(self._vars),
                  doc="The number of variables that have been explicitly set in this"
                      " variable declaration block. The range of valid indices is 0"
                      " to length-1 inclusive.")
import cssrule from .cssvariablesdeclaration import CSSVariablesDeclaration """CSSVariables implements (and only partly) experimental `CSS Variables `_ """ __all__ = ['CSSVariablesRule'] __docformat__ = 'restructuredtext' __version__ = '$Id: cssfontfacerule.py 1818 2009-07-30 21:39:00Z cthedot $' import sys if sys.version_info[0] >= 3: string_type = str else: string_type = basestring class CSSVariablesRule(cssrule.CSSRule): """ The CSSVariablesRule interface represents a @variables rule within a CSS style sheet. The @variables rule is used to specify variables. css_parser uses a :class:`~css_parser.css.CSSVariablesDeclaration` to represent the variables. Format:: variables VARIABLES_SYM S* medium [ COMMA S* medium ]* LBRACE S* variableset* '}' S* ; for variableset see :class:`css_parser.css.CSSVariablesDeclaration` **Media are not implemented. Reason is that css_parser is using CSS variables in a kind of preprocessing and therefor no media information is available at this stage. For now do not use media!** Example:: @variables { CorporateLogoBGColor: #fe8d12; } div.logoContainer { background-color: var(CorporateLogoBGColor); } """ def __init__(self, mediaText=None, variables=None, parentRule=None, parentStyleSheet=None, readonly=False): """ If readonly allows setting of properties in constructor only. 
def __str__(self):
    """Return a short description of this rule for debugging.

    The text shows the class name, the media text, the serialized
    variables, the ``valid`` flag and the object's ``id``.  The format
    string must contain one placeholder per value: formatting a tuple
    with an empty template raises ``TypeError``.
    """
    return ("<css_parser.css.%s object mediaText=%r variables=%r "
            "valid=%r at 0x%x>" % (self.__class__.__name__,
                                   self._media.mediaText,
                                   self.variables.cssText,
                                   self.valid,
                                   id(self)))
Format:: variables : VARIABLES_SYM S* medium [ COMMA S* medium ]* LBRACE S* variableset* '}' S* ; variableset : LBRACE S* vardeclaration [ ';' S* vardeclaration ]* '}' S* ; """ super(CSSVariablesRule, self)._setCssText(cssText) tokenizer = self._tokenize2(cssText) attoken = self._nexttoken(tokenizer, None) if self._type(attoken) != self._prods.VARIABLES_SYM: self._log.error('CSSVariablesRule: No CSSVariablesRule found: %s' % self._valuestr(cssText), error=xml.dom.InvalidModificationErr) else: newVariables = CSSVariablesDeclaration(parentRule=self) ok = True beforetokens, brace = self._tokensupto2(tokenizer, blockstartonly=True, separateEnd=True) if self._tokenvalue(brace) != '{': ok = False self._log.error('CSSVariablesRule: No start { of variable ' 'declaration found: %r' % self._valuestr(cssText), brace) # parse stuff before { which should be comments and S only new = {'wellformed': True} newseq = self._tempSeq() # [] beforewellformed, expected = self._parse(expected=':', seq=newseq, tokenizer=self._tokenize2(beforetokens), productions={}) ok = ok and beforewellformed and new['wellformed'] variablestokens, braceorEOFtoken = self._tokensupto2(tokenizer, blockendonly=True, separateEnd=True) val, type_ = self._tokenvalue(braceorEOFtoken), \ self._type(braceorEOFtoken) if val != '}' and type_ != 'EOF': ok = False self._log.error('CSSVariablesRule: No "}" after variables ' 'declaration found: %r' % self._valuestr(cssText)) nonetoken = self._nexttoken(tokenizer) if nonetoken: ok = False self._log.error('CSSVariablesRule: Trailing content found.', token=nonetoken) if 'EOF' == type_: # add again as variables needs it variablestokens.append(braceorEOFtoken) # SET but may raise: newVariables.cssText = variablestokens if ok: # contains probably comments only upto { self._setSeq(newseq) self.variables = newVariables cssText = property(_getCssText, _setCssText, doc="(DOM) The parsable textual representation of this " "rule.") media = property(doc="NOT IMPLEMENTED! 
def _setVariables(self, variables):
    """Replace the variables declaration of this rule.

    :param variables:
        a CSSVariablesDeclaration or a string; a string is handed to a
        new CSSVariablesDeclaration as its ``cssText``.
    """
    self._checkReadonly()

    # string_type is ``basestring`` on Python 2 and ``str`` on Python 3
    if not isinstance(variables, string_type):
        # adopt the given declaration object as is
        variables._parentRule = self
        declaration = variables
    else:
        declaration = CSSVariablesDeclaration(cssText=variables,
                                              parentRule=self)
    self._variables = declaration
def _setMargin(self, margin):
    """Set the margin @keyword if it names a known margin box.

    The normalized keyword is stored as ``_atkeyword`` and the keyword
    as given as ``_keyword``.  An unknown keyword is reported to the
    log with ``xml.dom.InvalidModificationErr`` and nothing is changed.
    """
    normalized = self._normalize(margin)
    if normalized in MarginRule.margins:
        self._atkeyword = normalized
        self._keyword = margin
        return

    self._log.error('Invalid margin @keyword for this %s rule: %r' %
                    (self.margin, margin),
                    error=xml.dom.InvalidModificationErr)
def __str__(self):
    """Return a short description of this rule for debugging.

    The text shows the class name, the margin keyword, the serialized
    style and the object's ``id``.  The format string must contain one
    placeholder per value: formatting a tuple with an empty template
    raises ``TypeError``.
    """
    return "<css_parser.css.%s object margin=%r style=%r at 0x%x>" % (
        self.__class__.__name__,
        self.margin,
        self.style.cssText,
        id(self))
def _setStyle(self, style):
    """Replace the style declaration of this rule.

    :param style:
        A string or CSSStyleDeclaration which replaces the current
        style object; a string is handed to a new CSSStyleDeclaration
        as its ``cssText``.
    """
    self._checkReadonly()

    # string_type is ``basestring`` on Python 2 and ``str`` on Python 3
    if not isinstance(style, string_type):
        # adopt the given declaration object as is
        style._parentRule = self
        declaration = style
    else:
        declaration = CSSStyleDeclaration(cssText=style, parentRule=self)
    self._style = declaration
def _isValidating(self):
    """Tell whether validation is enabled for this property.

    The answer is the ``validating`` attribute of the parent; when that
    lookup fails with ``AttributeError`` (e.g. there is no parent) the
    default is ``True``.
    """
    try:
        validating = self.parent.validating
    except AttributeError:
        # no parent to ask: validate by default
        validating = True
    return validating
def _setName(self, name):
    """Parse ``name`` and store it as the name of this property.

    On success ``_literalname`` holds the lower-cased name as found,
    ``_name`` its normalized form, ``seqs[0]`` the parsed sequence and
    ``wellformed`` is set to True.  On failure only ``wellformed`` is
    set to False; the previously stored name is kept.

    :param name:
        the value handed to ``self._tokenize2``; if it is a list its
        first element is used as the token for error logging and is
        remembered in ``self.__nametoken``.
        NOTE(review): presumably a string or a list of tokens - confirm
        against ``_tokenize2``.

    :exceptions:
        - :exc:`~xml.dom.SyntaxErr`:
          Raised if the specified name has a syntax error and is
          unparsable.
    """
    # for closures: must be a mutable
    new = {'literalname': None,
           'wellformed': True}

    def _ident(expected, seq, token, tokenizer=None):
        # Production callback for IDENT tokens: the first IDENT is the
        # name, after which only EOF is expected; any further IDENT is
        # reported and marks the name as not wellformed.
        if 'name' == expected:
            new['literalname'] = self._tokenvalue(token).lower()
            seq.append(new['literalname'])
            return 'EOF'
        else:
            new['wellformed'] = False
            self._log.error('Property: Unexpected ident.', token)
            return expected

    newseq = []
    wellformed, expected = self._parse(expected='name',
                                       seq=newseq,
                                       tokenizer=self._tokenize2(name),
                                       productions={'IDENT': _ident})
    # the parser result and the callback both have to agree
    wellformed = wellformed and new['wellformed']

    # post conditions
    # define a token for error logging
    if isinstance(name, list):
        token = name[0]
        # kept so later messages (see validate) can point at the name
        self.__nametoken = token
    else:
        token = None

    if not new['literalname']:
        # no IDENT was seen at all
        wellformed = False
        self._log.error('Property: No name found: %s'
                        % self._valuestr(name), token=token)

    if wellformed:
        self.wellformed = True
        self._literalname = new['literalname']
        self._name = self._normalize(self._literalname)
        self.seqs[0] = newseq

        # validate: an unknown name only produces a warning
        # (neverraise=True), the name itself is still set
        if self._isValidating() and self._name not in css_parser.profile.knownNames:
            # self.valid = False
            self._log.warn('Property: Unknown Property name.',
                           token=token, neverraise=True)
        else:
            pass
        #     self.valid = True
        # if self.propertyValue:
        #     self.propertyValue._propertyName = self._name
        #     #self.valid = self.propertyValue.valid
    else:
        self.wellformed = False
def _getValue(self):
    """Return the textual value of ``propertyValue`` (the value without
    comments), or the empty string if ``propertyValue`` is falsy."""
    return self.propertyValue.value if self.propertyValue else ''
== expected == val: seq.append(val) return 'important' else: new['wellformed'] = False self._log.error('Property: Unexpected char.', token) return expected def _ident(expected, seq, token, tokenizer=None): # "important" val = self._tokenvalue(token) if 'important' == expected: new['literalpriority'] = val seq.append(val) return 'EOF' else: new['wellformed'] = False self._log.error('Property: Unexpected ident.', token) return expected newseq = [] wellformed, expected = self._parse(expected='!', seq=newseq, tokenizer=self._tokenize2(priority), productions={'CHAR': _char, 'IDENT': _ident}) wellformed = wellformed and new['wellformed'] # post conditions if priority and not new['literalpriority']: wellformed = False self._log.info('Property: Invalid priority: %s' % self._valuestr(priority)) if wellformed: self.wellformed = self.wellformed and wellformed self._literalpriority = new['literalpriority'] self._priority = self._normalize(self.literalpriority) self.seqs[2] = newseq # validate priority if self._priority not in ('', 'important'): self._log.error('Property: No CSS priority value: %s' % self._priority) priority = property(lambda self: self._priority, _setPriority, doc="Priority of this property.") literalpriority = property(lambda self: self._literalpriority, doc="Readonly literal (not normalized) priority of this property") def _setParent(self, parent): self._parent = parent parent = property(lambda self: self._parent, _setParent, doc="The Parent Node (normally a CSSStyledeclaration) of this " "Property") def validate(self): """Validate value against `profiles` which are checked dynamically. properties in e.g. @font-face rules are checked against ``css_parser.profile.CSS3_FONT_FACE`` only. 
For each of the following cases a message is reported: - INVALID (so the property is known but not valid) ``ERROR Property: Invalid value for "{PROFILE-1[/PROFILE-2...]" property: ...`` - VALID but not in given profiles or defaultProfiles ``WARNING Property: Not valid for profile "{PROFILE-X}" but valid "{PROFILE-Y}" property: ...`` - VALID in current profile ``DEBUG Found valid "{PROFILE-1[/PROFILE-2...]" property...`` - UNKNOWN property ``WARNING Unknown Property name...`` is issued so for example:: css_parser.log.setLevel(logging.DEBUG) parser = css_parser.CSSParser() s = parser.parseString('''body { unknown-property: x; color: 4; color: rgba(1,2,3,4); color: red }''') # Log output: WARNING Property: Unknown Property name. [2:9: unknown-property] ERROR Property: Invalid value for "CSS Color Module Level 3/CSS Level 2.1" property: 4 [3:9: color] DEBUG Property: Found valid "CSS Color Module Level 3" value: rgba(1, 2, 3, 4) [4:9: color] DEBUG Property: Found valid "CSS Level 2.1" value: red [5:9: color] and when setting an explicit default profile:: css_parser.profile.defaultProfiles = css_parser.profile.CSS_LEVEL_2 s = parser.parseString('''body { unknown-property: x; color: 4; color: rgba(1,2,3,4); color: red }''') # Log output: WARNING Property: Unknown Property name. 
[2:9: unknown-property] ERROR Property: Invalid value for "CSS Color Module Level 3/CSS Level 2.1" property: 4 [3:9: color] WARNING Property: Not valid for profile "CSS Level 2.1" but valid "CSS Color Module Level 3" value: rgba(1, 2, 3, 4) [4:9: color] DEBUG Property: Found valid "CSS Level 2.1" value: red [5:9: color] """ valid = False profiles = None try: # if @font-face use that profile rule = self.parent.parentRule except AttributeError: pass else: if rule is not None: if rule.type == rule.FONT_FACE_RULE: profiles = [css_parser.profile.CSS3_FONT_FACE] # TODO: same for @page if self.name and self.value: pass # TODO # cv = self.propertyValue # if cv.cssValueType == cv.CSS_VARIABLE and not cv.value: # # TODO: false alarms too! # css_parser.log.warn(u'No value for variable "%s" found, keeping ' # u'variable.' % cv.name, neverraise=True) if self.name in css_parser.profile.knownNames: # add valid, matching, validprofiles... valid, matching, validprofiles = \ css_parser.profile.validateWithProfile(self.name, self.value, profiles) if not valid: self._log.error('Property: Invalid value for ' '"%s" property: %s' % ('/'.join(validprofiles), self.value), token=self.__nametoken, neverraise=True) # TODO: remove logic to profiles! 
def as_list(p):
    """Return ``p`` as a list.

    A list is returned unchanged (the very same object).  Any other
    iterable - e.g. the view returned by ``dict.items()`` on Python 3 -
    is copied into a new list, so callers can always iterate or index
    the result instead of receiving ``None``.
    """
    if isinstance(p, list):
        return p
    return list(p)
Format:: # implemented in SelectorList selectors_group : selector [ COMMA S* selector ]* ; selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ; combinator /* combinators can be surrounded by white space */ : PLUS S* | GREATER S* | TILDE S* | S+ ; simple_selector_sequence : [ type_selector | universal ] [ HASH | class | attrib | pseudo | negation ]* | [ HASH | class | attrib | pseudo | negation ]+ ; type_selector : [ namespace_prefix ]? element_name ; namespace_prefix : [ IDENT | '*' ]? '|' ; element_name : IDENT ; universal : [ namespace_prefix ]? '*' ; class : '.' IDENT ; attrib : '[' S* [ namespace_prefix ]? IDENT S* [ [ PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | '=' | INCLUDES | DASHMATCH ] S* [ IDENT | STRING ] S* ]? ']' ; pseudo /* '::' starts a pseudo-element, ':' a pseudo-class */ /* Exceptions: :first-line, :first-letter, :before and :after. */ /* Note that pseudo-elements are restricted to one per selector and */ /* occur only in the last simple_selector_sequence. */ : ':' ':'? 
def __str__(self):
    """Return a short description of this selector for debugging.

    The text shows the class name, the selector text, the specificity,
    the namespaces actually used and the object's ``id``.  The format
    string must contain one placeholder per value: formatting a tuple
    with an empty template raises ``TypeError``.
    """
    return ("<css_parser.css.%s object selectorText=%r specificity=%r "
            "_namespaces=%r at 0x%x>" % (self.__class__.__name__,
                                         self.selectorText,
                                         self.specificity,
                                         self._getUsedNamespaces(),
                                         id(self)))

def _getUsedUris(self):
    "Return set of actually used namespace URIs in this Selector."
    uris = set()
    for item in self.seq:
        type_, val = item.type, item.value
        # ``and`` binds tighter than ``or``: without the parentheses
        # every "*-selector" item was accepted unconditionally, so a
        # plain-string value (attribute selector without a prefix)
        # contributed its first character and a None namespace was
        # added as well.  Only (namespaceURI, name) tuples that carry a
        # real namespace count.
        if (type_.endswith('-selector') or type_ == 'universal') and \
                isinstance(val, tuple) and val[0] not in (None, '*'):
            uris.add(val[0])
    return uris
try: return self._parent.parentRule.parentStyleSheet.namespaces except AttributeError: return self.__namespaces _namespaces = property(__getNamespaces, doc="If this Selector is attached to a " "CSSStyleSheet the namespaces of that sheet " "are mirrored here. While the Selector (or " "parent SelectorList or parentRule(s) of that " "are not attached a own dict of {prefix: " "namespaceURI} is used.") element = property(lambda self: self._element, doc="Effective element target of this selector.") parent = property(lambda self: self._parent, doc="(DOM) The SelectorList that contains this Selector " "or None if this Selector is not attached to a " "SelectorList.") def _getSelectorText(self): """Return serialized format.""" return css_parser.ser.do_css_Selector(self) def _setSelectorText(self, selectorText): """ :param selectorText: parsable string or a tuple of (selectorText, dict-of-namespaces). Given namespaces are ignored if this object is attached to a CSSStyleSheet! :exceptions: - :exc:`~xml.dom.NamespaceErr`: Raised if the specified selector uses an unknown namespace prefix. - :exc:`~xml.dom.SyntaxErr`: Raised if the specified CSS string value has a syntax error and is unparsable. - :exc:`~xml.dom.NoModificationAllowedErr`: Raised if this rule is readonly. """ self._checkReadonly() # might be (selectorText, namespaces) selectorText, namespaces = self._splitNamespacesOff(selectorText) try: # uses parent stylesheets namespaces if available, # otherwise given ones namespaces = self.parent.parentRule.parentStyleSheet.namespaces except AttributeError: pass tokenizer = self._tokenize2(selectorText) if not tokenizer: self._log.error('Selector: No selectorText given.') else: # prepare tokenlist: # "*" -> type "universal" # "*"|IDENT + "|" -> combined to "namespace_prefix" # "|" -> type "namespace_prefix" # "." 
+ IDENT -> combined to "class" # ":" + IDENT, ":" + FUNCTION -> pseudo-class # FUNCTION "not(" -> negation # "::" + IDENT, "::" + FUNCTION -> pseudo-element tokens = [] for t in tokenizer: typ, val, lin, col = t if val == ':' and tokens and\ self._tokenvalue(tokens[-1]) == ':': # combine ":" and ":" tokens[-1] = (typ, '::', lin, col) elif typ == 'IDENT' and tokens\ and self._tokenvalue(tokens[-1]) == '.': # class: combine to .IDENT tokens[-1] = ('class', '.'+val, lin, col) elif typ == 'IDENT' and tokens and \ self._tokenvalue(tokens[-1]).startswith(':') and\ not self._tokenvalue(tokens[-1]).endswith('('): # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" if self._tokenvalue(tokens[-1]).startswith('::'): t = 'pseudo-element' else: t = 'pseudo-class' tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col) elif typ == 'FUNCTION' and val == 'not(' and tokens and \ ':' == self._tokenvalue(tokens[-1]): tokens[-1] = ('negation', ':' + val, lin, tokens[-1][3]) elif typ == 'FUNCTION' and tokens\ and self._tokenvalue(tokens[-1]).startswith(':'): # pseudo-X: combine to :FUNCTION( or ::FUNCTION( if self._tokenvalue(tokens[-1]).startswith('::'): t = 'pseudo-element' else: t = 'pseudo-class' tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col) elif val == '*' and tokens and\ self._type(tokens[-1]) == 'namespace_prefix' and\ self._tokenvalue(tokens[-1]).endswith('|'): # combine prefix|* tokens[-1] = ('universal', self._tokenvalue(tokens[-1])+val, lin, col) elif val == '*': # universal: "*" tokens.append(('universal', val, lin, col)) elif val == '|' and tokens and\ self._type(tokens[-1]) in (self._prods.IDENT, 'universal')\ and self._tokenvalue(tokens[-1]).find('|') == -1: # namespace_prefix: "IDENT|" or "*|" tokens[-1] = ('namespace_prefix', self._tokenvalue(tokens[-1])+'|', lin, col) elif val == '|': # namespace_prefix: "|" tokens.append(('namespace_prefix', val, lin, col)) else: tokens.append(t) tokenizer = iter(tokens) # for closures: must be a mutable 
new = {'context': [''], # stack of: 'attrib', 'negation', 'pseudo' 'element': None, '_PREFIX': None, 'specificity': [0, 0, 0, 0], # mutable, finally a tuple! 'wellformed': True } # used for equality checks and setting of a space combinator S = ' ' def append(seq, val, typ=None, token=None): """ appends to seq namespace_prefix, IDENT will be combined to a tuple (prefix, name) where prefix might be None, the empty string or a prefix. Saved are also: - specificity definition: style, id, class/att, type - element: the element this Selector is for """ context = new['context'][-1] if token: line, col = token[2], token[3] else: line, col = None, None if typ == '_PREFIX': # SPECIAL TYPE: save prefix for combination with next new['_PREFIX'] = val[:-1] # handle next time return if new['_PREFIX'] is not None: # as saved from before and reset to None prefix, new['_PREFIX'] = new['_PREFIX'], None elif typ == 'universal' and '|' in val: # val == *|* or prefix|* prefix, val = val.split('|') else: prefix = None # namespace if (typ.endswith('-selector') or typ == 'universal') and not ( 'attribute-selector' == typ and not prefix): # att **IS NOT** in default ns if prefix == '*': # *|name: in ANY_NS namespaceURI = css_parser._ANYNS elif prefix is None: # e or *: default namespace with prefix u'' # or local-name() namespaceURI = namespaces.get('', None) elif prefix == '': # |name or |*: in no (or the empty) namespace namespaceURI = '' else: # explicit namespace prefix # does not raise KeyError, see _SimpleNamespaces namespaceURI = namespaces[prefix] if namespaceURI is None: new['wellformed'] = False self._log.error('Selector: No namespaceURI found ' 'for prefix %r' % prefix, token=token, error=xml.dom.NamespaceErr) return # val is now (namespaceprefix, name) tuple val = (namespaceURI, val) # specificity if not context or context == 'negation': if 'id' == typ: new['specificity'][1] += 1 elif 'class' == typ or '[' == val: new['specificity'][2] += 1 elif typ in ('type-selector', 
'negation-type-selector', 'pseudo-element'): new['specificity'][3] += 1 if not context and typ in ('type-selector', 'universal'): # define element new['element'] = val seq.append(val, typ, line=line, col=col) # expected constants simple_selector_sequence = 'type_selector universal HASH class ' \ 'attrib pseudo negation ' simple_selector_sequence2 = 'HASH class attrib pseudo negation ' element_name = 'element_name' negation_arg = 'type_selector universal HASH class attrib pseudo' negationend = ')' attname = 'prefix attribute' attname2 = 'attribute' attcombinator = 'combinator ]' # optional attvalue = 'value' # optional attend = ']' expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' expression = expressionstart + ' )' combinator = ' combinator' def _COMMENT(expected, seq, token, tokenizer=None): "special implementation for comment token" append(seq, css_parser.css.CSSComment([token]), 'COMMENT', token=token) return expected def _S(expected, seq, token, tokenizer=None): # S context = new['context'][-1] if context.startswith('pseudo-'): if seq and seq[-1].value not in '+-': # e.g. 
x:func(a + b) append(seq, S, 'S', token=token) return expected elif context != 'attrib' and 'combinator' in expected: append(seq, S, 'descendant', token=token) return simple_selector_sequence + combinator else: return expected def _universal(expected, seq, token, tokenizer=None): # *|* or prefix|* context = new['context'][-1] val = self._tokenvalue(token) if 'universal' in expected: append(seq, val, 'universal', token=token) if 'negation' == context: return negationend else: return simple_selector_sequence2 + combinator else: new['wellformed'] = False self._log.error( 'Selector: Unexpected universal.', token=token) return expected def _namespace_prefix(expected, seq, token, tokenizer=None): # prefix| => element_name # or prefix| => attribute_name if attrib context = new['context'][-1] val = self._tokenvalue(token) if 'attrib' == context and 'prefix' in expected: # [PREFIX|att] append(seq, val, '_PREFIX', token=token) return attname2 elif 'type_selector' in expected: # PREFIX|* append(seq, val, '_PREFIX', token=token) return element_name else: new['wellformed'] = False self._log.error( 'Selector: Unexpected namespace prefix.', token=token) return expected def _pseudo(expected, seq, token, tokenizer=None): # pseudo-class or pseudo-element :a ::a :a( ::a( """ /* '::' starts a pseudo-element, ':' a pseudo-class */ /* Exceptions: :first-line, :first-letter, :before and :after. */ /* Note that pseudo-elements are restricted to one per selector and */ /* occur only in the last simple_selector_sequence. */ """ context = new['context'][-1] val, typ = self._tokenvalue(token, normalize=True),\ self._type(token) if 'pseudo' in expected: if val in (':first-line', ':first-letter', ':before', ':after'): # always pseudo-element ??? 
typ = 'pseudo-element' append(seq, val, typ, token=token) if val.endswith('('): # function # "pseudo-" "class" or "element" new['context'].append(typ) return expressionstart elif 'negation' == context: return negationend elif 'pseudo-element' == typ: # only one per element, check at ) also! return combinator else: return simple_selector_sequence2 + combinator else: new['wellformed'] = False self._log.error( 'Selector: Unexpected start of pseudo.', token=token) return expected def _expression(expected, seq, token, tokenizer=None): # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ context = new['context'][-1] val, typ = self._tokenvalue(token), self._type(token) if context.startswith('pseudo-'): append(seq, val, typ, token=token) return expression else: new['wellformed'] = False self._log.error( 'Selector: Unexpected %s.' % typ, token=token) return expected def _attcombinator(expected, seq, token, tokenizer=None): # context: attrib # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | # DASHMATCH context = new['context'][-1] val, typ = self._tokenvalue(token), self._type(token) if 'attrib' == context and 'combinator' in expected: # combinator in attrib append(seq, val, typ.lower(), token=token) return attvalue else: new['wellformed'] = False self._log.error( 'Selector: Unexpected %s.' % typ, token=token) return expected def _string(expected, seq, token, tokenizer=None): # identifier context = new['context'][-1] typ, val = self._type(token), self._stringtokenvalue(token) # context: attrib if 'attrib' == context and 'value' in expected: # attrib: [...=VALUE] append(seq, val, typ, token=token) return attend # context: pseudo elif context.startswith('pseudo-'): # :func(...) 
append(seq, val, typ, token=token) return expression else: new['wellformed'] = False self._log.error( 'Selector: Unexpected STRING.', token=token) return expected def _ident(expected, seq, token, tokenizer=None): # identifier context = new['context'][-1] val, typ = self._tokenvalue(token), self._type(token) # context: attrib if 'attrib' == context and 'attribute' in expected: # attrib: [...|ATT...] append(seq, val, 'attribute-selector', token=token) return attcombinator elif 'attrib' == context and 'value' in expected: # attrib: [...=VALUE] append(seq, val, 'attribute-value', token=token) return attend # context: negation elif 'negation' == context: # negation: (prefix|IDENT) append(seq, val, 'negation-type-selector', token=token) return negationend # context: pseudo elif context.startswith('pseudo-'): # :func(...) append(seq, val, typ, token=token) return expression elif 'type_selector' in expected or element_name == expected: # element name after ns or complete type_selector append(seq, val, 'type-selector', token=token) return simple_selector_sequence2 + combinator else: new['wellformed'] = False self._log.error('Selector: Unexpected IDENT.', token=token) return expected def _class(expected, seq, token, tokenizer=None): # .IDENT context = new['context'][-1] val = self._tokenvalue(token) if 'class' in expected: append(seq, val, 'class', token=token) if 'negation' == context: return negationend else: return simple_selector_sequence2 + combinator else: new['wellformed'] = False self._log.error('Selector: Unexpected class.', token=token) return expected def _hash(expected, seq, token, tokenizer=None): # #IDENT context = new['context'][-1] val = self._tokenvalue(token) if 'HASH' in expected: append(seq, val, 'id', token=token) if 'negation' == context: return negationend else: return simple_selector_sequence2 + combinator else: new['wellformed'] = False self._log.error('Selector: Unexpected HASH.', token=token) return expected def _char(expected, seq, token, 
tokenizer=None): # + > ~ ) [ ] + - context = new['context'][-1] val = self._tokenvalue(token) # context: attrib if ']' == val and 'attrib' == context and ']' in expected: # end of attrib append(seq, val, 'attribute-end', token=token) context = new['context'].pop() # attrib is done context = new['context'][-1] if 'negation' == context: return negationend else: return simple_selector_sequence2 + combinator elif '=' == val and 'attrib' == context\ and 'combinator' in expected: # combinator in attrib append(seq, val, 'equals', token=token) return attvalue # context: negation elif ')' == val and 'negation' == context and ')' in expected: # not(negation_arg)" append(seq, val, 'negation-end', token=token) new['context'].pop() # negation is done context = new['context'][-1] return simple_selector_sequence + combinator # context: pseudo (at least one expression) elif val in '+-' and context.startswith('pseudo-'): # :func(+ -)" _names = {'+': 'plus', '-': 'minus'} if val == '+' and seq and seq[-1].value == S: seq.replace(-1, val, _names[val]) else: append(seq, val, _names[val], token=token) return expression elif ')' == val and context.startswith('pseudo-') and\ expression == expected: # :func(expression)" append(seq, val, 'function-end', token=token) new['context'].pop() # pseudo is done if 'pseudo-element' == context: return combinator else: return simple_selector_sequence + combinator # context: ROOT elif '[' == val and 'attrib' in expected: # start of [attrib] append(seq, val, 'attribute-start', token=token) new['context'].append('attrib') return attname elif val in '+>~' and 'combinator' in expected: # no other combinator except S may be following _names = { '>': 'child', '+': 'adjacent-sibling', '~': 'following-sibling'} if seq and seq[-1].value == S: seq.replace(-1, val, _names[val]) else: append(seq, val, _names[val], token=token) return simple_selector_sequence elif ',' == val: # not a selectorlist new['wellformed'] = False self._log.error( 'Selector: Single 
selector only.', error=xml.dom.InvalidModificationErr, token=token) return expected else: new['wellformed'] = False self._log.error( 'Selector: Unexpected CHAR.', token=token) return expected def _negation(expected, seq, token, tokenizer=None): # not( val = self._tokenvalue(token, normalize=True) if 'negation' in expected: new['context'].append('negation') append(seq, val, 'negation-start', token=token) return negation_arg else: new['wellformed'] = False self._log.error( 'Selector: Unexpected negation.', token=token) return expected def _atkeyword(expected, seq, token, tokenizer=None): "invalidates selector" new['wellformed'] = False self._log.error( 'Selector: Unexpected ATKEYWORD.', token=token) return expected # expected: only|not or mediatype, mediatype, feature, and newseq = self._tempSeq() wellformed, expected = self._parse( expected=simple_selector_sequence, seq=newseq, tokenizer=tokenizer, productions={'CHAR': _char, 'class': _class, 'HASH': _hash, 'STRING': _string, 'IDENT': _ident, 'namespace_prefix': _namespace_prefix, 'negation': _negation, 'pseudo-class': _pseudo, 'pseudo-element': _pseudo, 'universal': _universal, # pseudo 'NUMBER': _expression, 'DIMENSION': _expression, # attribute 'PREFIXMATCH': _attcombinator, 'SUFFIXMATCH': _attcombinator, 'SUBSTRINGMATCH': _attcombinator, 'DASHMATCH': _attcombinator, 'INCLUDES': _attcombinator, 'S': _S, 'COMMENT': _COMMENT, 'ATKEYWORD': _atkeyword}) wellformed = wellformed and new['wellformed'] # post condition if len(new['context']) > 1 or not newseq: wellformed = False self._log.error('Selector: Invalid or incomplete selector: %s' % self._valuestr(selectorText)) if expected == 'element_name': wellformed = False self._log.error('Selector: No element name found: %s' % self._valuestr(selectorText)) if expected == simple_selector_sequence and newseq: wellformed = False self._log.error('Selector: Cannot end with combinator: %s' % self._valuestr(selectorText)) if newseq and hasattr(newseq[-1].value, 'strip') \ and 
newseq[-1].value.strip() == '': del newseq[-1] # set if wellformed: self.__namespaces = namespaces self._element = new['element'] self._specificity = tuple(new['specificity']) self._setSeq(newseq) # filter that only used ones are kept self.__namespaces = self._getUsedNamespaces() selectorText = property(_getSelectorText, _setSelectorText, doc="(DOM) The parsable textual representation of " "the selector.") specificity = property(lambda self: self._specificity, doc="""Specificity of this selector (READONLY). Tuple of (a, b, c, d) where: a presence of style in document, always 0 if not used on a document b number of ID selectors c number of .class selectors d number of Element (type) selectors""") wellformed = property(lambda self: bool(len(self.seq))) @Deprecated('Use property parent instead') def _getParentList(self): return self.parent parentList = property(_getParentList, doc="DEPRECATED, see property parent instead") css-parser-1.0.4/src/css_parser/css/selectorlist.py0000644000175000017500000002101413407702010022651 0ustar kovidkovid00000000000000"""SelectorList is a list of CSS Selector objects. TODO - remove duplicate Selectors. -> CSSOM canonicalize - ??? CSS2 gives a special meaning to the comma (,) in selectors. However, since it is not known if the comma may acquire other meanings in future versions of CSS, the whole statement should be ignored if there is an error anywhere in the selector, even though the rest of the selector may look reasonable in CSS2. 
class SelectorList(css_parser.util.Base, css_parser.util.ListSeq):
    """A list of :class:`~css_parser.css.Selector` objects
    of a :class:`~css_parser.css.CSSStyleRule`."""

    def __init__(self, selectorText=None, parentRule=None,
                 readonly=False):
        """
        :Parameters:
            selectorText
                parsable list of Selectors
            parentRule
                the parent CSSRule if available
            readonly
                stored as ``_readonly``; consulted by ``_checkReadonly()``
                in every mutating method
        """
        super(SelectorList, self).__init__()

        self._parentRule = parentRule

        if selectorText:
            self.selectorText = selectorText

        # assigned only after the initial selectorText has been parsed
        self._readonly = readonly

    def __repr__(self):
        if self._namespaces:
            st = (self.selectorText, self._namespaces)
        else:
            st = self.selectorText
        return "css_parser.css.%s(selectorText=%r)" % (self.__class__.__name__, st)

    def __str__(self):
        # The format string must consume all four arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object selectorText=%r _namespaces=%r at " \
               "0x%x>" % (self.__class__.__name__,
                          self.selectorText,
                          self._namespaces,
                          id(self))

    def __setitem__(self, index, newSelector):
        """Overwrite ListSeq.__setitem__

        Any duplicate Selectors are **not** removed.
        A selector which is not wellformed is silently not stored.
        """
        newSelector = self.__prepareset(newSelector)
        if newSelector:
            self.seq[index] = newSelector

    def __prepareset(self, newSelector, namespaces=None):
        """Used by appendSelector and __setitem__.

        Converts `newSelector` to a Selector if it is not one already and
        returns it with this list set as its parent. Returns ``None`` if
        the resulting Selector is not wellformed.
        """
        if not namespaces:
            namespaces = {}
        self._checkReadonly()
        if not isinstance(newSelector, Selector):
            newSelector = Selector((newSelector, namespaces),
                                   parent=self)
        if newSelector.wellformed:
            newSelector._parent = self  # maybe set twice but must be!
            return newSelector
        return None

    def __getNamespaces(self):
        """Use children namespaces if not attached to a sheet, else the sheet's
        ones.
        """
        try:
            return self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            # no parent rule or no parent sheet: merge the children's ones
            namespaces = {}
            for selector in self.seq:
                namespaces.update(selector._namespaces)
            return namespaces

    def _getUsedUris(self):
        "Used by CSSStyleSheet to check if @namespace rules are needed"
        uris = set()
        for s in self:
            uris.update(s._getUsedUris())
        return uris

    _namespaces = property(__getNamespaces,
                           doc="""If this SelectorList is attached to a
        CSSStyleSheet the namespaces of that sheet are mirrored here.
        While the SelectorList (or parentRule(s) are not attached the
        namespaces of all children Selectors are used.""")

    def append(self, newSelector):
        "Same as :meth:`appendSelector`."
        self.appendSelector(newSelector)

    def appendSelector(self, newSelector):
        """
        Append `newSelector` to this list (a string will be converted to a
        :class:`~css_parser.css.Selector`). Selectors already in the list
        with the same ``selectorText`` are removed, so the new one ends up
        last.

        :param newSelector:
            a single selector (string or Selector object) or a tuple of
            `(newSelector, dict-of-namespaces)`
        :returns: New :class:`~css_parser.css.Selector` or ``None`` if
            `newSelector` is not wellformed.
        :exceptions:
            - :exc:`~xml.dom.NamespaceErr`:
              Raised if the specified selector uses an unknown namespace
              prefix.
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error
              and is unparsable.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if this rule is readonly.
        """
        self._checkReadonly()

        # might be (selectorText, namespaces)
        newSelector, namespaces = self._splitNamespacesOff(newSelector)
        try:
            # use parent's only if available
            namespaces = self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            # use already present namespaces plus new given ones
            _namespaces = self._namespaces
            _namespaces.update(namespaces)
            namespaces = _namespaces

        newSelector = self.__prepareset(newSelector, namespaces)
        if newSelector:
            # serialize the new selector once instead of once per item
            newText = newSelector.selectorText
            self.seq[:] = [s for s in self.seq if s.selectorText != newText]
            self.seq.append(newSelector)
            return newSelector
        return None

    def _getSelectorText(self):
        "Return serialized format."
        return css_parser.ser.do_css_SelectorList(self)

    def _setSelectorText(self, selectorText):
        """
        Replace all selectors. The current list is kept unchanged if any
        of the given selectors is invalid.

        :param selectorText:
            comma-separated list of selectors or a tuple of
            (selectorText, dict-of-namespaces)
        :exceptions:
            - :exc:`~xml.dom.NamespaceErr`:
              Raised if the specified selector uses an unknown namespace
              prefix.
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error
              and is unparsable.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if this rule is readonly.
        """
        self._checkReadonly()

        # might be (selectorText, namespaces)
        selectorText, namespaces = self._splitNamespacesOff(selectorText)
        try:
            # use parent's only if available
            namespaces = self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            pass

        wellformed = True
        tokenizer = self._tokenize2(selectorText)
        newseq = []

        # True: nothing parsed yet, ',': last selector ended with a comma,
        # None: last selector ended without one
        expected = True
        while True:
            # find all upto and including next ",", EOF or nothing
            selectortokens = self._tokensupto2(tokenizer, listseponly=True)
            if not selectortokens:
                break

            if self._tokenvalue(selectortokens[-1]) == ',':
                # keep the token *value*: the post condition below compares
                # against the string ',' which a raw token never equals
                expected = self._tokenvalue(selectortokens.pop())
            else:
                expected = None

            selector = Selector((selectortokens, namespaces),
                                parent=self)
            if selector.wellformed:
                newseq.append(selector)
            else:
                wellformed = False
                self._log.error('SelectorList: Invalid Selector: %s' %
                                self._valuestr(selectortokens))

        # post condition
        if ',' == expected:
            wellformed = False
            self._log.error('SelectorList: Cannot end with ",": %r' %
                            self._valuestr(selectorText))
        elif expected:
            wellformed = False
            self._log.error('SelectorList: Unknown Syntax: %r' %
                            self._valuestr(selectorText))

        if wellformed:
            self.seq = newseq

    selectorText = property(_getSelectorText, _setSelectorText,
                            doc="(css_parser) The textual representation of the "
                                "selector for a rule set.")

    length = property(lambda self: len(self),
                      doc="The number of :class:`~css_parser.css.Selector` "
                          "objects in the list.")

    parentRule = property(lambda self: self._parentRule,
                          doc="(DOM) The CSS rule that contains this "
                              "SelectorList or ``None`` if this SelectorList "
                              "is not attached to a CSSRule.")

    wellformed = property(lambda self: bool(len(self.seq)))
def as_list(p):
    """Return `p` itself if it already is a list, else a new list built
    from the iterable `p`."""
    if isinstance(p, list):
        return p
    return list(p)


class PropertyValue(css_parser.util._NewBase):
    """
    An unstructured list like holder for all values defined for a
    :class:`~css_parser.css.Property`. Contains :class:`~css_parser.css.Value`
    or subclass objects. Currently there is no access to the combinators of
    the defined values which might simply be space or comma or slash.

    You may:

    - iterate over all contained Value objects (not the separators like ``,``,
      ``/`` or `` `` though!)
    - get a Value item by index or use ``PropertyValue[index]``
    - find out the number of values defined (unstructured)
    """

    def __init__(self, cssText=None, parent=None, readonly=False):
        """
        :param cssText:
            the parsable cssText of the value
        :param parent:
            stored as ``parent``
        :param readonly:
            defaults to False
        """
        super(PropertyValue, self).__init__()

        self.parent = parent
        self.wellformed = False

        if cssText is not None:  # may be 0
            if isinstance(cssText, (int, float)):
                cssText = text_type(cssText)  # if it is a number
            self.cssText = cssText

        # assigned only after the initial cssText has been parsed
        self._readonly = readonly

    def __len__(self):
        return len(as_list(self.__items()))

    def __getitem__(self, index):
        "Return the Value at `index` or ``None`` if out of bounds."
        try:
            return as_list(self.__items())[index]
        except IndexError:
            return None

    def __iter__(self):
        "Generator which iterates over values."
        for item in self.__items():
            yield item

    def __repr__(self):
        return "css_parser.css.%s(%r)" % (self.__class__.__name__,
                                          self.cssText)

    def __str__(self):
        # The format string must consume all four arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object length=%r cssText=%r at " \
               "0x%x>" % (self.__class__.__name__,
                          self.length,
                          self.cssText,
                          id(self))

    def __items(self, seq=None):
        "a generator of Value objects only, no , / or ' '"
        if seq is None:
            seq = self.seq
        return (x.value for x in seq if isinstance(x.value, Value))

    def _setCssText(self, cssText):
        """
        Parse `cssText` and, if it is wellformed and contains at least one
        value, replace the current values. Numbers are converted to text
        first.

        Format::

            unary_operator
              : '-' | '+'
              ;
            operator
              : '/' S* | ',' S* | /* empty */
              ;
            expr
              : term [ operator term ]*
              ;
            term
              : unary_operator?
                [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* |
                  ANGLE S* | TIME S* | FREQ S* ]
              | STRING S* | IDENT S* | URI S* | hexcolor | function
              | UNICODE-RANGE S*
              ;
            function
              : FUNCTION S* expr ')' S*
              ;
            /*
             * There is a constraint on the color that it must
             * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
             * after the "#"; e.g., "#000" is OK, but "#abcd" is not.
             */
            hexcolor
              : HASH S*
              ;

        :exceptions:
            - :exc:`~xml.dom.SyntaxErr`:
              Raised if the specified CSS string value has a syntax error
              (according to the attached property) or is unparsable.
            - :exc:`~xml.dom.InvalidModificationErr`:
              TODO: Raised if the specified CSS string value represents a
              different type of values than the values allowed by the CSS
              property.
            - :exc:`~xml.dom.NoModificationAllowedErr`:
              Raised if this value is readonly.
        """
        if isinstance(cssText, (int, float)):
            cssText = text_type(cssText)  # if it is a number

        self._checkReadonly()

        # used as operator is , / or S
        nextSor = ',/'
        term = Choice(_ColorProd(self, nextSor),
                      _DimensionProd(self, nextSor),
                      _URIProd(self, nextSor),
                      _ValueProd(self, nextSor),
                      # _Rect(self, nextSor),
                      # all other functions
                      _CSSVariableProd(self, nextSor),
                      _MSValueProd(self, nextSor),
                      _CalcValueProd(self, nextSor),
                      _CSSFunctionProd(self, nextSor)
                      )
        operator = Choice(PreDef.S(toSeq=False),
                          PreDef.char('comma', ',',
                                      toSeq=lambda t, tokens: ('operator', t[1]),
                                      optional=True
                                      ),
                          PreDef.char('slash', '/',
                                      toSeq=lambda t, tokens: ('operator', t[1]),
                                      optional=True),
                          optional=True)
        prods = Sequence(term,
                         Sequence(  # mayEnd this Sequence if whitespace
                             operator,
                             # TODO: only when setting via other class
                             # used by variabledeclaration currently
                             PreDef.char('END', ';',
                                         stopAndKeep=True,
                                         optional=True),
                             # TODO: } and !important ends too!
                             term,
                             minmax=lambda: (0, None)))
        # parse
        ok, seq, store, unused = ProdParser().parse(cssText,
                                                    'PropertyValue',
                                                    prods)
        # must be at least one value!
        ok = ok and len(as_list(self.__items(seq))) > 0

        # a single child which is not wellformed invalidates the whole value
        for item in seq:
            if hasattr(item.value, 'wellformed') and not item.value.wellformed:
                ok = False
                break

        self.wellformed = ok
        if ok:
            self._setSeq(seq)
        else:
            self._log.error('PropertyValue: Unknown syntax or no value: %s' %
                            self._valuestr(cssText))

    cssText = property(lambda self: css_parser.ser.do_css_PropertyValue(self),
                       _setCssText,
                       doc="A string representation of the current value.")

    def item(self, index):
        """
        The value at position `index`. Alternatively simple use
        ``PropertyValue[index]``.

        :param index:
            position of the wanted value, counting Value objects only
        :returns:
            the Value at `index` or ``None`` if `index` is out of bounds
        """
        return self[index]

    length = property(lambda self: len(self),
                      doc="Number of values set.")

    value = property(lambda self: css_parser.ser.do_css_PropertyValue(self,
                                                                      valuesOnly=True),
                     doc="A string representation of the current value "
                         "without any comments used for validation.")
class Value(css_parser.util._NewBase):
    """
    Represents a single CSS value. For now simple values of
    IDENT, STRING, or UNICODE-RANGE values are represented directly
    as Value objects. Other values like e.g. FUNCTIONs are represented by
    subclasses with an extended API.
    """
    # constants for the ``type`` property
    IDENT = 'IDENT'
    STRING = 'STRING'
    UNICODE_RANGE = 'UNICODE-RANGE'
    URI = 'URI'

    DIMENSION = 'DIMENSION'
    NUMBER = 'NUMBER'
    PERCENTAGE = 'PERCENTAGE'

    COLOR_VALUE = 'COLOR_VALUE'
    HASH = 'HASH'

    FUNCTION = 'FUNCTION'
    CALC = 'CALC'
    VARIABLE = 'VARIABLE'

    # class level defaults, overwritten per instance when cssText is parsed
    _type = None
    _value = ''

    def __init__(self, cssText=None, parent=None, readonly=False):
        """
        :param cssText:
            the parsable cssText of the value; parsed only if truthy
        :param parent:
            stored as ``parent``
        :param readonly:
            stored as ``_readonly`` after the initial cssText was set
        """
        super(Value, self).__init__()

        self.parent = parent
        self.wellformed = False

        if cssText:
            self.cssText = cssText

        # `readonly` used to be accepted but silently dropped; store it
        # the same way PropertyValue does
        self._readonly = readonly

    def __repr__(self):
        return "css_parser.css.%s(%r)" % (self.__class__.__name__,
                                          self.cssText)

    def __str__(self):
        # The format string must consume all five arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object type=%s value=%r cssText=%r at 0x%x>" \
               % (self.__class__.__name__,
                  self.type,
                  self.value,
                  self.cssText,
                  id(self))

    def _setCssText(self, cssText):
        """Parse `cssText` as a single hexcolor, IDENT, STRING or
        UNICODE-RANGE and store its type and value if parsing succeeded."""
        self._checkReadonly()

        prods = Choice(PreDef.hexcolor(stop=True),
                       PreDef.ident(stop=True),
                       PreDef.string(stop=True),
                       PreDef.unicode_range(stop=True),
                       )
        ok, seq, store, unused = ProdParser().parse(cssText, 'Value', prods)
        self.wellformed = ok
        if ok:
            # only 1 value anyway!
            self._type = seq[0].type
            self._value = seq[0].value
            # state is replaced on success only
            self._setSeq(seq)

    cssText = property(lambda self: css_parser.ser.do_css_Value(self),
                       _setCssText,
                       doc='String value of this value.')

    type = property(lambda self: self._type,  # _setType,
                    doc="Type of this value, for now the production type "
                        "like e.g. `DIMENSION` or `STRING`. All types are "
                        "defined as constants in :class:`~css_parser.css.Value`.")

    def _setValue(self, value):
        # TODO: check!
        self._value = value

    value = property(lambda self: self._value, _setValue,
                     doc="Actual value if possible: An int or float or else "
                         " a string")
class ColorValue(Value):
    """
    A color value like rgb(), rgba(), hsl(), hsla() or #rgb, #rrggbb

    TODO: Color Keywords
    """
    from .colors import COLORS

    type = Value.COLOR_VALUE
    # hexcolor, FUNCTION?
    _colorType = None
    _red = 0
    _green = 0
    _blue = 0
    _alpha = 0

    def __str__(self):
        # The format string must consume all nine arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object type=%s value=%r colorType=%r " \
               "red=%s, green=%s, blue=%s, alpha=%s at 0x%x>" \
               % (self.__class__.__name__,
                  self.type,
                  self.value,
                  self.colorType,
                  self.red,
                  self.green,
                  self.blue,
                  self.alpha,
                  id(self))

    def _setCssText(self, cssText):
        """Parse `cssText` as a hex color, a named color or an rgb(),
        rgba(), hsl() or hsla() function and store the resulting
        red, green, blue and alpha components."""
        self._checkReadonly()
        types = self._prods  # rename!

        def _dimension(t, tokens):
            # every component is wrapped in a DimensionValue
            return (t[0], DimensionValue(pushtoken(t, tokens), parent=self))

        component = Choice(PreDef.unary(toSeq=_dimension),
                           PreDef.number(toSeq=_dimension),
                           PreDef.percentage(toSeq=_dimension)
                           )
        # functions without alpha: exactly 3 components
        noalp = Sequence(Prod(name='FUNCTION',
                              match=lambda t, v: t == types.FUNCTION and
                              v.lower() in ('rgb(', 'hsl('),
                              toSeq=lambda t, tokens: (t[0], normalize(t[1]))),
                         component,
                         Sequence(PreDef.comma(optional=True),
                                  component,
                                  minmax=lambda: (2, 2)
                                  ),
                         PreDef.funcEnd(stop=True)
                         )
        # functions with alpha: exactly 4 components
        witha = Sequence(Prod(name='FUNCTION',
                              match=lambda t, v: t == types.FUNCTION and
                              v.lower() in ('rgba(', 'hsla('),
                              toSeq=lambda t, tokens: (t[0], normalize(t[1]))
                              ),
                         component,
                         Sequence(PreDef.comma(optional=True),
                                  component,
                                  minmax=lambda: (3, 3)
                                  ),
                         PreDef.funcEnd(stop=True)
                         )
        namedcolor = Prod(name='Named Color',
                          match=lambda t, v: t == 'IDENT' and (
                              # direct mapping lookup, no temporary key list
                              normalize(v) in self.COLORS
                          ),
                          stop=True)

        prods = Choice(PreDef.hexcolor(stop=True),
                       namedcolor,
                       noalp,
                       witha)

        ok, seq, store, unused = ProdParser().parse(cssText,
                                                    self.type,
                                                    prods)
        self.wellformed = ok
        if ok:
            t, v = seq[0].type, seq[0].value
            if 'IDENT' == t:
                rgba = self.COLORS[normalize(v)]
            elif 'HASH' == t:
                if len(v) == 4:
                    # HASH #rgb: every digit is doubled
                    rgba = (int(2*v[1], 16),
                            int(2*v[2], 16),
                            int(2*v[3], 16),
                            1.0)
                else:
                    # HASH #rrggbb
                    rgba = (int(v[1:3], 16),
                            int(v[3:5], 16),
                            int(v[5:7], 16),
                            1.0)

            elif 'FUNCTION' == t:
                functiontype, raw, check = None, [], ''
                HSL = False

                for item in seq:
                    try:
                        type_ = item.value.type
                    except AttributeError:
                        # type of function, e.g. rgb(
                        if item.type == 'FUNCTION':
                            functiontype = item.value
                            HSL = functiontype in ('hsl(', 'hsla(')
                        # anything without a type is not a component
                        continue

                    # save components
                    if type_ == Value.NUMBER:
                        raw.append(item.value.value)
                        check += 'N'
                    elif type_ == Value.PERCENTAGE:
                        if HSL:
                            # save as percentage fraction
                            raw.append(item.value.value / 100.0)
                        else:
                            # save as real value of percentage of 255
                            raw.append(int(255 * item.value.value / 100))
                        check += 'P'

                if HSL:
                    # convert to rgb
                    # h is 360 based (circle)
                    h, s, l_ = raw[0] / 360.0, raw[1], raw[2]
                    # ORDER h l_ s !!!
                    r, g, b = colorsys.hls_to_rgb(h, l_, s)
                    # back to 255 based
                    rgba = [int(round(r*255)),
                            int(round(g*255)),
                            int(round(b*255))]

                    if len(raw) > 3:
                        rgba.append(raw[3])

                else:
                    # rgb, rgba
                    rgba = raw

                if len(rgba) < 4:
                    # no alpha given: fully opaque
                    rgba.append(1.0)

                # validate
                checks = {'rgb(': ('NNN', 'PPP'),
                          'rgba(': ('NNNN', 'PPPN'),
                          'hsl(': ('NPP',),
                          'hsla(': ('NPPN',)
                          }
                if check not in checks[functiontype]:
                    self._log.error('ColorValue has invalid %s) parameters: '
                                    '%s (N=Number, P=Percentage)' %
                                    (functiontype, check))

            self._colorType = t
            self._red, self._green, self._blue, self._alpha = tuple(rgba)
            # state is replaced on success only
            self._setSeq(seq)

    cssText = property(lambda self: css_parser.ser.do_css_ColorValue(self),
                       _setCssText,
                       doc="String value of this value.")

    value = property(lambda self: css_parser.ser.do_css_CSSFunction(self, True),
                     doc='Same as cssText but without comments.')

    type = property(lambda self: Value.COLOR_VALUE,
                    doc="Type is fixed to Value.COLOR_VALUE.")

    def _getName(self):
        "Return the first name in COLORS mapped to this color, else None."
        rgba = (self.red, self.green, self.blue, self.alpha)
        for n, v in self.COLORS.items():
            if v == rgba:
                return n
        return None

    colorType = property(lambda self: self._colorType,
                         doc="IDENT (red), HASH (#f00) or FUNCTION (rgb(255, 0, 0).")

    name = property(_getName,
                    doc='Name of the color if known (in ColorValue.COLORS) '
                        'else None')

    red = property(lambda self: self._red,
                   doc='red part as integer between 0 and 255')
    green = property(lambda self: self._green,
                     doc='green part as integer between 0 and 255')
    blue = property(lambda self: self._blue,
                    doc='blue part as integer between 0 and 255')
    alpha = property(lambda self: self._alpha,
                     doc='alpha part as float between 0.0 and 1.0')
class DimensionValue(Value):
    """
    A numerical value with an optional dimension like e.g. "px" or "%".

    Covers DIMENSION, PERCENTAGE or NUMBER values.
    """
    # groups: optional sign, number (integer or decimal), rest (dimension)
    __reUnNumDim = re.compile(r'^([+-]?)(\d*\.\d+|\d+)(.*)$', re.I | re.U | re.X)
    _dimension = None
    _sign = None

    def __str__(self):
        # The format string must consume all six arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object type=%s value=%r dimension=%r " \
               "cssText=%r at 0x%x>" \
               % (self.__class__.__name__,
                  self.type,
                  self.value,
                  self.dimension,
                  self.cssText,
                  id(self))

    def _setCssText(self, cssText):
        """Parse `cssText` as a single DIMENSION, NUMBER or PERCENTAGE and
        split it into sign, numeric value and dimension."""
        self._checkReadonly()

        prods = Sequence(  # PreDef.unary(),
            Choice(PreDef.dimension(stop=True),
                   PreDef.number(stop=True),
                   PreDef.percentage(stop=True)
                   )
        )
        ok, seq, store, unused = ProdParser().parse(cssText,
                                                    'DimensionValue',
                                                    prods)
        self.wellformed = ok
        if ok:
            item = seq[0]

            sign, v, d = self.__reUnNumDim.findall(
                normalize(item.value))[0]
            # a decimal point makes the value a float, else it is an int
            if '.' in v:
                val = float(sign + v)
            else:
                val = int(sign + v)

            self._sign = sign
            self._value = val
            # an empty dimension (plain NUMBER) is stored as None
            self._dimension = d or None
            self._type = item.type
            # state is replaced on success only
            self._setSeq(seq)

    cssText = property(lambda self: css_parser.ser.do_css_Value(self),
                       _setCssText,
                       doc="String value of this value including dimension.")

    dimension = property(lambda self: self._dimension,  # _setValue,
                         doc="Dimension if a DIMENSION or PERCENTAGE value, "
                             "else None")
class URIValue(Value):
    """
    An URI value like ``url(example.png)``.
    """
    _type = Value.URI
    _uri = Value._value

    def __str__(self):
        # The format string must consume all six arguments; an empty
        # format string makes the % operator raise TypeError.
        return "<css_parser.css.%s object type=%s value=%r uri=%r " \
               "cssText=%r at 0x%x>" \
               % (self.__class__.__name__,
                  self.type,
                  self.value,
                  self.uri,
                  self.cssText,
                  id(self))

    def _setCssText(self, cssText):
        "Parse `cssText` as a single URI and store its type and value."
        self._checkReadonly()

        prods = Sequence(PreDef.uri(stop=True))

        ok, seq, store, unused = ProdParser().parse(cssText, 'URIValue', prods)
        self.wellformed = ok
        if ok:
            # only 1 value only anyway
            self._type = seq[0].type
            self._value = seq[0].value
            # state is replaced on success only
            self._setSeq(seq)

    cssText = property(lambda self: css_parser.ser.do_css_Value(self),
                       _setCssText,
                       doc='String value of this value.')

    def _setUri(self, uri):
        # TODO: check?
        # `uri` is an alias of `value`: both read and write self._value
        self._value = uri

    uri = property(lambda self: self._value, _setUri,
                   doc="Actual URL without delimiters or the empty string")

    def absoluteUri(self):
        """Actual URL, made absolute if possible, else same as `uri`."""
        # Ancestry: PropertyValue, Property, CSSStyleDeclaration, CSSStyleRule,
        # CSSStyleSheet
        try:
            # TODO: better way?
            styleSheet = self.parent.parent.parent.parentRule.parentStyleSheet
        except AttributeError:
            # some link of the ancestry chain is missing
            return self.uri
        else:
            return urllib_urljoin(styleSheet.href, self.uri)

    absoluteUri = property(absoluteUri, doc=absoluteUri.__doc__)
class CSSFunction(Value):
    """
    A function value: a FUNCTION token, optionally followed by argument
    values, and the closing parenthesis.
    """
    _functionName = 'Function'

    def _productions(self):
        """Build and return the productions used to parse this function."""
        prodtypes = self._prods

        def is_function(t, v):
            # any FUNCTION token opens the value
            return t == prodtypes.FUNCTION

        def normalized_name(t, tokens):
            # the function name is stored normalized
            return (t[0], normalize(t[1]))

        def any_number():
            return (0, None)

        # every kind of value accepted as an argument
        argument = Choice(_ColorProd(self),
                          _DimensionProd(self),
                          _URIProd(self),
                          _ValueProd(self),
                          _CalcValueProd(self),
                          _CSSVariableProd(self),
                          _CSSFunctionProd(self))

        opening = Prod(name='FUNCTION',
                       match=is_function,
                       toSeq=normalized_name)
        # further arguments, each optionally preceded by a comma
        more_arguments = Sequence(PreDef.comma(optional=True),
                                  argument,
                                  minmax=any_number)
        with_arguments = Sequence(argument,
                                  more_arguments,
                                  PreDef.funcEnd(stop=True))
        # either at least one argument or an immediate ")"
        return Sequence(opening,
                        Choice(with_arguments,
                               PreDef.funcEnd(stop=True)))

    def _setCssText(self, cssText):
        "Parse `cssText` and keep the result only if it is wellformed."
        self._checkReadonly()
        parsed = ProdParser().parse(cssText,
                                    self.type,
                                    self._productions())
        wellformed, newseq = parsed[0], parsed[1]
        self.wellformed = wellformed
        if not wellformed:
            return
        self._setSeq(newseq)

    cssText = property(lambda self: css_parser.ser.do_css_CSSFunction(self),
                       _setCssText,
                       doc="String value of this value.")

    value = property(lambda self: css_parser.ser.do_css_CSSFunction(self, True),
                     doc='Same as cssText but without comments.')

    type = property(lambda self: Value.FUNCTION,
                    doc="Type is fixed to Value.FUNCTION.")
func = Prod(name='MSValue-Sub', match=lambda t, v: t == self._prods.FUNCTION, toSeq=lambda t, tokens: (MSValue._functionName, MSValue(pushtoken(t, tokens ), parent=self ) ) ) funcProds = Sequence(Prod(name='FUNCTION', match=lambda t, v: t == types.FUNCTION, toSeq=lambda t, tokens: (t[0], t[1]) ), Sequence(Choice(_ColorProd(self), _DimensionProd(self), _URIProd(self), _ValueProd(self), _MSValueProd(self), # _CalcValueProd(self), _CSSVariableProd(self), func, # _CSSFunctionProd(self), Prod(name='MSValuePart', match=lambda t, v: v != ')', toSeq=lambda t, tokens: (t[0], t[1]) ) ), minmax=lambda: (0, None) ), PreDef.funcEnd(stop=True) ) return funcProds def _setCssText(self, cssText): super(MSValue, self)._setCssText(cssText) cssText = property(lambda self: css_parser.ser.do_css_MSValue(self), _setCssText, doc="String value of this value.") class CSSCalc(CSSFunction): """The CSSCalc function represents a CSS calc() function. No further API is provided. For multiplication and division no check if one operand is a NUMBER is made. """ _functionName = 'CSSCalc' def __str__(self): return "" % ( self.__class__.__name__, id(self)) def _setCssText(self, cssText): self._checkReadonly() types = self._prods # rename! 
_operator = Choice(Prod(name='Operator */', match=lambda t, v: v in '*/', toSeq=lambda t, tokens: (t[0], t[1]) ), Sequence( PreDef.S(), Choice( Sequence( Prod(name='Operator */', match=lambda t, v: v in '*/', toSeq=lambda t, tokens: (t[0], t[1]) ), PreDef.S(optional=True) ), Sequence( Prod(name='Operator +-', match=lambda t, v: v in '+-', toSeq=lambda t, tokens: (t[0], t[1]) ), PreDef.S() ), PreDef.funcEnd(stop=True, mayEnd=True) ) ) ) def _operant(): return Choice(_DimensionProd(self), _CSSVariableProd(self)) prods = Sequence(Prod(name='CALC', match=lambda t, v: t == types.FUNCTION and normalize(v) == 'calc(' ), PreDef.S(optional=True), _operant(), Sequence(_operator, _operant(), minmax=lambda: (0, None) ), PreDef.funcEnd(stop=True) ) # store: name of variable ok, seq, store, unused = ProdParser().parse(cssText, 'CSSCalc', prods, checkS=True) self.wellformed = ok if ok: self._setSeq(seq) cssText = property(lambda self: css_parser.ser.do_css_CSSCalc(self), _setCssText, doc="String representation of calc function.") type = property(lambda self: Value.CALC, doc="Type is fixed to Value.CALC.") class CSSVariable(CSSFunction): """The CSSVariable represents a CSS variables like ``var(varname)``. A variable has a (nonnormalized!) `name` and a `value` which is tried to be resolved from any available CSSVariablesRule definition. """ _functionName = 'CSSVariable' _name = None _fallback = None def __str__(self): return "" % ( self.__class__.__name__, self.name, self.value, id(self)) def _setCssText(self, cssText): self._checkReadonly() types = self._prods # rename! 
prods = Sequence(Prod(name='var', match=lambda t, v: t == types.FUNCTION and normalize(v) == 'var(' ), PreDef.ident(toStore='ident'), Sequence(PreDef.comma(), Choice(_ColorProd(self, toStore='fallback'), _DimensionProd(self, toStore='fallback'), _URIProd(self, toStore='fallback'), _ValueProd(self, toStore='fallback'), _CalcValueProd(self, toStore='fallback'), _CSSVariableProd(self, toStore='fallback'), _CSSFunctionProd(self, toStore='fallback') ), minmax=lambda: (0, 1) ), PreDef.funcEnd(stop=True)) # store: name of variable store = {'ident': None, 'fallback': None} ok, seq, store, unused = ProdParser().parse(cssText, 'CSSVariable', prods) self.wellformed = ok if ok: self._name = store['ident'].value try: self._fallback = store['fallback'].value except KeyError: self._fallback = None self._setSeq(seq) cssText = property(lambda self: css_parser.ser.do_css_CSSVariable(self), _setCssText, doc="String representation of variable.") # TODO: writable? check if var (value) available? name = property(lambda self: self._name, doc="The name identifier of this variable referring to " "a value in a " ":class:`css_parser.css.CSSVariablesDeclaration`.") fallback = property(lambda self: self._fallback, doc="The fallback Value of this variable") type = property(lambda self: Value.VARIABLE, doc="Type is fixed to Value.VARIABLE.") def _getValue(self): "Find contained sheet and @variables there" rel = self while True: # find node which has parentRule to get to StyleSheet if hasattr(rel, 'parent'): rel = rel.parent else: break try: variables = rel.parentRule.parentStyleSheet.variables except AttributeError: return None else: try: return variables[self.name] except KeyError: return None value = property(_getValue, doc='The resolved actual value or None.') # helper for productions def _ValueProd(parent, nextSor=False, toStore=None): return Prod(name='Value', match=lambda t, v: t in ('IDENT', 'STRING', 'UNICODE-RANGE'), nextSor=nextSor, toStore=toStore, toSeq=lambda t, tokens: ('Value', 
Value(pushtoken(t, tokens), parent=parent) ) ) def _DimensionProd(parent, nextSor=False, toStore=None): return Prod(name='Dimension', match=lambda t, v: t in ('DIMENSION', 'NUMBER', 'PERCENTAGE'), nextSor=nextSor, toStore=toStore, toSeq=lambda t, tokens: ('DIMENSION', DimensionValue( pushtoken(t, tokens), parent=parent) ) ) def _URIProd(parent, nextSor=False, toStore=None): return Prod(name='URIValue', match=lambda t, v: t == 'URI', toStore=toStore, nextSor=nextSor, toSeq=lambda t, tokens: ('URIValue', URIValue( pushtoken(t, tokens), parent=parent) ) ) reHexcolor = re.compile(r'^\#(?:[0-9abcdefABCDEF]{3}|[0-9abcdefABCDEF]{6})$') def _ColorProd(parent, nextSor=False, toStore=None): return Prod(name='ColorValue', match=lambda t, v: (t == 'HASH' and reHexcolor.match(v) ) or (t == 'FUNCTION' and normalize(v) in ('rgb(', 'rgba(', 'hsl(', 'hsla(') ) or (t == 'IDENT' and normalize(v) in as_list(ColorValue.COLORS.keys()) ), nextSor=nextSor, toStore=toStore, toSeq=lambda t, tokens: ('ColorValue', ColorValue( pushtoken(t, tokens), parent=parent) ) ) def _CSSFunctionProd(parent, nextSor=False, toStore=None): return PreDef.function(nextSor=nextSor, toStore=toStore, toSeq=lambda t, tokens: (CSSFunction._functionName, CSSFunction( pushtoken(t, tokens), parent=parent) ) ) def _CalcValueProd(parent, nextSor=False, toStore=None): return Prod(name=CSSCalc._functionName, match=lambda t, v: t == PreDef.types.FUNCTION and normalize(v) == 'calc(', toStore=toStore, toSeq=lambda t, tokens: (CSSCalc._functionName, CSSCalc( pushtoken(t, tokens), parent=parent) ), nextSor=nextSor) def _CSSVariableProd(parent, nextSor=False, toStore=None): return PreDef.variable(nextSor=nextSor, toStore=toStore, toSeq=lambda t, tokens: (CSSVariable._functionName, CSSVariable( pushtoken(t, tokens), parent=parent) ) ) def _MSValueProd(parent, nextSor=False): return Prod(name=MSValue._functionName, match=lambda t, v: ( # t == self._prods.FUNCTION and ( normalize(v) in ('expression(', 'alpha(', 'blur(', 
'chroma(', 'dropshadow(', 'fliph(', 'flipv(', 'glow(', 'gray(', 'invert(', 'mask(', 'shadow(', 'wave(', 'xray(') or v.startswith('progid:DXImageTransform.Microsoft.') ), nextSor=nextSor, toSeq=lambda t, tokens: (MSValue._functionName, MSValue(pushtoken(t, tokens ), parent=parent ) ) ) def MediaQueryValueProd(parent): return Choice(_ColorProd(parent), _DimensionProd(parent), _ValueProd(parent), PreDef.ratio(), ) css-parser-1.0.4/src/css_parser/cssproductions.py0000644000175000017500000001147113407702010022435 0ustar kovidkovid00000000000000"""productions for css_parser based on a mix of CSS 2.1 and CSS 3 Syntax productions - http://www.w3.org/TR/css3-syntax - http://www.w3.org/TR/css3-syntax/#grammar0 open issues - numbers contain "-" if present - HASH: #aaa is, #000 is not anymore, CSS2.1: 'nmchar': r'[_a-z0-9-]|{nonascii}|{escape}', CSS3: 'nmchar': r'[_a-z-]|{nonascii}|{escape}', """ from __future__ import unicode_literals, division, absolute_import, print_function __all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS'] __docformat__ = 'restructuredtext' __version__ = '$Id$' # a complete list of css3 macros MACROS = { 'nonascii': r'[^\0-\177]', 'unicode': r'\\[0-9A-Fa-f]{1,6}(?:{nl}|{s})?', # 'escape': r'{unicode}|\\[ -~\200-\777]', 'escape': r'{unicode}|\\[^\n\r\f0-9a-f]', 'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}', 'nmchar': r'[-_a-zA-Z0-9]|{nonascii}|{escape}', 'string1': r'"([^\n\r\f\\"]|\\{nl}|{escape})*"', 'string2': r"'([^\n\r\f\\']|\\{nl}|{escape})*'", 'invalid1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*', 'invalid2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*", 'comment': r'\/\*[^*]*\*+([^/][^*]*\*+)*\/', 'ident': r'[-]?{nmstart}{nmchar}*', 'name': r'{nmchar}+', # TODO??? 
'num': r'[+-]?[0-9]*\.[0-9]+|[+-]?[0-9]+', # r'[-]?\d+|[-]?\d*\.\d+', 'string': r'{string1}|{string2}', # from CSS2.1 'invalid': r'{invalid1}|{invalid2}', 'url': r'[\x09\x21\x23-\x26\x28\x2a-\x7E]|{nonascii}|{escape}', 's': r'\t|\r|\n|\f|\x20', 'w': r'{s}*', 'nl': r'\n|\r\n|\r|\f', 'A': r'A|a|\\0{0,4}(?:41|61)(?:\r\n|[ \t\r\n\f])?', 'B': r'B|b|\\0{0,4}(?:42|62)(?:\r\n|[ \t\r\n\f])?', 'C': r'C|c|\\0{0,4}(?:43|63)(?:\r\n|[ \t\r\n\f])?', 'D': r'D|d|\\0{0,4}(?:44|64)(?:\r\n|[ \t\r\n\f])?', 'E': r'E|e|\\0{0,4}(?:45|65)(?:\r\n|[ \t\r\n\f])?', 'F': r'F|f|\\0{0,4}(?:46|66)(?:\r\n|[ \t\r\n\f])?', 'G': r'G|g|\\0{0,4}(?:47|67)(?:\r\n|[ \t\r\n\f])?|\\G|\\g', 'H': r'H|h|\\0{0,4}(?:48|68)(?:\r\n|[ \t\r\n\f])?|\\H|\\h', 'I': r'I|i|\\0{0,4}(?:49|69)(?:\r\n|[ \t\r\n\f])?|\\I|\\i', 'K': r'K|k|\\0{0,4}(?:4b|6b)(?:\r\n|[ \t\r\n\f])?|\\K|\\k', 'L': r'L|l|\\0{0,4}(?:4c|6c)(?:\r\n|[ \t\r\n\f])?|\\L|\\l', 'M': r'M|m|\\0{0,4}(?:4d|6d)(?:\r\n|[ \t\r\n\f])?|\\M|\\m', 'N': r'N|n|\\0{0,4}(?:4e|6e)(?:\r\n|[ \t\r\n\f])?|\\N|\\n', 'O': r'O|o|\\0{0,4}(?:4f|6f)(?:\r\n|[ \t\r\n\f])?|\\O|\\o', 'P': r'P|p|\\0{0,4}(?:50|70)(?:\r\n|[ \t\r\n\f])?|\\P|\\p', 'R': r'R|r|\\0{0,4}(?:52|72)(?:\r\n|[ \t\r\n\f])?|\\R|\\r', 'S': r'S|s|\\0{0,4}(?:53|73)(?:\r\n|[ \t\r\n\f])?|\\S|\\s', 'T': r'T|t|\\0{0,4}(?:54|74)(?:\r\n|[ \t\r\n\f])?|\\T|\\t', 'U': r'U|u|\\0{0,4}(?:55|75)(?:\r\n|[ \t\r\n\f])?|\\U|\\u', 'V': r'V|v|\\0{0,4}(?:56|76)(?:\r\n|[ \t\r\n\f])?|\\V|\\v', 'X': r'X|x|\\0{0,4}(?:58|78)(?:\r\n|[ \t\r\n\f])?|\\X|\\x', 'Z': r'Z|z|\\0{0,4}(?:5a|7a)(?:\r\n|[ \t\r\n\f])?|\\Z|\\z', } # The following productions are the complete list of tokens # used by css_parser, a mix of CSS3 and some CSS2.1 productions. 
# The productions are **ordered**: PRODUCTIONS = [ # UTF8_BOM or UTF8_BOM_SIG will only be checked at beginning of CSS ('BOM', '\xfe\xff|\xef\xbb\xbf'), ('S', r'{s}+'), # 1st in list of general productions ('URI', r'{U}{R}{L}\({w}({string}|{url}*){w}\)'), ('RATIO', r'[^\(]{w}[0-9]+{w}\/{w}[0-9]+'), ('UNICODE-RANGE', r'{U}\+[0-9A-Fa-f?]{1,6}(\-[0-9A-Fa-f]{1,6})?'), ('IDENT', r'{ident}'), ('FUNCTION', r'{ident}\('), ('DIMENSION', r'{num}{ident}'), ('PERCENTAGE', r'{num}\%'), ('NUMBER', r'{num}'), ('HASH', r'\#{name}'), ('COMMENT', r'{comment}'), # r'\/\*[^*]*\*+([^/][^*]*\*+)*\/'), ('STRING', r'{string}'), ('INVALID', r'{invalid}'), # from CSS2.1 ('ATKEYWORD', r'@{ident}'), # other keywords are done in the tokenizer ('INCLUDES', r'\~\='), ('DASHMATCH', r'\|\='), ('PREFIXMATCH', r'\^\='), ('SUFFIXMATCH', r'\$\='), ('SUBSTRINGMATCH', r'\*\='), ('CDO', r'\<\!\-\-'), ('CDC', r'\-\-\>'), ('CHAR', r'[^"\']') # MUST always be last # valid ony at start so not checked everytime # ('CHARSET_SYM', r'@charset '), # from Errata includes ending space! 
# checked specially if fullsheet is parsed ] class CSSProductions(object): """ most attributes are set later """ EOF = True # removed from productions as they simply are ATKEYWORD until # tokenizing CHARSET_SYM = 'CHARSET_SYM' FONT_FACE_SYM = 'FONT_FACE_SYM' MEDIA_SYM = 'MEDIA_SYM' IMPORT_SYM = 'IMPORT_SYM' NAMESPACE_SYM = 'NAMESPACE_SYM' PAGE_SYM = 'PAGE_SYM' VARIABLES_SYM = 'VARIABLES_SYM' for i, t in enumerate(PRODUCTIONS): setattr(CSSProductions, t[0].replace('-', '_'), t[0]) # may be enabled by settings.set _DXImageTransform = ('FUNCTION', r'progid\:DXImageTransform\.Microsoft\..+\(' ) css-parser-1.0.4/src/css_parser/encutils/0000755000175000017500000000000013413156014020627 5ustar kovidkovid00000000000000css-parser-1.0.4/src/css_parser/encutils/__init__.py0000644000175000017500000005472413407702010022750 0ustar kovidkovid00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- """encutils - encoding detection collection for Python :Version: 0.9.8 :Author: Christof Hoeke, see http://cthedot.de/encutils/ :Contributor: Robert Siemer, Fredrik Hedman ported to python3 :Copyright: 2005-2012: Christof Hoeke :License: encutils has a dual-license, please choose whatever you prefer: * encutils is published under the `LGPL 3 or later `__ * encutils is published under the `Creative Commons License `__. encutils is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. encutils is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with encutils. If not, see . 
A collection of helper functions to detect encodings of text files (like HTML, XHTML, XML, CSS, etc.) retrieved via HTTP, file or string. :func:`getEncodingInfo` is probably the main function of interest which uses other supplied functions itself and gathers all information together and supplies an :class:`EncodingInfo` object. example:: >>> import encutils >>> info = encutils.getEncodingInfo(url='http://cthedot.de/encutils/') >>> str(info) utf-8 >>> repr(info) # doctest:+ELLIPSIS >>> info.logtext HTTP media_type: text/html HTTP encoding: utf-8 Encoding (probably): utf-8 (Mismatch: False) references XML RFC 3023 (http://www.ietf.org/rfc/rfc3023.txt) easier explained in - http://feedparser.org/docs/advanced.html - http://www.xml.com/pub/a/2004/07/21/dive.html HTML http://www.w3.org/TR/REC-html40/charset.html#h-5.2.2 TODO - parse @charset of HTML elements? - check for more texttypes if only text given """ from __future__ import unicode_literals, division, absolute_import, print_function __all__ = [ 'buildlog', 'encodingByMediaType', 'getHTTPInfo', 'getMetaInfo', 'detectXMLEncoding', 'getEncodingInfo', 'tryEncodings', 'EncodingInfo' ] __docformat__ = 'restructuredtext' __author__ = 'Christof Hoeke, Robert Siemer, Fredrik Hedman' __version__ = '$Id$' import sys import cgi import re import types PY2x = sys.version_info < (3, 0) if PY2x: from StringIO import StringIO as io_StringIO from HTMLParser import HTMLParser as htmlparser_HTMLParser from HTMLParser import HTMLParseError as htmlparser_HTMLParseError from urllib2 import urlopen as urllib_urlopen else: from io import StringIO as io_StringIO from html.parser import HTMLParser as htmlparser_HTMLParser from urllib.request import urlopen as urllib_urlopen VERSION = '0.9.8' class _MetaHTMLParser(htmlparser_HTMLParser): """Parse given data for .""" content_type = None def handle_starttag(self, tag, attrs): if tag == 'meta' and not self.content_type: atts = dict([(a.lower(), v.lower()) for a, v in attrs]) if 
atts.get('http-equiv', '').strip() == 'content-type': self.content_type = atts.get('content') # application/xml, application/xml-dtd, application/xml-external-parsed-entity, or a subtype like application/rss+xml. _XML_APPLICATION_TYPE = 0 # text/xml, text/xml-external-parsed-entity, or a subtype like text/AnythingAtAll+xml _XML_TEXT_TYPE = 1 # text/html _HTML_TEXT_TYPE = 2 # any other of text/* like text/plain, ... _TEXT_TYPE = 3 # any text/* like which defaults to UTF-8 encoding, for now only text/css _TEXT_UTF8 = 5 # types not fitting in above types _OTHER_TYPE = 4 class EncodingInfo(object): """ All encoding related information, returned by :func:`getEncodingInfo`. Attributes filled: - ``encoding``: The guessed encoding Encoding is the explicit or implicit encoding or None and always lowercase. - from HTTP response * ``http_encoding`` * ``http_media_type`` - from HTML element * ``meta_encoding`` * ``meta_media_type`` - from XML declaration * ``xml_encoding`` - ``mismatch``: True if mismatch between XML declaration and HTTP header. Mismatch is True if any mismatches between HTTP header, XML declaration or textcontent (meta) are found. More detailed mismatch reports are written to the optional log or ``logtext`` Mismatches are not necessarily errors as preferences are defined. For details see the specifications. 
- ``logtext``: if no log was given log reports are given here """ def __init__(self): """Initialize all possible properties to ``None``, see class description """ self.encoding = self.mismatch = self.logtext =\ self.http_encoding = self.http_media_type =\ self.meta_encoding = self.meta_media_type =\ self.xml_encoding = None def __str__(self): """Output the guessed encoding itself or the empty string.""" if self.encoding: return self.encoding else: return '' def __repr__(self): return "<%s.%s object encoding=%r mismatch=%s at 0x%x>" % ( self.__class__.__module__, self.__class__.__name__, self.encoding, self.mismatch, id(self)) def buildlog(logname='encutils', level='INFO', stream=sys.stderr, filename=None, filemode="w", format='%(levelname)s\t%(message)s'): """Helper to build a basic log - if `filename` is given returns a log logging to `filename` with mode `filemode` - else uses a log streaming to `stream` which defaults to `sys.stderr` - `level` defines the level of the log - `format` defines the formatter format of the log :returns: a log with the name `logname` """ import logging log = logging.getLogger(logname) if filename: hdlr = logging.FileHandler(filename, filemode) else: hdlr = logging.StreamHandler(stream) formatter = logging.Formatter(format) hdlr.setFormatter(formatter) log.addHandler(hdlr) log.setLevel(logging.__dict__.get(level, logging.INFO)) return log def _getTextTypeByMediaType(media_type, log=None): """ :returns: type as defined by constants in this class """ if not media_type: return _OTHER_TYPE xml_application_types = [ r'application/.*?\+xml', 'application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity' ] xml_text_types = [ r'text\/.*?\+xml', 'text/xml', 'text/xml-external-parsed-entity' ] media_type = media_type.strip().lower() if media_type in xml_application_types or\ re.match(xml_application_types[0], media_type, re.I | re.S | re.X): return _XML_APPLICATION_TYPE elif media_type in xml_text_types or\ 
re.match(xml_text_types[0], media_type, re.I | re.S | re.X): return _XML_TEXT_TYPE elif media_type == 'text/html': return _HTML_TEXT_TYPE elif media_type == 'text/css': return _TEXT_UTF8 elif media_type.startswith('text/'): return _TEXT_TYPE else: return _OTHER_TYPE def _getTextType(text, log=None): """Check if given text is XML (**naive test!**) used if no content-type given """ if text[:30].find('`` element if available in `text`. XHTML format:: """ p = _MetaHTMLParser() try: p.feed(text) except htmlparser_HTMLParseError: pass if p.content_type: media_type, params = cgi.parse_header(p.content_type) encoding = params.get('charset') # defaults to None if encoding: encoding = encoding.lower() if log: log.info('HTML META media_type: %s', media_type) log.info('HTML META encoding: %s', encoding) else: media_type = encoding = None return media_type, encoding def detectXMLEncoding(fp, log=None, includeDefault=True): """Attempt to detect the character encoding of the xml file given by a file object `fp`. `fp` must not be a codec wrapped file object! `fp` may be a string or unicode string though. Based on a recipe by Lars Tiede: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/363841 which itself is based on Paul Prescotts recipe: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52257 :returns: - if detection of the BOM succeeds, the codec name of the corresponding unicode charset is returned - if BOM detection fails, the xml declaration is searched for the encoding attribute and its value returned. the "<" character has to be the very first in the file then (it's xml standard after all). - if BOM and xml declaration fail, utf-8 is returned according to XML 1.0. 
""" if PY2x and isinstance(fp, types.StringTypes): fp = io_StringIO(fp) elif isinstance(fp, str): fp = io_StringIO(fp) # detection using BOM # the BOMs we know, by their pattern bomDict = { # bytepattern: name (0x00, 0x00, 0xFE, 0xFF): "utf_32_be", (0xFF, 0xFE, 0x00, 0x00): "utf_32_le", (0xFE, 0xFF, None, None): "utf_16_be", (0xFF, 0xFE, None, None): "utf_16_le", (0xEF, 0xBB, 0xBF, None): "utf-8", } # go to beginning of file and get the first 4 bytes oldFP = fp.tell() fp.seek(0) (byte1, byte2, byte3, byte4) = tuple(map(ord, fp.read(4))) # try bom detection using 4 bytes, 3 bytes, or 2 bytes bomDetection = bomDict.get((byte1, byte2, byte3, byte4)) if not bomDetection: bomDetection = bomDict.get((byte1, byte2, byte3, None)) if not bomDetection: bomDetection = bomDict.get((byte1, byte2, None, None)) # if BOM detected, we're done :-) if bomDetection: if log: log.info('XML BOM encoding: %s' % bomDetection) fp.seek(oldFP) return bomDetection # still here? BOM detection failed. # now that BOM detection has failed we assume one byte character # encoding behaving ASCII # search xml declaration for encoding attribute # assume xml declaration fits into the first 2 KB (*cough*) fp.seek(0) buffer = fp.read(2048) # set up regular expression xmlDeclPattern = r""" ^<\?xml # w/o BOM, xmldecl starts with # what's matched in the brackets will be named encstr [^"']+ # every character not delimiter (not overly exact!) ) # closes the brackets pair for the named group ["'] # attribute end delimiter .*? 
# some chars optionally (standalone decl or whitespace) \?> # xmldecl end """ xmlDeclRE = re.compile(xmlDeclPattern, re.VERBOSE) # search and extract encoding string match = xmlDeclRE.search(buffer) fp.seek(oldFP) if match: enc = match.group("encstr").lower() if log: log.info('XML encoding="%s"' % enc) return enc else: if includeDefault: if log: log.info('XML encoding default utf-8') return 'utf-8' else: return None def tryEncodings(text, log=None): """If installed uses chardet http://chardet.feedparser.org/ to detect encoding, else tries different encodings on `text` and returns the one that does not raise an exception which is not very advanced or may be totally wrong. The tried encoding are in order 'ascii', 'iso-8859-1', 'windows-1252' (which probably will never happen as 'iso-8859-1' can decode these strings too) and lastly 'utf-8'. :param text: a byte string :returns: Working encoding or ``None`` if no encoding does work at all. The returned encoding might nevertheless be not the one intended by the author as it is only checked if the text might be encoded in that encoding. Some texts might be working in "iso-8859-1" *and* "windows-1252" *and* "ascii" *and* "utf-8" and ... """ try: import chardet encoding = chardet.detect(text)["encoding"] except ImportError: msg = 'Using simplified encoding detection, you might want to install chardet.' if log: log.warning(msg) else: print(msg) encodings = ( 'ascii', 'iso-8859-1', # 'windows-1252', # test later 'utf-8') encoding = None for e in encodings: try: text.decode(e) except UnicodeDecodeError: pass else: if 'iso-8859-1' == e: try: if '€' in text.decode('windows-1252'): return 'windows-1252' except UnicodeDecodeError: pass return e return encoding def getEncodingInfo(response=None, text='', log=None, url=None): """Find all encoding related information in given `text`. Information in headers of supplied HTTPResponse, possible XML declaration and X/HTML ```` elements are used. :param response: HTTP response object, e.g. 
via ``urllib.urlopen('url')`` :param text: a byte string to guess encoding for. XML prolog with encoding pseudo attribute or HTML meta element will be used to detect the encoding :param url: When given fetches document at `url` and all needed information. No `reponse` or `text` parameters are needed in this case. :param log: an optional logging logger to which messages may go, if no log given all log messages are available from resulting ``EncodingInfo`` :returns: instance of :class:`EncodingInfo`. How the resulting encoding is retrieved: XML RFC 3023 states if media type given in the Content-Type HTTP header is application/xml, application/xml-dtd, application/xml-external-parsed-entity, or any one of the subtypes of application/xml such as application/atom+xml or application/rss+xml etc then the character encoding is determined in this order: 1. the encoding given in the charset parameter of the Content-Type HTTP header, or 2. the encoding given in the encoding attribute of the XML declaration within the document, or 3. utf-8. Mismatch possibilities: - HTTP + XMLdecla - HTTP + HTMLmeta application/xhtml+xml ? XMLdecla + HTMLmeta If the media type given in the Content-Type HTTP header is text/xml, text/xml-external-parsed-entity, or a subtype like text/Anything+xml, the encoding attribute of the XML declaration is ignored completely and the character encoding is determined in the order: 1. the encoding given in the charset parameter of the Content-Type HTTP header, or 2. ascii. No mismatch possible. If no media type is given the XML encoding pseuso attribute is used if present. No mismatch possible. HTML For HTML served as text/html: http://www.w3.org/TR/REC-html40/charset.html#h-5.2.2 1. An HTTP "charset" parameter in a "Content-Type" field. (maybe defaults to ISO-8859-1, but should not assume this) 2. A META declaration with "http-equiv" set to "Content-Type" and a value set for "charset". 3. 
The charset attribute set on an element that designates an external resource. (NOT IMPLEMENTED HERE YET) Mismatch possibilities: - HTTP + HTMLmeta TEXT For most text/* types the encoding will be reported as iso-8859-1. Exceptions are XML formats send as text/* mime type (see above) and text/css which has a default encoding of UTF-8. """ if url: # may cause IOError which is raised response = urllib_urlopen(url) if text is None: # read text from response only if not explicitly given try: text = response.read() except IOError: pass if text is None: # text must be a string (not None) text = '' encinfo = EncodingInfo() logstream = io_StringIO() if not log: log = buildlog(stream=logstream, format='%(message)s') # HTTP if response: encinfo.http_media_type, encinfo.http_encoding = getHTTPInfo( response, log) texttype = _getTextTypeByMediaType(encinfo.http_media_type, log) else: # check if maybe XML or (TODO:) HTML texttype = _getTextType(text, log) # XML only served as application/xml ! #(also XHTML served as text/html) if texttype == _XML_APPLICATION_TYPE: # or texttype == _XML_TEXT_TYPE: try: encinfo.xml_encoding = detectXMLEncoding(text, log) except (AttributeError, ValueError): encinfo.xml_encoding = None # XML (also XHTML served as text/html) if texttype == _HTML_TEXT_TYPE: try: encinfo.xml_encoding = detectXMLEncoding( text, log, includeDefault=False) except (AttributeError, ValueError): encinfo.xml_encoding = None # HTML if texttype == _HTML_TEXT_TYPE or texttype == _TEXT_TYPE: encinfo.meta_media_type, encinfo.meta_encoding = getMetaInfo(text, log) # guess # 1. HTTP charset? encinfo.encoding = encinfo.http_encoding encinfo.mismatch = False # 2. media_type? # XML application/... 
if texttype == _XML_APPLICATION_TYPE: if not encinfo.encoding: encinfo.encoding = encinfo.xml_encoding # xml_encoding has default of utf-8 # text/html elif texttype == _HTML_TEXT_TYPE: if not encinfo.encoding: encinfo.encoding = encinfo.meta_encoding if not encinfo.encoding: encinfo.encoding = encodingByMediaType(encinfo.http_media_type) if not encinfo.encoding: encinfo.encoding = tryEncodings(text) # text/... + xml or text/* elif texttype == _XML_TEXT_TYPE or texttype == _TEXT_TYPE: if not encinfo.encoding: encinfo.encoding = encodingByMediaType(encinfo.http_media_type) elif texttype == _TEXT_UTF8: if not encinfo.encoding: encinfo.encoding = encodingByMediaType(encinfo.http_media_type) # possible mismatches, checks if present at all and then if equal # HTTP + XML if encinfo.http_encoding and encinfo.xml_encoding and\ encinfo.http_encoding != encinfo.xml_encoding: encinfo.mismatch = True log.warning('"%s" (HTTP) != "%s" (XML) encoding mismatch' % (encinfo.http_encoding, encinfo.xml_encoding)) # HTTP + Meta if encinfo.http_encoding and encinfo.meta_encoding and\ encinfo.http_encoding != encinfo.meta_encoding: encinfo.mismatch = True log.warning('"%s" (HTTP) != "%s" (HTML ) encoding mismatch' % (encinfo.http_encoding, encinfo.meta_encoding)) # XML + Meta if encinfo.xml_encoding and encinfo.meta_encoding and\ encinfo.xml_encoding != encinfo.meta_encoding: encinfo.mismatch = True log.warning('"%s" (XML) != "%s" (HTML ) encoding mismatch' % (encinfo.xml_encoding, encinfo.meta_encoding)) log.info('Encoding (probably): %s (Mismatch: %s)', encinfo.encoding, encinfo.mismatch) encinfo.logtext = logstream.getvalue() return encinfo if __name__ == '__main__': import pydoc pydoc.help(__name__) css-parser-1.0.4/src/css_parser/errorhandler.py0000644000175000017500000001017213407702010022037 0ustar kovidkovid00000000000000#!/usr/bin/env python from __future__ import unicode_literals, division, absolute_import, print_function import xml.dom import logging """css_parser ErrorHandler 
ErrorHandler used as log with usual levels (debug, info, warn, error) if instanciated with ``raiseExceptions=True`` raises exeptions instead of logging log defaults to instance of ErrorHandler for any kind of log message from lexerm, parser etc. - raiseExceptions = [False, True] - setloglevel(loglevel) """ __all__ = ['ErrorHandler'] __docformat__ = 'restructuredtext' __version__ = '$Id$' import sys if sys.version_info[0] >= 3: from urllib.error import HTTPError as urllib_HTTPError from urllib.error import URLError as urllib_URLError else: from urllib2 import HTTPError as urllib_HTTPError from urllib2 import URLError as urllib_URLError class _ErrorHandler(object): """ handles all errors and log messages """ def __init__(self, log, defaultloglevel=logging.INFO, raiseExceptions=True): """ inits log if none given log for parse messages, default logs to sys.stderr defaultloglevel if none give this is logging.DEBUG raiseExceptions - True: Errors will be raised e.g. during building - False: Errors will be written to the log, this is the default behaviour when parsing """ # may be disabled during setting of known valid items self.enabled = True if log: self._log = log else: import sys self._log = logging.getLogger('CSSUTILS') hdlr = logging.StreamHandler(sys.stderr) formatter = logging.Formatter('%(levelname)s\t%(message)s') hdlr.setFormatter(formatter) self._log.addHandler(hdlr) self._log.setLevel(defaultloglevel) self.raiseExceptions = raiseExceptions def __getattr__(self, name): "use self._log items" calls = ('debug', 'info', 'warn', 'error', 'critical', 'fatal') other = ('setLevel', 'getEffectiveLevel', 'addHandler', 'removeHandler') if name in calls: if name == 'warn': name = 'warning' self._logcall = getattr(self._log, name) return self.__handle elif name in other: return getattr(self._log, name) else: raise AttributeError( '(errorhandler) No Attribute %r found' % name) def __handle(self, msg='', token=None, error=xml.dom.SyntaxErr, neverraise=False, args=None): """ 
handles all calls logs or raises exception """ if self.enabled: if error is None: error = xml.dom.SyntaxErr line, col = None, None if token: if isinstance(token, tuple): value, line, col = token[1], token[2], token[3] else: value, line, col = token.value, token.line, token.col msg = '%s [%s:%s: %s]' % ( msg, line, col, value) if error and self.raiseExceptions and not neverraise: if isinstance(error, urllib_HTTPError) or isinstance(error, urllib_URLError): raise elif issubclass(error, xml.dom.DOMException): error.line = line error.col = col raise error(msg) else: self._logcall(msg) def setLog(self, log): """set log of errorhandler's log""" self._log = log class ErrorHandler(_ErrorHandler): "Singleton, see _ErrorHandler" instance = None def __init__(self, log=None, defaultloglevel=logging.INFO, raiseExceptions=True): if ErrorHandler.instance is None: ErrorHandler.instance = _ErrorHandler(log=log, defaultloglevel=defaultloglevel, raiseExceptions=raiseExceptions) self.__dict__ = ErrorHandler.instance.__dict__ css-parser-1.0.4/src/css_parser/helper.py0000644000175000017500000000766213407702010020641 0ustar kovidkovid00000000000000"""css_parser helper TEST """ from __future__ import unicode_literals, division, absolute_import, print_function __docformat__ = 'restructuredtext' __version__ = '$Id: errorhandler.py 1234 2008-05-22 20:26:12Z cthedot $' import os import re import sys if sys.version_info[0] >= 3: from urllib.request import pathname2url as urllib_pathname2url else: from urllib import pathname2url as urllib_pathname2url class Deprecated(object): """This is a decorator which can be used to mark functions as deprecated. It will result in a warning being emitted when the function is used. It accepts a single paramter ``msg`` which is shown with the warning. It should contain information which function or method to use instead. 
def string(value):
    r"""Return `value` serialized as a double quoted CSS string.

    Newline, carriage return and form feed are written as the CSS escapes
    ``\a ``, ``\d `` and ``\c ``, double quotes are escaped with a
    backslash and a trailing backslash is doubled so that it cannot escape
    the closing quote.
    """
    # \n = 0xa, \r = 0xd, \f = 0xc
    escapes = (
        ('\n', '\\a '),
        ('\r', '\\d '),
        ('\f', '\\c '),
        ('"', '\\"'),
    )
    escaped = value
    for raw, replacement in escapes:
        escaped = escaped.replace(raw, replacement)

    if escaped[-1:] == '\\':
        escaped += '\\'

    return '"' + escaped + '"'
class CSSParser(object):
    """Parse a CSS StyleSheet from URL, string or file and return a DOM Level
    2 CSS StyleSheet object.

    Usage::

        parser = CSSParser()
        # optionally
        parser.setFetcher(fetcher)
        sheet = parser.parseFile('test1.css', 'ascii')
        print(sheet.cssText)
    """

    def __init__(self, log=None, loglevel=None, raiseExceptions=None,
                 fetcher=None, parseComments=True, validate=True):
        """
        :param log:
            logging object
        :param loglevel:
            logging loglevel
        :param raiseExceptions:
            if log should simply log (default) or raise errors during
            parsing. Later while working with the resulting sheets
            the setting used in css_parser.log.raiseExeptions is used
        :param fetcher:
            see ``setFetcher(fetcher)``
        :param parseComments:
            if comments should be added to CSS DOM or simply omitted
        :param validate:
            if parsing should validate, may be overwritten in parse methods
        """
        if log is not None:
            css_parser.log.setLog(log)
        if loglevel is not None:
            css_parser.log.setLevel(loglevel)

        # remember global setting
        self.__globalRaising = css_parser.log.raiseExceptions
        if raiseExceptions:
            self.__parseRaising = raiseExceptions
        else:
            # DEFAULT during parse
            self.__parseRaising = False

        self.__tokenizer = tokenize2.Tokenizer(doComments=parseComments)
        self.setFetcher(fetcher)

        self._validate = validate

    def __parseSetting(self, parse):
        """during parse exceptions may be handled differently depending on
        init parameter ``raiseExceptions``
        """
        if parse:
            css_parser.log.raiseExceptions = self.__parseRaising
        else:
            css_parser.log.raiseExceptions = self.__globalRaising

    def parseStyle(self, cssText, encoding='utf-8', validate=None):
        """Parse given `cssText` which is assumed to be the content of
        a HTML style attribute.

        :param cssText:
            CSS string to parse
        :param encoding:
            It will be used to decode `cssText` if given as a (byte)
            string.
        :param validate:
            If given defines if validation is used. Uses CSSParser settings as
            fallback
        :returns:
            :class:`~css_parser.css.CSSStyleDeclaration`
        """
        self.__parseSetting(True)
        try:
            if isinstance(cssText, bytes):
                # TODO: use codecs.getdecoder('css') here?
                cssText = cssText.decode(encoding)
            if validate is None:
                validate = self._validate
            return css.CSSStyleDeclaration(cssText, validating=validate)
        finally:
            # restore the global setting even if decoding or parsing raised
            self.__parseSetting(False)

    def parseString(self, cssText, encoding=None, href=None, media=None,
                    title=None, validate=None):
        """Parse `cssText` as :class:`~css_parser.css.CSSStyleSheet`.
        Errors may be raised (e.g. UnicodeDecodeError).

        :param cssText:
            CSS string to parse
        :param encoding:
            If ``None`` the encoding will be read from BOM or an @charset
            rule or defaults to UTF-8.
            If given overrides any found encoding including the ones for
            imported sheets.
            It also will be used to decode `cssText` if given as a (byte)
            string.
        :param href:
            The ``href`` attribute to assign to the parsed style sheet.
            Used to resolve other urls in the parsed sheet like @import hrefs.
        :param media:
            The ``media`` attribute to assign to the parsed style sheet
            (may be a MediaList, list or a string).
        :param title:
            The ``title`` attribute to assign to the parsed style sheet.
        :param validate:
            If given defines if validation is used. Uses CSSParser settings as
            fallback
        :returns:
            :class:`~css_parser.css.CSSStyleSheet`.
        """
        self.__parseSetting(True)
        try:
            # TODO: py3 needs bytes here!
            if isinstance(cssText, bytes):
                cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0]

            if validate is None:
                validate = self._validate

            sheet = css_parser.css.CSSStyleSheet(
                href=href,
                media=css_parser.stylesheets.MediaList(media),
                title=title,
                validating=validate)
            sheet._setFetcher(self.__fetcher)
            # tokenizing this ways closes open constructs and adds EOF
            sheet._setCssTextWithEncodingOverride(
                self.__tokenizer.tokenize(cssText, fullsheet=True),
                encodingOverride=encoding)
            return sheet
        finally:
            # restore the global setting even if decoding or parsing raised
            self.__parseSetting(False)

    def parseFile(self, filename, encoding=None,
                  href=None, media=None, title=None, validate=None):
        """Retrieve content from `filename` and parse it. Errors may be raised
        (e.g. IOError).

        :param filename:
            of the CSS file to parse, if no `href` is given filename is
            converted to a (file:) URL and set as ``href`` of resulting
            stylesheet.
            If `href` is given it is set as ``sheet.href``. Either way
            ``sheet.href`` is used to resolve e.g. stylesheet imports via
            @import rules.
        :param encoding:
            Value ``None`` defaults to encoding detection via BOM or an
            @charset rule.
            Other values override detected encoding for the sheet at
            `filename` including any imported sheets.
        :param href, media, title, validate:
            passed on to :meth:`parseString`
        :returns:
            :class:`~css_parser.css.CSSStyleSheet`.
        """
        if not href:
            href = path2url(filename)

        # the file is read as bytes, decoding is left to parseString;
        # ``with`` closes the file even if reading fails
        with open(filename, 'rb') as f:
            cssText = f.read()

        return self.parseString(cssText,
                                encoding=encoding,
                                href=href, media=media, title=title,
                                validate=validate)

    def parseUrl(self, href, encoding=None, media=None, title=None,
                 validate=None):
        """Retrieve content from URL `href` and parse it. Errors may be raised
        (e.g. URLError).

        :param href:
            URL of the CSS file to parse, will also be set as ``href`` of
            resulting stylesheet
        :param encoding:
            Value ``None`` defaults to encoding detection via HTTP, BOM or an
            @charset rule.
            A value overrides detected encoding for the sheet at ``href``
            including any imported sheets.
        :param media, title, validate:
            passed on to :meth:`parseString`
        :returns:
            :class:`~css_parser.css.CSSStyleSheet` or ``None`` if no content
            could be read from `href`.
        """
        encoding, enctype, text = css_parser.util._readUrl(
            href, fetcher=self.__fetcher, overrideEncoding=encoding)
        if enctype == 5:
            # do not use if defaulting to UTF-8
            encoding = None

        if text is None:
            return None
        return self.parseString(text, encoding=encoding,
                                href=href, media=media, title=title,
                                validate=validate)

    def setFetcher(self, fetcher=None):
        """Replace the default URL fetch function with a custom one.

        :param fetcher:
            A function which gets a single parameter

            ``url``
                the URL to read

            and must return ``(encoding, content)`` where ``encoding`` is the
            HTTP charset normally given via the Content-Type header (which may
            simply omit the charset in which case ``encoding`` would be
            ``None``) and ``content`` being the string (or unicode) content.

            The Mimetype should be 'text/css' but this has to be checked by the
            fetcher itself (the default fetcher emits a warning if encountering
            a different mimetype).

            Calling ``setFetcher`` with ``fetcher=None`` resets css_parser
            to use its default function.
        """
        self.__fetcher = fetcher
class Sequence(object):
    """A Sequence of productions (Choice or single Prod)."""

    def __init__(self, *prods, **options):
        """
        *prods
            Prod or Choice or Sequence objects
        **options:
            minmax = lambda: (1, 1)
                callback returning number of times this sequence may run,
                a maximum of ``None`` means unlimited
        """
        self._prods = prods
        try:
            minmax = options['minmax']
        except KeyError:
            def minmax():
                return (1, 1)

        self._min, self._max = minmax()
        if self._max is None:
            # unlimited
            self._max = sys.maxsize

        self._prodcount = len(self._prods)
        self.reset()

    def matches(self, token):
        """Called by Choice to try to find if Sequence matches."""
        for prod in self._prods:
            if prod.matches(token):
                return True
            try:
                if not prod.optional:
                    # a mandatory production which does not match ends the
                    # search, only leading optional ones may be skipped
                    break
            except AttributeError:
                pass
        return False

    def reset(self):
        """Reset this Sequence if it is nested."""
        self._roundstarted = False
        self._i = 0
        self._round = 0

    def _currentName(self):
        """Return current element of Sequence, used by name"""
        # TODO: current impl first only if 1st if an prod!
        for prod in self._prods[self._i:]:
            if not prod.optional:
                return str(prod)
        return 'Sequence'

    optional = property(lambda self: self._min == 0)

    def nextProd(self, token):
        """Return

        - next matching Prod or Choice
        - raises ParseError if nothing matches
        - raises Exhausted if sequence already done
        """
        while self._round < self._max:
            # state of this round, remembered before advancing
            i = self._i
            current_round = self._round
            p = self._prods[i]
            if i == 0:
                self._roundstarted = False

            # advance for the next call
            self._i += 1
            if self._i == self._prodcount:
                self._round += 1
                self._i = 0

            if p.matches(token):
                self._roundstarted = True
                # reset nested Choice or Prod to use from start
                p.reset()
                return p

            elif p.optional:
                continue

            elif current_round < self._min or self._roundstarted:
                raise Missing('Missing token for production %s' % p)

            elif not token:
                if self._roundstarted:
                    raise Missing('Missing token for production %s' % p)
                else:
                    raise Done()

            else:
                raise NoMatch('No match for %s in %s' % (token, self))

        if token:
            raise Exhausted('Extra token')

    def __repr__(self):
        return "<css_parser.prodsparser.%s object sequence=%r optional=%r at 0x%x>" % (
            self.__class__.__name__, self.__str__(), self.optional, id(self))

    def __str__(self):
        return 'Sequence(%s)' % ', '.join([str(x) for x in self._prods])
and tokenvalue returning True, False or raising ParseError toSeq callback (optional) or False calling toSeq(token, tokens) returns (type_, val) == (token[0], token[1]) to be appended to seq else simply unaltered (type_, val) if False nothing is added toStore (optional) key to save util.Item to store or callback(store, util.Item) optional = False whether Prod is optional or not stop = False if True stop parsing of tokens here stopAndKeep if True stop parsing of tokens here but return stopping token in unused tokens stopIfNoMoreMatch = False stop even if more tokens available, similar to stop and keep but with condition no more matches nextSor=False next is S or other like , or / (CSSValue) mayEnd = False no token must follow even defined by Sequence. Used for operator ',/ ' currently only storeToken = None if True toStore saves simple token tuple and not and Item object to store. Old style processing, TODO: resolve exception = None exception to be raised in case of error, normaly SyntaxErr """ self._name = name self.match = match self.optional = optional self.stop = stop self.stopAndKeep = stopAndKeep self.stopIfNoMoreMatch = stopIfNoMoreMatch self.nextSor = nextSor self.mayEnd = mayEnd self.storeToken = storeToken self.exception = exception def makeToStore(key): "Return a function used by toStore." def toStore(store, item): "Set or append store item." if key in store: _v = store[key] if not isinstance(_v, list): store[key] = [_v] store[key].append(item) else: store[key] = item return toStore if toSeq or toSeq is False: # called: seq.append(toSeq(value)) self.toSeq = toSeq else: self.toSeq = lambda t, tokens: (t[0], t[1]) if hasattr(toStore, '__call__'): self.toStore = toStore elif toStore: self.toStore = makeToStore(toStore) else: # always set! 
def __repr__(self):
    """Return a debug representation with class name, Prod name and id."""
    return "<css_parser.prodsparser.%s object name=%r at 0x%x>" % (
        self.__class__.__name__, self._name, id(self))
``,`` has been found yield next_ elif next_[0] == self.types.COMMENT: # pass COMMENT yield next_ else: yield token yield next_ elif token[0] == self.types.COMMENT: # pass COMMENT yield token else: yield token break # normal mode again for token in tokens: yield token def parse(self, text, name, productions, keepS=False, checkS=False, store=None, emptyOk=False, debug=False): """ text (or token generator) to parse, will be tokenized if not a generator yet may be: - a string to be tokenized - a single token, a tuple - a tuple of (token, tokensGenerator) - already tokenized so a tokens generator name used for logging productions used to parse tokens keepS if WS should be added to Seq or just be ignored store UPDATED If a Prod defines ``toStore`` the key defined there is a key in store to be set or if store[key] is a list the next Item is appended here. TODO: NEEDED? : Key ``raw`` is always added and holds all unprocessed values found emptyOk if True text may be empty, hard to test before as may be generator returns :wellformed: True or False :seq: a filled css_parser.util.Seq object which is NOT readonly yet :store: filled keys defined by Prod.toStore :unusedtokens: token generator containing tokens not used yet """ tokens = self._texttotokens(text) if not tokens: self._log.error('No content to parse.') return False, [], None, None seq = css_parser.util.Seq(readonly=False) if not store: # store for specific values store = {} prods = [productions] # stack of productions wellformed = True # while no real token is found any S are ignored started = False stopall = False prod = None # flag if default S handling should be done defaultS = True stopIfNoMoreMatch = False while True: # get from savedTokens or normal tokens try: # print debug, "SAVED", savedTokens token = savedTokens.pop() except IndexError: try: token = next(tokens) except StopIteration: break # print debug, token, stopIfNoMoreMatch type_, val, line, col = token # default productions if type_ == 
self.types.COMMENT: # always append COMMENT seq.append(css_parser.css.CSSComment(val), css_parser.css.CSSComment, line, col) elif defaultS and type_ == self.types.S and not checkS: # append S (but ignore starting ones) if not keepS or not started: continue else: seq.append(val, type_, line, col) # elif type_ == self.types.ATKEYWORD: # # @rule # r = css_parser.css.CSSUnknownRule(cssText=val) # seq.append(r, type(r), line, col) elif type_ == self.types.INVALID: # invalidate parse wellformed = False self._log.error('Invalid token: %r' % (token,)) break elif type_ == 'EOF': # do nothing? (self.types.EOF == True!) stopall = True else: started = True # check S now try: while True: # find next matching production try: prod = prods[-1].nextProd(token) except (Exhausted, NoMatch): # try next prod = None if isinstance(prod, Prod): # found actual Prod, not a Choice or Sequence break elif prod: # nested Sequence, Choice prods.append(prod) else: # nested exhausted, try in parent if len(prods) > 1: prods.pop() else: raise NoMatch('No match') except NoMatch as e: if stopIfNoMoreMatch: # and token: # print "\t1stopIfNoMoreMatch", e, token, prod, 'PUSHING' # tokenizer.push(token) savedTokens.append(token) stopall = True else: wellformed = False self._log.error('%s: %s: %r' % (name, e, token)) break except ParseError as e: # needed??? if stopIfNoMoreMatch: # and token: # print "\t2stopIfNoMoreMatch", e, token, prod tokenizer.push(token) stopall = True else: wellformed = False self._log.error('%s: %s: %r' % (name, e, token)) break else: # print '\t1', debug, 'PROD', prod # may stop next time, once set stays stopIfNoMoreMatch = prod.stopIfNoMoreMatch or stopIfNoMoreMatch # process prod if prod.toSeq and not prod.stopAndKeep: type_, val = prod.toSeq(token, tokens) if val is not None: seq.append(val, type_, line, col) if prod.toStore: if not prod.storeToken: prod.toStore(store, seq[-1]) else: # workaround for now for old style token # parsing! 
# TODO: remove when all new style prod.toStore(store, token) if prod.stop: # stop here and ignore following tokens # EOF? or end of e.g. func ")" break if prod.stopAndKeep: # e.g. ; # stop here and ignore following tokens # but keep this token for next run # TODO: CHECK!!!! tokenizer.push(token) tokens = itertools.chain(token, tokens) stopall = True break if prod.nextSor: # following is S or other token (e.g. ",")? # remove S if tokens = self._SorTokens(tokens, ',/') defaultS = False else: defaultS = True lastprod = prod # print debug, 'parse done', token, stopall, '\n' if not stopall: # stop immediately while True: # all productions exhausted? try: prod = prods[-1].nextProd(token=None) except Done: # ok prod = None except Missing as e: prod = None # last was a S operator which may End a Sequence, then ok if hasattr(lastprod, 'mayEnd') and not lastprod.mayEnd: wellformed = False self._log.error('%s: %s' % (name, e)) except ParseError as e: prod = None wellformed = False self._log.error('%s: %s' % (name, e)) else: if prods[-1].optional: prod = None elif prod and prod.optional: # ignore optional continue if prod and not prod.optional: wellformed = False self._log.error('%s: Missing token for production %r' % (name, text_type(prod))) break elif len(prods) > 1: # nested exhausted, next in parent prods.pop() else: break if not emptyOk and not len(seq): self._log.error('No content to parse.') return False, [], None, None # trim S from end seq.rstrip() return wellformed, seq, store, tokens class PreDef(object): """Predefined Prod definition for use in productions definition for ProdParser instances. 
@staticmethod
def comment(parent=None):
    """Return an optional Prod matching a COMMENT token.

    The matched token is added to the sequence as a
    ``css_parser.css.CSSComment`` built from the token value and attached
    to `parent` as its ``parentRule``.
    """
    return Prod(name='comment',
                match=lambda t, v: t == 'COMMENT',
                # t is the complete token, t[1] its value (the comment text)
                toSeq=lambda t, tokens: (t[0],
                                         css_parser.css.CSSComment(t[1],
                                                                   parentRule=parent)),
                optional=True
                )
@staticmethod
def unicode_range(stop=False, nextSor=False):
    "u+123456-abc normalized to lower `u`"
    def is_unicode_range(type_, value):
        # only the token type decides, the value is not inspected
        return type_ == PreDef.types.UNICODE_RANGE

    def lowered(token, tokens):
        # keep the token type, store the value in lower case
        return (token[0], token[1].lower())

    return Prod(name='unicode-range',
                match=is_unicode_range,
                toSeq=lowered,
                stop=stop,
                nextSor=nextSor)
def as_list(p):
    """Return `p` itself if it already is a list, else a new list of its items."""
    return p if isinstance(p, list) else list(p)
Predefined profiles are (use :meth:`~css_parser.profiles.Profiles.propertiesByProfile` to get a list of defined properties): :attr:`~css_parser.profiles.Profiles.CSS_LEVEL_2` Properties defined by CSS2.1 :attr:`~css_parser.profiles.Profiles.CSS3_BASIC_USER_INTERFACE` Currently resize and outline properties only :attr:`~css_parser.profiles.Profiles.CSS3_BOX` Currently overflow related properties only :attr:`~css_parser.profiles.Profiles.CSS3_COLOR` CSS 3 color properties :attr:`~css_parser.profiles.Profiles.CSS3_PAGED_MEDIA` As defined at http://www.w3.org/TR/css3-page/ (at 090307) Predefined macros are: :attr:`~css_parser.profiles.Profiles._TOKEN_MACROS` Macros containing the token values as defined to CSS2 :attr:`~css_parser.profiles.Profiles._MACROS` Additional general macros. If you want to redefine any of these macros do this in your custom macros. """ CSS_LEVEL_2 = 'CSS Level 2.1' CSS3_BACKGROUNDS_AND_BORDERS = 'CSS Backgrounds and Borders Module Level 3' CSS3_BASIC_USER_INTERFACE = 'CSS3 Basic User Interface Module' CSS3_BOX = CSS_BOX_LEVEL_3 = 'CSS Box Module Level 3' CSS3_COLOR = CSS_COLOR_LEVEL_3 = 'CSS Color Module Level 3' CSS3_FONTS = 'CSS Fonts Module Level 3' CSS3_FONT_FACE = 'CSS Fonts Module Level 3 @font-face properties' CSS3_PAGED_MEDIA = 'CSS3 Paged Media Module' CSS3_TEXT = 'CSS Text Level 3' _TOKEN_MACROS = { 'ident': r'[-]?{nmstart}{nmchar}*', 'name': r'{nmchar}+', 'nmstart': r'[_a-z]|{nonascii}|{escape}', 'nonascii': r'[^\0-\177]', 'unicode': r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?', 'escape': r'{unicode}|\\[ -~\u0080-\u01ff]', # 'escape': r'{unicode}|\\[ -~\200-\4177777]', 'int': r'[-]?\d+', 'nmchar': r'[\w-]|{nonascii}|{escape}', 'num': r'[-]?\d+|[-]?\d*\.\d+', 'positivenum': r'\d+|\d*\.\d+', 'number': r'{num}', 'string': r'{string1}|{string2}', 'string1': r'"(\\\"|[^\"])*"', 'uri': r'url\({w}({string}|(\\\)|[^\)])+){w}\)', 'string2': r"'(\\\'|[^\'])*'", 'nl': r'\n|\r\n|\r|\f', 'w': r'\s*', } _MACROS = { 'hexcolor': 
r'#[0-9a-f]{3}|#[0-9a-f]{6}', 'rgbcolor': r'rgb\({w}{int}{w}\,{w}{int}{w}\,{w}{int}{w}\)|rgb\({w}{num}%{w}\,{w}{num}%{w}\,{w}{num}%{w}\)', 'namedcolor': r'(transparent|orange|maroon|red|orange|yellow|olive|purple|fuchsia|white|lime|green|navy|blue|aqua|teal|black|silver|gray)', # noqa 'uicolor': r'(ActiveBorder|ActiveCaption|AppWorkspace|Background|ButtonFace|ButtonHighlight|ButtonShadow|ButtonText|CaptionText|GrayText|Highlight|HighlightText|InactiveBorder|InactiveCaption|InactiveCaptionText|InfoBackground|InfoText|Menu|MenuText|Scrollbar|ThreeDDarkShadow|ThreeDFace|ThreeDHighlight|ThreeDLightShadow|ThreeDShadow|Window|WindowFrame|WindowText)', # noqa 'color': r'{namedcolor}|{hexcolor}|{rgbcolor}|{uicolor}', 'integer': r'{int}', 'length': r'0|{num}(em|ex|px|in|cm|mm|pt|pc)', 'positivelength': r'0|{positivenum}(em|ex|px|in|cm|mm|pt|pc)', 'angle': r'0|{num}(deg|grad|rad)', 'time': r'0|{num}m?s', 'frequency': r'0|{num}k?Hz', 'percentage': r'{num}%', 'shadow': '(inset)?{w}{length}{w}{length}{w}{length}?{w}{length}?{w}{color}?' } def __init__(self, log=None): """A few profiles are predefined.""" self._log = log # macro cache self._usedMacros = Profiles._TOKEN_MACROS.copy() self._usedMacros.update(Profiles._MACROS.copy()) # to keep order, REFACTOR! 
self._profileNames = [] # for reset if macro changes self._rawProfiles = {} # already compiled profiles: {profile: {property: checkfunc, ...}, ...} self._profilesProperties = {} self._defaultProfiles = None self.addProfiles([(self.CSS_LEVEL_2, properties[self.CSS_LEVEL_2], macros[self.CSS_LEVEL_2] ), (self.CSS3_BACKGROUNDS_AND_BORDERS, properties[self.CSS3_BACKGROUNDS_AND_BORDERS], macros[self.CSS3_BACKGROUNDS_AND_BORDERS] ), (self.CSS3_BASIC_USER_INTERFACE, properties[self.CSS3_BASIC_USER_INTERFACE], macros[self.CSS3_BASIC_USER_INTERFACE] ), (self.CSS3_BOX, properties[self.CSS3_BOX], macros[self.CSS3_BOX] ), (self.CSS3_COLOR, properties[self.CSS3_COLOR], macros[self.CSS3_COLOR] ), (self.CSS3_FONTS, properties[self.CSS3_FONTS], macros[self.CSS3_FONTS] ), # new object for font-face only? (self.CSS3_FONT_FACE, properties[self.CSS3_FONT_FACE], macros[self.CSS3_FONTS] ), (self.CSS3_PAGED_MEDIA, properties[self.CSS3_PAGED_MEDIA], macros[self.CSS3_PAGED_MEDIA] ), (self.CSS3_TEXT, properties[self.CSS3_TEXT], macros[self.CSS3_TEXT] ) ]) self.__update_knownNames() def _expand_macros(self, dictionary, macros): """Expand macros in token dictionary""" def macro_value(m): return '(?:%s)' % macros[m.groupdict()['macro']] for key, value in as_list(dictionary.items()): if not hasattr(value, '__call__'): while re.search(r'{[a-z][a-z0-9-]*}', value): value = re.sub(r'{(?P[a-z][a-z0-9-]*)}', macro_value, value) dictionary[key] = value return dictionary def _compile_regexes(self, dictionary): """Compile all regular expressions into callable objects""" for key, value in as_list(dictionary.items()): if not hasattr(value, '__call__'): # Compiling them now will slow down the css_parser import time, # even if css_parser is not needed. We lazily compile them the # first time they're needed. 
# https://bitbucket.org/cthedot/css_parser/issues/72 value = util.LazyRegex('^(?:%s)$' % value, re.I) dictionary[key] = value return dictionary def __update_knownNames(self): self._knownNames = [] for properties in as_list(self._profilesProperties.values()): self._knownNames.extend(as_list(properties.keys())) def _getDefaultProfiles(self): "If not explicitly set same as Profiles.profiles but in reverse order." if not self._defaultProfiles: return self.profiles else: return self._defaultProfiles def _setDefaultProfiles(self, profiles): "profiles may be a single or a list of profile names" # under python 2.X this was originally basestring but given unicode literals ... if isinstance(profiles, string_type): self._defaultProfiles = (profiles,) else: self._defaultProfiles = profiles defaultProfiles = property(_getDefaultProfiles, _setDefaultProfiles, doc="Names of profiles to use for validation." "To use e.g. the CSS2 profile set " "``css_parser.profile.defaultProfiles = " "css_parser.profile.CSS_LEVEL_2``") profiles = property(lambda self: self._profileNames, doc='Names of all profiles in order as defined.') knownNames = property(lambda self: self._knownNames, doc="All known property names of all profiles.") def _resetProperties(self, newMacros=None): "reset all props from raw values as changes in macros happened" # base macros = Profiles._TOKEN_MACROS.copy() macros.update(Profiles._MACROS.copy()) # former for profile in self._profileNames: macros.update(self._rawProfiles[profile]['macros']) # new if newMacros: macros.update(newMacros) # reset properties self._profilesProperties.clear() for profile in self._profileNames: properties = self._expand_macros( # keep raw self._rawProfiles[profile]['properties'].copy(), macros) self._profilesProperties[profile] = self._compile_regexes(properties) # save self._usedMacros = macros def addProfiles(self, profiles): """Add a list of profiles at once. Useful as if profiles define custom macros these are used in one go. 
Using `addProfile` instead my be **very** slow instead. """ # add macros for profile, properties, macros in profiles: if macros: self._usedMacros.update(macros) self._rawProfiles[profile] = {'macros': macros.copy()} # only add new properties for profile, properties, macros in profiles: self.addProfile(profile, properties.copy(), None) def addProfile(self, profile, properties, macros=None): """Add a new profile with name `profile` (e.g. 'CSS level 2') and the given `properties`. :param profile: the new `profile`'s name :param properties: a dictionary of ``{ property-name: propery-value }`` items where property-value is a regex which may use macros defined in given ``macros`` or the standard macros Profiles.tokens and Profiles.generalvalues. ``propery-value`` may also be a function which takes a single argument which is the value to validate and which should return True or False. Any exceptions which may be raised during this custom validation are reported or raised as all other css_parser exceptions depending on css_parser.log.raiseExceptions which e.g during parsing normally is False so the exceptions would be logged only. :param macros: may be used in the given properties definitions. There are some predefined basic macros which may always be used in :attr:`Profiles._TOKEN_MACROS` and :attr:`Profiles._MACROS`. 
""" if macros: # check if known macros would change and if yes reset properties if len(set(macros.keys()).intersection(as_list(self._usedMacros.keys()))): self._resetProperties(newMacros=macros) else: # no replacement, simply continue self._usedMacros.update(macros) else: # might have been set by addProfiles before try: macros = self._rawProfiles[profile]['macros'] except KeyError: macros = {} # save name and raw props/macros if macros change to completely reset self._profileNames.append(profile) self._rawProfiles[profile] = {'properties': properties.copy(), 'macros': macros.copy()} # prepare and save properties properties = self._expand_macros(properties, self._usedMacros) self._profilesProperties[profile] = self._compile_regexes(properties) self.__update_knownNames() def removeProfile(self, profile=None, all=False): """Remove `profile` or remove `all` profiles. If the removed profile used custom macros all remaining profiles are reset to reflect the macro changes. This may be quite an expensive operation! :param profile: profile name to remove :param all: if ``True`` removes all profiles to start with a clean state :exceptions: - :exc:`css_parser.profiles.NoSuchProfileException`: If given `profile` cannot be found. """ if all: self._profilesProperties.clear() self._rawProfiles.clear() del self._profileNames[:] else: reset = False try: if (self._rawProfiles[profile]['macros']): reset = True del self._profilesProperties[profile] del self._rawProfiles[profile] del self._profileNames[self._profileNames.index(profile)] except KeyError: raise NoSuchProfileException('No profile %r.' % profile) else: if reset: # reset properties as macros were removed self._resetProperties() self.__update_knownNames() def propertiesByProfile(self, profiles=None): """Generator: Yield property names, if no `profiles` is given all profile's properties are used. :param profiles: a single profile name or a list of names. 
""" if not profiles: profiles = self.profiles elif isinstance(profiles, string_type): profiles = (profiles, ) try: for profile in sorted(profiles): for name in sorted(self._profilesProperties[profile].keys()): yield name except KeyError as e: raise NoSuchProfileException(e) def validate(self, name, value): """Check if `value` is valid for given property `name` using **any** profile. :param name: a property name :param value: a CSS value (string) :returns: if the `value` is valid for the given property `name` in any profile """ for profile in self.profiles: if name in self._profilesProperties[profile]: try: # custom validation errors are caught r = bool(self._profilesProperties[profile][name](value)) except Exception as e: # TODO: more specific exception? # Validate should not be fatal though! self._log.error(e, error=Exception) r = False if r: return r return False def validateWithProfile(self, name, value, profiles=None): """Check if `value` is valid for given property `name` returning ``(valid, profile)``. 
:param name: a property name :param value: a CSS value (string) :param profiles: internal parameter used by Property.validate only :returns: ``valid, matching, profiles`` where ``valid`` is if the `value` is valid for the given property `name` in any profile, ``matching==True`` if it is valid in the given `profiles` and ``profiles`` the profile names for which the value is valid (or ``[]`` if not valid at all) Example:: >>> css_parser.profile.defaultProfiles = css_parser.profile.CSS_LEVEL_2 >>> print css_parser.profile.validateWithProfile('color', 'rgba(1,1,1,1)') (True, False, Profiles.CSS3_COLOR) """ if name not in self.knownNames: return False, False, [] else: if not profiles: profiles = self.defaultProfiles elif isinstance(profiles, string_type): profiles = (profiles, ) for profilename in reversed(profiles): # check given profiles if name in self._profilesProperties[profilename]: validate = self._profilesProperties[profilename][name] try: if validate(value): return True, True, [profilename] except Exception as e: self._log.error(e, error=Exception) for profilename in (p for p in self._profileNames if p not in profiles): # check remaining profiles as well if name in self._profilesProperties[profilename]: validate = self._profilesProperties[profilename][name] try: if validate(value): return True, False, [profilename] except Exception as e: self._log.error(e, error=Exception) names = [] for profilename, properties in as_list(self._profilesProperties.items()): # return profile to which name belongs if name in as_list(properties.keys()): names.append(profilename) names.sort() return False, False, names properties = {} macros = {} """ Define some regular expression fragments that will be used as macros within the CSS property value regular expressions. 
""" macros[Profiles.CSS_LEVEL_2] = { 'background-color': r'{color}|transparent|inherit', 'background-image': r'{uri}|none|inherit', 'background-position': r'({percentage}|{length}|left|center|right)(\s*({percentage}|{length}|top|center|bottom))?|((top|center|bottom)\s*(left|center|right)?)|((left|center|right)\s*(top|center|bottom)?)|inherit', # noqa 'background-repeat': r'repeat|repeat-x|repeat-y|no-repeat|inherit', 'background-attachment': r'scroll|fixed|inherit', 'shape': r'rect\(({w}({length}|auto}){w},){3}{w}({length}|auto){w}\)', 'counter': r'counter\({w}{ident}{w}(?:,{w}{list-style-type}{w})?\)', 'identifier': r'{ident}', 'family-name': r'{string}|({ident}(\s+{ident})*)', 'generic-family': r'serif|sans-serif|cursive|fantasy|monospace', 'absolute-size': r'(x?x-)?(small|large)|medium', 'relative-size': r'smaller|larger', 'font-family': r'({family-name}({w},{w}{family-name})*)|inherit', 'font-size': r'{absolute-size}|{relative-size}|{positivelength}|{percentage}|inherit', 'font-style': r'normal|italic|oblique|inherit', 'font-variant': r'normal|small-caps|inherit', 'font-weight': r'normal|bold|bolder|lighter|[1-9]00|inherit', 'line-height': r'normal|{number}|{length}|{percentage}|inherit', 'list-style-image': r'{uri}|none|inherit', 'list-style-position': r'inside|outside|inherit', 'list-style-type': r'disc|circle|square|decimal|decimal-leading-zero|lower-roman|upper-roman|lower-greek|lower-(latin|alpha)|upper-(latin|alpha)|armenian|georgian|none|inherit', # noqa 'margin-width': r'{length}|{percentage}|auto', 'padding-width': r'{length}|{percentage}', 'specific-voice': r'{ident}', 'generic-voice': r'male|female|child', 'content': r'{string}|{uri}|{counter}|attr\({w}{ident}{w}\)|open-quote|close-quote|no-open-quote|no-close-quote', 'background-attrs': r'{background-color}|{background-image}|{background-repeat}|{background-attachment}|{background-position}', # noqa 'list-attrs': r'{list-style-type}|{list-style-position}|{list-style-image}', 'font-attrs': 
r'{font-style}|{font-variant}|{font-weight}', 'text-attrs': r'underline|overline|line-through|blink', 'overflow': r'visible|hidden|scroll|auto|inherit', } """ Define the regular expressions for validation all CSS values """ properties[Profiles.CSS_LEVEL_2] = { 'azimuth': r'{angle}|(behind\s+)?(left-side|far-left|left|center-left|center|center-right|right|far-right|right-side)(\s+behind)?|behind|leftwards|rightwards|inherit', # noqa 'background-attachment': r'{background-attachment}', 'background-color': r'{background-color}', 'background-image': r'{background-image}', 'background-position': r'{background-position}', 'background-repeat': r'{background-repeat}', # Each piece should only be allowed one time 'background': r'{background-attrs}(\s+{background-attrs})*|inherit', 'border-collapse': r'collapse|separate|inherit', 'border-spacing': r'{length}(\s+{length})?|inherit', 'bottom': r'{length}|{percentage}|auto|inherit', 'caption-side': r'top|bottom|inherit', 'clear': r'none|left|right|both|inherit', 'clip': r'{shape}|auto|inherit', 'color': r'{color}|inherit', 'content': r'none|normal|{content}(\s+{content})*|inherit', 'counter-increment': r'({ident}(\s+{integer})?)(\s+({ident}(\s+{integer})?))*|none|inherit', 'counter-reset': r'({ident}(\s+{integer})?)(\s+({ident}(\s+{integer})?))*|none|inherit', 'cue-after': r'{uri}|none|inherit', 'cue-before': r'{uri}|none|inherit', 'cue': r'({uri}|none|inherit){1,2}|inherit', 'direction': r'ltr|rtl|inherit', 'display': r'inline|block|list-item|run-in|inline-block|table|inline-table|table-row-group|table-header-group|table-barter-group|table-row|table-column-group|table-column|table-cell|table-caption|none|inherit', # noqa 'elevation': r'{angle}|below|level|above|higher|lower|inherit', 'empty-cells': r'show|hide|inherit', 'float': r'left|right|none|inherit', 'font-family': r'{font-family}', 'font-size': r'{font-size}', 'font-style': r'{font-style}', 'font-variant': r'{font-variant}', 'font-weight': r'{font-weight}', 'font': 
r'(({font-attrs}\s+)*{font-size}({w}/{w}{line-height})?\s+{font-family})|caption|icon|menu|message-box|small-caption|status-bar|inherit', # noqa 'height': r'{length}|{percentage}|auto|inherit', 'left': r'{length}|{percentage}|auto|inherit', 'letter-spacing': r'normal|{length}|inherit', 'line-height': r'{line-height}', 'list-style-image': r'{list-style-image}', 'list-style-position': r'{list-style-position}', 'list-style-type': r'{list-style-type}', 'list-style': r'{list-attrs}(\s+{list-attrs})*|inherit', 'margin-right': r'{margin-width}|inherit', 'margin-left': r'{margin-width}|inherit', 'margin-top': r'{margin-width}|inherit', 'margin-bottom': r'{margin-width}|inherit', 'margin': r'{margin-width}(\s+{margin-width}){0,3}|inherit', 'max-height': r'{length}|{percentage}|none|inherit', 'max-width': r'{length}|{percentage}|none|inherit', 'min-height': r'{length}|{percentage}|none|inherit', 'min-width': r'{length}|{percentage}|none|inherit', 'orphans': r'{integer}|inherit', 'overflow': r'{overflow}', 'padding-top': r'{padding-width}|inherit', 'padding-right': r'{padding-width}|inherit', 'padding-bottom': r'{padding-width}|inherit', 'padding-left': r'{padding-width}|inherit', 'padding': r'{padding-width}(\s+{padding-width}){0,3}|inherit', 'page-break-after': r'auto|always|avoid|left|right|inherit', 'page-break-before': r'auto|always|avoid|left|right|inherit', 'page-break-inside': r'avoid|auto|inherit', 'pause-after': r'{time}|{percentage}|inherit', 'pause-before': r'{time}|{percentage}|inherit', 'pause': r'({time}|{percentage}){1,2}|inherit', 'pitch-range': r'{number}|inherit', 'pitch': r'{frequency}|x-low|low|medium|high|x-high|inherit', 'play-during': r'{uri}(\s+(mix|repeat))*|auto|none|inherit', 'position': r'static|relative|absolute|fixed|inherit', 'quotes': r'({string}\s+{string})(\s+{string}\s+{string})*|none|inherit', 'richness': r'{number}|inherit', 'right': r'{length}|{percentage}|auto|inherit', 'speak-header': r'once|always|inherit', 'speak-numeral': 
r'digits|continuous|inherit', 'speak-punctuation': r'code|none|inherit', 'speak': r'normal|none|spell-out|inherit', 'speech-rate': r'{number}|x-slow|slow|medium|fast|x-fast|faster|slower|inherit', 'stress': r'{number}|inherit', 'table-layout': r'auto|fixed|inherit', 'text-align': r'left|right|center|justify|inherit', 'text-decoration': r'none|{text-attrs}(\s+{text-attrs})*|inherit', 'text-indent': r'{length}|{percentage}|inherit', 'text-transform': r'capitalize|uppercase|lowercase|none|inherit', 'top': r'{length}|{percentage}|auto|inherit', 'unicode-bidi': r'normal|embed|bidi-override|inherit', 'vertical-align': r'baseline|sub|super|top|text-top|middle|bottom|text-bottom|{percentage}|{length}|inherit', 'visibility': r'visible|hidden|collapse|inherit', 'voice-family': r'({specific-voice}|{generic-voice}{w},{w})*({specific-voice}|{generic-voice})|inherit', 'volume': r'{number}|{percentage}|silent|x-soft|soft|medium|loud|x-loud|inherit', 'white-space': r'normal|pre|nowrap|pre-wrap|pre-line|inherit', 'widows': r'{integer}|inherit', 'width': r'{length}|{percentage}|auto|inherit', 'word-spacing': r'normal|{length}|inherit', 'z-index': r'auto|{integer}|inherit', } macros[Profiles.CSS3_BACKGROUNDS_AND_BORDERS] = { 'border-style': 'none|hidden|dotted|dashed|solid|double|groove|ridge|inset|outset', 'border-width': '{length}|thin|medium|thick', 'b1': r'{border-width}?({w}{border-style})?({w}{color})?', 'b2': r'{border-width}?({w}{color})?({w}{border-style})?', 'b3': r'{border-style}?({w}{border-width})?({w}{color})?', 'b4': r'{border-style}?({w}{color})?({w}{border-width})?', 'b5': r'{color}?({w}{border-style})?({w}{border-width})?', 'b6': r'{color}?({w}{border-width})?({w}{border-style})?', 'border-attrs': r'{b1}|{b2}|{b3}|{b4}|{b5}|{b6}', 'border-radius-part': r'({length}|{percentage})(\s+({length}|{percentage}))?' 
} properties[Profiles.CSS3_BACKGROUNDS_AND_BORDERS] = { 'border-color': r'({color}|transparent)(\s+({color}|transparent)){0,3}|inherit', 'border-style': r'{border-style}(\s+{border-style}){0,3}|inherit', 'border-top': r'{border-attrs}|inherit', 'border-right': r'{border-attrs}|inherit', 'border-bottom': r'{border-attrs}|inherit', 'border-left': r'{border-attrs}|inherit', 'border-top-color': r'{color}|transparent|inherit', 'border-right-color': r'{color}|transparent|inherit', 'border-bottom-color': r'{color}|transparent|inherit', 'border-left-color': r'{color}|transparent|inherit', 'border-top-style': r'{border-style}|inherit', 'border-right-style': r'{border-style}|inherit', 'border-bottom-style': r'{border-style}|inherit', 'border-left-style': r'{border-style}|inherit', 'border-top-width': r'{border-width}|inherit', 'border-right-width': r'{border-width}|inherit', 'border-bottom-width': r'{border-width}|inherit', 'border-left-width': r'{border-width}|inherit', 'border-width': r'{border-width}(\s+{border-width}){0,3}|inherit', 'border': r'{border-attrs}|inherit', 'border-top-right-radius': '{border-radius-part}', 'border-bottom-right-radius': '{border-radius-part}', 'border-bottom-left-radius': '{border-radius-part}', 'border-top-left-radius': '{border-radius-part}', 'border-radius': '({length}{w}|{percentage}{w}){1,4}(/{w}({length}{w}|{percentage}{w}){1,4})?', 'box-shadow': 'none|{shadow}({w},{w}{shadow})*', } # CSS3 Basic User Interface Module macros[Profiles.CSS3_BASIC_USER_INTERFACE] = { 'border-style': macros[Profiles.CSS3_BACKGROUNDS_AND_BORDERS]['border-style'], 'border-width': macros[Profiles.CSS3_BACKGROUNDS_AND_BORDERS]['border-width'], 'outline-1': r'{outline-color}(\s+{outline-style})?(\s+{outline-width})?', 'outline-2': r'{outline-color}(\s+{outline-width})?(\s+{outline-style})?', 'outline-3': r'{outline-style}(\s+{outline-color})?(\s+{outline-width})?', 'outline-4': r'{outline-style}(\s+{outline-width})?(\s+{outline-color})?', 'outline-5': 
r'{outline-width}(\s+{outline-color})?(\s+{outline-style})?', 'outline-6': r'{outline-width}(\s+{outline-style})?(\s+{outline-color})?', 'outline-color': r'{color}|invert|inherit', 'outline-style': r'auto|{border-style}|inherit', 'outline-width': r'{border-width}|inherit', } properties[Profiles.CSS3_BASIC_USER_INTERFACE] = { 'box-sizing': r'content-box|border-box', 'cursor': r'((({uri}{w}({number}{w}{number}{w})?,{w})*)?(auto|default|none|context-menu|help|pointer|progress|wait|cell|crosshair|text|vertical-text|alias|copy|move|no-drop|not-allowed|(e|n|ne|nw|s|se|sw|w|ew|ns|nesw|nwse|col|row)-resize|all-scroll))|inherit', # noqa 'nav-index': r'auto|{number}|inherit', 'outline-color': r'{outline-color}', 'outline-style': r'{outline-style}', 'outline-width': r'{outline-width}', 'outline-offset': r'{length}|inherit', # 'outline': r'{outline-attrs}(\s+{outline-attrs})*|inherit', 'outline': r'{outline-1}|{outline-2}|{outline-3}|{outline-4}|{outline-5}|{outline-6}|inherit', 'resize': 'none|both|horizontal|vertical|inherit', } # CSS Box Module Level 3 macros[Profiles.CSS3_BOX] = { 'overflow': macros[Profiles.CSS_LEVEL_2]['overflow'] } properties[Profiles.CSS3_BOX] = { 'overflow': '{overflow}{w}{overflow}?|inherit', 'overflow-x': '{overflow}|inherit', 'overflow-y': '{overflow}|inherit' } # CSS Color Module Level 3 macros[Profiles.CSS3_COLOR] = { # orange and transparent in CSS 2.1 'namedcolor': r'(currentcolor|transparent|aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|orange|purple|red|silver|teal|white|yellow)', # noqa # orange? 
'rgbacolor': r'rgba\({w}{int}{w}\,{w}{int}{w}\,{w}{int}{w}\,{w}{num}{w}\)|rgba\({w}{num}%{w}\,{w}{num}%{w}\,{w}{num}%{w}\,{w}{num}{w}\)', # noqa 'hslcolor': r'hsl\({w}{int}{w}\,{w}{num}%{w}\,{w}{num}%{w}\)|hsla\({w}{int}{w}\,{w}{num}%{w}\,{w}{num}%{w}\,{w}{num}{w}\)', # noqa 'x11color': r'aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|maroon|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen', # noqa 'uicolor': 
r'(ActiveBorder|ActiveCaption|AppWorkspace|Background|ButtonFace|ButtonHighlight|ButtonShadow|ButtonText|CaptionText|GrayText|Highlight|HighlightText|InactiveBorder|InactiveCaption|InactiveCaptionText|InfoBackground|InfoText|Menu|MenuText|Scrollbar|ThreeDDarkShadow|ThreeDFace|ThreeDHighlight|ThreeDLightShadow|ThreeDShadow|Window|WindowFrame|WindowText)', # noqa 'color': r'{namedcolor}|{hexcolor}|{rgbcolor}|{rgbacolor}|{hslcolor}|{x11color}|inherit', } properties[Profiles.CSS3_COLOR] = { 'opacity': r'{num}|inherit', } # CSS Fonts Module Level 3 http://www.w3.org/TR/css3-fonts/ macros[Profiles.CSS3_FONTS] = { # 'family-name': r'{string}|{ident}', 'family-name': r'{string}|({ident}(\s+{ident})*)', 'font-face-name': r'local\({w}{family-name}{w}\)', 'font-stretch-names': r'(ultra-condensed|extra-condensed|condensed|semi-condensed|semi-expanded|expanded|extra-expanded|ultra-expanded)', # noqa 'unicode-range': r'[uU]\+[0-9A-Fa-f?]{1,6}(\-[0-9A-Fa-f]{1,6})?' } properties[Profiles.CSS3_FONTS] = { 'font-size-adjust': r'{number}|none|inherit', 'font-stretch': r'normal|wider|narrower|{font-stretch-names}|inherit' } properties[Profiles.CSS3_FONT_FACE] = { 'font-family': '{family-name}', 'font-stretch': r'{font-stretch-names}', 'font-style': r'normal|italic|oblique', 'font-weight': r'normal|bold|[1-9]00', 'src': r'({uri}{w}(format\({w}{string}{w}(\,{w}{string}{w})*\))?|{font-face-name})({w},{w}({uri}{w}(format\({w}{string}{w}(\,{w}{string}{w})*\))?|{font-face-name}))*', # noqa 'unicode-range': '{unicode-range}({w},{w}{unicode-range})*' } # CSS3 Paged Media macros[Profiles.CSS3_PAGED_MEDIA] = { 'page-size': 'a5|a4|a3|b5|b4|letter|legal|ledger', 'page-orientation': 'portrait|landscape', 'page-1': '{page-size}(?:{w}{page-orientation})?', 'page-2': '{page-orientation}(?:{w}{page-size})?', 'page-size-orientation': '{page-1}|{page-2}', 'pagebreak': 'auto|always|avoid|left|right' } properties[Profiles.CSS3_PAGED_MEDIA] = { 'fit': 'fill|hidden|meet|slice', 'fit-position': 
r'auto|(({percentage}|{length})(\s*({percentage}|{length}))?|((top|center|bottom)\s*(left|center|right)?)|((left|center|right)\s*(top|center|bottom)?))', # noqa 'image-orientation': 'auto|{angle}', 'orphans': r'{integer}|inherit', 'page': 'auto|{ident}', 'page-break-before': '{pagebreak}|inherit', 'page-break-after': '{pagebreak}|inherit', 'page-break-inside': 'auto|avoid|inherit', 'size': '({length}{w}){1,2}|auto|{page-size-orientation}', 'widows': r'{integer}|inherit' } macros[Profiles.CSS3_TEXT] = { } properties[Profiles.CSS3_TEXT] = { 'text-shadow': 'none|{shadow}({w},{w}{shadow})*', } css-parser-1.0.4/src/css_parser/sac.py0000644000175000017500000004172613407702010020127 0ustar kovidkovid00000000000000#!/usr/bin/env python from __future__ import unicode_literals, division, absolute_import, print_function from . import tokenize2 from . import errorhandler import codecs from . import helper """A validating CSSParser""" __docformat__ = 'restructuredtext' __version__ = '$Id: parse.py 1754 2009-05-30 14:50:13Z cthedot $' import sys PY2 = sys.version_info[0] == 2 class ErrorHandler(object): """Basic class for CSS error handlers. This class class provides a default implementation ignoring warnings and recoverable errors and throwing a SAXParseException for fatal errors. If a CSS application needs to implement customized error handling, it must extend this class and then register an instance with the CSS parser using the parser's setErrorHandler method. The parser will then report all errors and warnings through this interface. The parser shall use this class instead of throwing an exception: it is up to the application whether to throw an exception for different types of errors and warnings. Note, however, that there is no requirement that the parser continue to provide useful information after a call to fatalError (in other words, a CSS driver class could catch an exception and report a fatalError). 
""" def __init__(self): self._log = errorhandler.ErrorHandler() def error(self, exception, token=None): self._log.error(exception, token, neverraise=True) def fatal(self, exception, token=None): self._log.fatal(exception, token) def warn(self, exception, token=None): self._log.warn(exception, token, neverraise=True) class DocumentHandler(object): """ void endFontFace() Receive notification of the end of a font face statement. void endMedia(SACMediaList media) Receive notification of the end of a media statement. void endPage(java.lang.String name, java.lang.String pseudo_page) Receive notification of the end of a media statement. void importStyle(java.lang.String uri, SACMediaList media, java.lang.String defaultNamespaceURI) Receive notification of a import statement in the style sheet. void startFontFace() Receive notification of the beginning of a font face statement. void startMedia(SACMediaList media) Receive notification of the beginning of a media statement. void startPage(java.lang.String name, java.lang.String pseudo_page) Receive notification of the beginning of a page statement. """ def __init__(self): def log(msg): sys.stderr.write('INFO\t%s\n' % msg) self._log = log def comment(self, text, line=None, col=None): "Receive notification of a comment." self._log("comment %r at [%s, %s]" % (text, line, col)) def startDocument(self, encoding): "Receive notification of the beginning of a style sheet." # source self._log("startDocument encoding=%s" % encoding) def endDocument(self, source=None, line=None, col=None): "Receive notification of the end of a document." self._log("endDocument EOF") def importStyle(self, uri, media, name, line=None, col=None): "Receive notification of a import statement in the style sheet." # defaultNamespaceURI??? self._log("importStyle at [%s, %s]" % (line, col)) def namespaceDeclaration(self, prefix, uri, line=None, col=None): "Receive notification of an unknown rule t-rule not supported by this parser." # prefix might be None! 
self._log("namespaceDeclaration at [%s, %s]" % (line, col)) def startSelector(self, selectors=None, line=None, col=None): "Receive notification of the beginning of a rule statement." # TODO selectorList! self._log("startSelector at [%s, %s]" % (line, col)) def endSelector(self, selectors=None, line=None, col=None): "Receive notification of the end of a rule statement." self._log("endSelector at [%s, %s]" % (line, col)) def property(self, name, value='TODO', important=False, line=None, col=None): "Receive notification of a declaration." # TODO: value is LexicalValue? self._log("property %r at [%s, %s]" % (name, line, col)) def ignorableAtRule(self, atRule, line=None, col=None): "Receive notification of an unknown rule t-rule not supported by this parser." self._log("ignorableAtRule %r at [%s, %s]" % (atRule, line, col)) class EchoHandler(DocumentHandler): "Echos all input to property `out`" def __init__(self): super(EchoHandler, self).__init__() self._out = [] out = property(lambda self: ''.join(self._out)) def startDocument(self, encoding): super(EchoHandler, self).startDocument(encoding) if 'utf-8' != encoding: self._out.append('@charset "%s";\n' % encoding) # def comment(self, text, line=None, col=None): # self._out.append(u'/*%s*/' % text) def importStyle(self, uri, media, name, line=None, col=None): "Receive notification of a import statement in the style sheet." # defaultNamespaceURI??? 
super(EchoHandler, self).importStyle(uri, media, name, line, col) self._out.append('@import %s%s%s;\n' % (helper.string(uri), '%s ' % media if media else '', '%s ' % name if name else '') ) def namespaceDeclaration(self, prefix, uri, line=None, col=None): super(EchoHandler, self).namespaceDeclaration(prefix, uri, line, col) self._out.append('@namespace %s%s;\n' % ('%s ' % prefix if prefix else '', helper.string(uri))) def startSelector(self, selectors=None, line=None, col=None): super(EchoHandler, self).startSelector(selectors, line, col) if selectors: self._out.append(', '.join(selectors)) self._out.append(' {\n') def endSelector(self, selectors=None, line=None, col=None): self._out.append(' }') def property(self, name, value, important=False, line=None, col=None): super(EchoHandler, self).property(name, value, line, col) self._out.append(' %s: %s%s;\n' % (name, value, ' !important' if important else '')) class Parser(object): """ java.lang.String getParserVersion() Returns a string about which CSS language is supported by this parser. boolean parsePriority(InputSource source) Parse a CSS priority value (e.g. LexicalUnit parsePropertyValue(InputSource source) Parse a CSS property value. void parseRule(InputSource source) Parse a CSS rule. SelectorList parseSelectors(InputSource source) Parse a comma separated list of selectors. void parseStyleDeclaration(InputSource source) Parse a CSS style declaration (without '{' and '}'). void parseStyleSheet(InputSource source) Parse a CSS document. void parseStyleSheet(java.lang.String uri) Parse a CSS document from a URI. void setConditionFactory(ConditionFactory conditionFactory) void setDocumentHandler(DocumentHandler handler) Allow an application to register a document event handler. void setErrorHandler(ErrorHandler handler) Allow an application to register an error event handler. void setLocale(java.util.Locale locale) Allow an application to request a locale for errors and warnings. 
class Parser(object):
    """Event driven CSS parser modelled on the SAC ``Parser`` interface.

    The style sheet is tokenized and everything found is reported to the
    registered document handler; problems are reported to the registered
    error handler.

    The (Java) SAC interface for reference::

        java.lang.String getParserVersion()
            Returns a string about which CSS language is supported by this
            parser.
        boolean parsePriority(InputSource source)
            Parse a CSS priority value (e.g. "!important").
        LexicalUnit parsePropertyValue(InputSource source)
            Parse a CSS property value.
        void parseRule(InputSource source)
            Parse a CSS rule.
        SelectorList parseSelectors(InputSource source)
            Parse a comma separated list of selectors.
        void parseStyleDeclaration(InputSource source)
            Parse a CSS style declaration (without '{' and '}').
        void parseStyleSheet(InputSource source)
            Parse a CSS document.
        void parseStyleSheet(java.lang.String uri)
            Parse a CSS document from a URI.
        void setConditionFactory(ConditionFactory conditionFactory)

        void setDocumentHandler(DocumentHandler handler)
            Allow an application to register a document event handler.
        void setErrorHandler(ErrorHandler handler)
            Allow an application to register an error event handler.
        void setLocale(java.util.Locale locale)
            Allow an application to request a locale for errors and warnings.
        void setSelectorFactory(SelectorFactory selectorFactory)
    """

    def __init__(self, documentHandler=None, errorHandler=None):
        """Set up the tokenizer and register the handlers.

        A default ``DocumentHandler`` / ``ErrorHandler`` is created for
        each handler which is not given.
        """
        self._tokenizer = tokenize2.Tokenizer()
        self.setDocumentHandler(documentHandler or DocumentHandler())
        self.setErrorHandler(errorHandler or ErrorHandler())

    def parseString(self, cssText, encoding=None):
        """Parse `cssText` and report its content to the document handler.

        cssText
            the style sheet; a byte string is decoded with the ``css``
            codec first, text is tokenized as it is
        encoding
            handed to the ``css`` codec if `cssText` has to be decoded
        """
        # ``bytes`` is ``str`` on Python 2, so nothing changes there; on
        # Python 3 this decodes bytes and leaves decoded text alone
        if isinstance(cssText, bytes):
            cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0]

        tokens = self._tokenizer.tokenize(cssText, fullsheet=True)

        # START PARSING
        t = next(tokens)
        type_, val, line, col = t

        encoding = 'utf-8'
        if 'CHARSET_SYM' == type_:
            # @charset "encoding";
            encodingtoken = next(tokens)
            semicolontoken = next(tokens)
            if 'STRING' == encodingtoken[0] and ';' == semicolontoken[1]:
                encoding = helper.stringvalue(encodingtoken[1])
            else:
                self._errorHandler.fatal('Invalid @charset')

            t = next(tokens)
            type_, val, line, col = t

        self._handler.startDocument(encoding)

        while True:
            start = (line, col)
            try:
                if self._simple(t):
                    pass
                elif type_ in ('ATKEYWORD', 'PAGE_SYM', 'MEDIA_SYM',
                               'FONT_FACE_SYM'):
                    self._ignorableAtRule(tokens, val, start)
                elif 'IMPORT_SYM' == type_:
                    self._importRule(tokens, start)
                elif 'NAMESPACE_SYM' == type_:
                    self._namespaceRule(tokens, start)
                else:
                    self._styleRule(tokens, t, start)

                t = next(tokens)
                type_, val, line, col = t

            except StopIteration:
                # the token stream is exhausted, possibly in mid rule
                break

    def _simple(self, t):
        "Handle a comment, whitespace or EOF token; return if `t` was one."
        type_, val, line, col = t
        if 'COMMENT' == type_:
            # report the text without the enclosing /* and */
            self._handler.comment(val[2:-2], line, col)
        elif 'EOF' == type_:
            self._handler.endDocument(val, line, col)
        elif 'S' != type_:
            return False
        return True

    def _ignorableAtRule(self, tokens, keyword, start):
        "Read an at-rule which is not parsed further and report its text."
        atRule = [keyword]
        braces = 0
        while True:
            # read till the final ; or the } closing the outermost block
            type_, val, line, col = next(tokens)
            atRule.append(val)
            if ';' == val and not braces:
                break
            elif '{' == val:
                braces += 1
            elif '}' == val:
                braces -= 1
                if braces == 0:
                    break

        self._handler.ignorableAtRule(''.join(atRule), *start)

    def _importRule(self, tokens, start):
        "Read ``@import`` up to its ; and report the imported uri."
        # import URI or STRING media? name?
        # TODO: media and name are not read yet
        uri, media, name = None, None, None
        while True:
            type_, val, line, col = next(tokens)
            if 'STRING' == type_:
                uri = helper.stringvalue(val)
            elif 'URI' == type_:
                uri = helper.urivalue(val)
            elif ';' == val:
                break

        if uri:
            # report the position like all other events do
            self._handler.importStyle(uri, media, name, *start)
        else:
            self._errorHandler.error('Invalid @import'
                                     ' declaration at %r' % (start,))

    def _namespaceRule(self, tokens, start):
        "Read ``@namespace`` up to its ; and report prefix and uri."
        prefix, uri = None, None
        while True:
            type_, val, line, col = next(tokens)
            if 'IDENT' == type_:
                prefix = val
            elif 'STRING' == type_:
                uri = helper.stringvalue(val)
            elif 'URI' == type_:
                uri = helper.urivalue(val)
            elif ';' == val:
                break

        if uri:
            self._handler.namespaceDeclaration(prefix, uri, *start)
        else:
            self._errorHandler.error('Invalid @namespace'
                                     ' declaration at %r' % (start,))

    def _styleRule(self, tokens, t, start):
        """Read a style rule starting with token `t` and report it.

        Reports the selectors, each declaration which has both a name and
        a value, and finally the end of the rule.
        """
        error = self._errorHandler.error
        type_, val, line, col = t

        selector = []
        selectors = []
        while True:
            # selectors[, selector]* {
            if 'S' == type_:
                selector.append(' ')
            elif self._simple(t):
                pass
            elif ',' == val:
                selectors.append(''.join(selector).strip())
                selector = []
            elif '{' == val:
                selectors.append(''.join(selector).strip())
                self._handler.startSelector(selectors, *start)
                break
            else:
                selector.append(val)

            t = next(tokens)
            type_, val, line, col = t

        while True:
            # name: value [!important][;name: value [!important]]*;?
            name, value, important = None, [], False
            # the token which ended the current part: one of ! ; } or None
            end = None

            while True:
                # name:
                t = next(tokens)
                type_, val, line, col = t
                if 'S' == type_ or self._simple(t):
                    pass
                elif 'IDENT' == type_:
                    if name:
                        error('more than one property name', t)
                    else:
                        name = val
                elif ':' == val:
                    if not name:
                        error('no property name', t)
                    break
                elif ';' == val:
                    error('premature end of property', t)
                    end = val
                    break
                elif '}' == val:
                    if name:
                        error('premature end of property', t)
                    end = val
                    break
                else:
                    error('unexpected property name token %r' % val, t)

            while end not in (';', '}'):
                # value !;}
                t = next(tokens)
                type_, val, line, col = t
                if 'S' == type_:
                    value.append(' ')
                elif self._simple(t):
                    pass
                elif val in ('!', ';', '}'):
                    value = ''.join(value).strip()
                    if not value:
                        error('premature end of property (no value)', t)
                    end = val
                    break
                else:
                    value.append(val)

            while '!' == end:
                # !important
                t = next(tokens)
                type_, val, line, col = t
                if self._simple(t):
                    pass
                elif 'IDENT' == type_ and not important:
                    important = True
                elif ';' == val or '}' == val:
                    end = val
                    break
                else:
                    error('unexpected priority token %r' % val, t)

            if name and value:
                self._handler.property(name, value, important)

            if '}' == end:
                self._handler.endSelector(selectors, line=line, col=col)
                break

    def setDocumentHandler(self, handler):
        "Allow an application to register a document event `handler`."
        self._handler = handler

    def setErrorHandler(self, handler):
        "Allow an application to register an error event `handler`."
        self._errorHandler = handler