pax_global_header00006660000000000000000000000064144522424200014511gustar00rootroot0000000000000052 comment=ae6398d219ba4b717e50f64be94f0c8e9768e16b uap-python-0.18.0/000077500000000000000000000000001445224242000137035ustar00rootroot00000000000000uap-python-0.18.0/.github/000077500000000000000000000000001445224242000152435ustar00rootroot00000000000000uap-python-0.18.0/.github/workflows/000077500000000000000000000000001445224242000173005ustar00rootroot00000000000000uap-python-0.18.0/.github/workflows/ci.yml000066400000000000000000000053621445224242000204240ustar00rootroot00000000000000name: CI on: push: branches: [ '*' ] pull_request: branches: [ '*' ] jobs: checks: runs-on: ubuntu-latest strategy: fail-fast: false steps: - name: Checkout working copy uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install checkers run: | python -mpip install --upgrade pip python -mpip install black flake8 - name: flake run: flake8 . - name: black run: black --check --diff --color --quiet . compile: runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.8"] pyyaml-version: ["5.1.*", "5.4.*", "6.0.*", "6.*"] exclude: - python-version: 2.7 pyyaml-version: 6.0.* - python-version: 2.7 pyyaml-version: 6.* steps: - name: Checkout working copy uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependency run: | python -mpip install --upgrade pip python -mpip install pyyaml==${{ matrix.pyyaml-version }} - name: Build regexes.py run: python setup.py build_regexes -i - name: Check results run: | # check that _regexes exists, and .eggs does not (== setuptools used our dependency) test -e ua_parser/_regexes.py -a ! 
-e .eggs test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.8"] steps: - name: Checkout working copy uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install test dependencies run: | python -mpip install --upgrade pip # if binary wheels are not available for the current package install libyaml # NB: cyaml is outright broken on pypy so exclude that if ! ${{matrix.python-version == 'pypy-3.8'}}; then if ! pip download --only-binary pyyaml -rrequirements_dev.txt > /dev/null 2>&1; then sudo apt install libyaml-dev fi fi python -mpip install -r requirements_dev.txt - name: install package in environment run: python setup.py develop - name: run tests run: pytest -v -Werror - name: run doctests # pprint formatting was changed a lot in 3.5 if: ${{ matrix.python-version != '2.7' }} run: python -mdoctest README.rst uap-python-0.18.0/.gitignore000066400000000000000000000001221445224242000156660ustar00rootroot00000000000000*.pyc *.egg-info/ .eggs/ .cache/ .tox/ build/ dist/ tmp/ regexes.yaml _regexes.py uap-python-0.18.0/.gitmodules000066400000000000000000000001461445224242000160610ustar00rootroot00000000000000[submodule "uap-core"] path = uap-core url = https://github.com/ua-parser/uap-core branch = master uap-python-0.18.0/MANIFEST.in000066400000000000000000000000771445224242000154450ustar00rootroot00000000000000include README.rst include ua_parser/LICENSE global-exclude *~ uap-python-0.18.0/Makefile000066400000000000000000000010411445224242000153370ustar00rootroot00000000000000all: test test: clean @mkdir -p tmp @PYTHONPATH=tmp python setup.py develop -d tmp @# run all tests @PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py @# run a single test @#PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py ParseTest.testStringsDeviceBrandModel clean: @find . 
-name '*.pyc' -delete @rm -rf tmp \ ua_parser.egg-info \ dist \ build \ ua_parser/_regexes.py format: @black . release: clean python setup.py sdist bdist_wheel twine upload -s dist/* .PHONY: all test clean format release uap-python-0.18.0/README.rst000066400000000000000000000101401445224242000153660ustar00rootroot00000000000000uap-python ========== A python implementation of the UA Parser (https://github.com/ua-parser, formerly https://github.com/tobie/ua-parser) Build Status ------------ .. image:: https://github.com/ua-parser/uap-python/actions/workflows/ci.yml/badge.svg :alt: CI on the master branch Installing ---------- Install via pip ~~~~~~~~~~~~~~~ Just run: .. code-block:: sh $ pip install ua-parser Manual install ~~~~~~~~~~~~~~ In the top-level directory run: .. code-block:: sh $ python setup.py install Change Log --------------- Because this repo is mostly a python wrapper for the User Agent String Parser repo (https://github.com/ua-parser/uap-core), the changes made to this repo are best described by the update diffs in that project. Please see the diffs for this submodule (https://github.com/ua-parser/uap-core/releases) for a list of what has changed between versions of this package. Getting Started --------------- Retrieve data on a user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.Parse(ua_string) >>> pp.pprint(parsed_string) { 'device': {'brand': 'Apple', 'family': 'Mac', 'model': 'Mac'}, 'os': { 'family': 'Mac OS X', 'major': '10', 'minor': '9', 'patch': '4', 'patch_minor': None}, 'string': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) ' 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 ' 'Safari/537.36', 'user_agent': { 'family': 'Chrome', 'major': '41', 'minor': '0', 'patch': '2272'}} Extract browser data from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseUserAgent(ua_string) >>> pp.pprint(parsed_string) {'family': 'Chrome', 'major': '41', 'minor': '0', 'patch': '2272'} .. ⚠️Before 0.15, the convenience parsers (``ParseUserAgent``, ``ParseOs``, and ``ParseDevice``) were not cached, which could result in degraded performances when parsing large amounts of identical user-agents (which might occur for real-world datasets). For these versions (up to 0.10 included), prefer using ``Parse`` and extracting the sub-component you need from the resulting dictionary. Extract OS information from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseOS(ua_string) >>> pp.pprint(parsed_string) { 'family': 'Mac OS X', 'major': '10', 'minor': '9', 'patch': '4', 'patch_minor': None} Extract Device information from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseDevice(ua_string) >>> pp.pprint(parsed_string) {'brand': 'Apple', 'family': 'Mac', 'model': 'Mac'} Copyright --------- Copyright 2008 Google Inc. See ua\_parser/LICENSE for more information uap-python-0.18.0/requirements_dev.txt000066400000000000000000000000311445224242000200170ustar00rootroot00000000000000pytest pyyaml tox==3.9.0 uap-python-0.18.0/setup.cfg000066400000000000000000000000341445224242000155210ustar00rootroot00000000000000[bdist_wheel] universal = 1 uap-python-0.18.0/setup.py000066400000000000000000000202471445224242000154220ustar00rootroot00000000000000#!/usr/bin/env python # flake8: noqa import os from distutils import log from distutils.core import Command from distutils.command.build import build as _build from setuptools import setup from setuptools.command.develop import develop as _develop from setuptools.command.sdist import sdist as _sdist from setuptools.command.install import install as _install def check_output(*args, **kwargs): from subprocess import Popen proc = Popen(*args, **kwargs) output, _ = proc.communicate() rv = proc.poll() assert rv == 0, output class build_regexes(Command): description = "build 
supporting regular expressions from uap-core" user_options = [ ("work-path=", "w", "The working directory for source files. Defaults to ."), ("build-lib=", "b", "directory for script runtime modules"), ( "inplace", "i", "ignore build-lib and put compiled javascript files into the source " + "directory alongside your pure Python modules", ), ( "force", "f", "Force rebuilding of static content. Defaults to rebuilding on version " "change detection.", ), ] boolean_options = ["force"] def initialize_options(self): self.build_lib = None self.force = None self.work_path = None self.inplace = None def finalize_options(self): install = self.distribution.get_command_obj("install") sdist = self.distribution.get_command_obj("sdist") build_ext = self.distribution.get_command_obj("build_ext") if self.inplace is None: self.inplace = ( (build_ext.inplace or install.finalized or sdist.finalized) and 1 or 0 ) if self.inplace: self.build_lib = "." else: self.set_undefined_options("build", ("build_lib", "build_lib")) if self.work_path is None: self.work_path = os.path.realpath(os.path.join(os.path.dirname(__file__))) def run(self): work_path = self.work_path if not os.path.exists(os.path.join(work_path, ".git")): return log.info("initializing git submodules") check_output(["git", "submodule", "init"], cwd=work_path) check_output(["git", "submodule", "update"], cwd=work_path) yaml_src = os.path.join(work_path, "uap-core", "regexes.yaml") if not os.path.exists(yaml_src): raise RuntimeError( "Unable to find regexes.yaml, should be at %r" % yaml_src ) def force_bytes(text): if text is None: return text return text.encode("utf8") def write_params(fields): # strip trailing None values while len(fields) > 1 and fields[-1] is None: fields.pop() for field in fields: fp.write((" %r,\n" % field).encode("utf-8")) import yaml log.info("compiling regexes.yaml -> _regexes.py") with open(yaml_src, "rb") as fp: regexes = yaml.safe_load(fp) lib_dest = os.path.join(self.build_lib, "ua_parser") if not 
os.path.exists(lib_dest): os.makedirs(lib_dest) py_dest = os.path.join(lib_dest, "_regexes.py") with open(py_dest, "wb") as fp: # fmt: off fp.write(b"# -*- coding: utf-8 -*-\n") fp.write(b"############################################\n") fp.write(b"# NOTICE: This file is autogenerated from #\n") fp.write(b"# regexes.yaml. Do not edit by hand, #\n") fp.write(b"# instead, re-run `setup.py build_regexes` #\n") fp.write(b"############################################\n") fp.write(b"\n") fp.write(b"from __future__ import absolute_import, unicode_literals\n") fp.write(b"from .user_agent_parser import (\n") fp.write(b" UserAgentParser, DeviceParser, OSParser,\n") fp.write(b")\n") fp.write(b"\n") fp.write(b"__all__ = ('USER_AGENT_PARSERS', 'DEVICE_PARSERS', 'OS_PARSERS')\n") fp.write(b"\n") fp.write(b"USER_AGENT_PARSERS = [\n") for device_parser in regexes["user_agent_parsers"]: fp.write(b" UserAgentParser(\n") write_params([ device_parser["regex"], device_parser.get("family_replacement"), device_parser.get("v1_replacement"), device_parser.get("v2_replacement"), ]) fp.write(b" ),\n") fp.write(b"]\n") fp.write(b"\n") fp.write(b"DEVICE_PARSERS = [\n") for device_parser in regexes["device_parsers"]: fp.write(b" DeviceParser(\n") write_params([ device_parser["regex"], device_parser.get("regex_flag"), device_parser.get("device_replacement"), device_parser.get("brand_replacement"), device_parser.get("model_replacement"), ]) fp.write(b" ),\n") fp.write(b"]\n") fp.write(b"\n") fp.write(b"OS_PARSERS = [\n") for device_parser in regexes["os_parsers"]: fp.write(b" OSParser(\n") write_params([ device_parser["regex"], device_parser.get("os_replacement"), device_parser.get("os_v1_replacement"), device_parser.get("os_v2_replacement"), device_parser.get("os_v3_replacement"), device_parser.get("os_v4_replacement"), ]) fp.write(b" ),\n") fp.write(b"]\n") # fmt: on self.update_manifest() def update_manifest(self): sdist = self.distribution.get_command_obj("sdist") if not sdist.finalized: 
return sdist.filelist.files.append("ua_parser/_regexes.py") class develop(_develop): def run(self): self.run_command("build_regexes") _develop.run(self) class install(_install): def run(self): self.run_command("build_regexes") _install.run(self) class build(_build): def run(self): self.run_command("build_regexes") _build.run(self) class sdist(_sdist): sub_commands = _sdist.sub_commands + [("build_regexes", None)] cmdclass = { "sdist": sdist, "develop": develop, "build": build, "install": install, "build_regexes": build_regexes, } setup( name="ua-parser", version="0.18.0", description="Python port of Browserscope's user agent parser", author="PBS", author_email="no-reply@pbs.org", packages=["ua_parser"], package_dir={"": "."}, license="Apache 2.0", zip_safe=False, url="https://github.com/ua-parser/uap-python", include_package_data=True, setup_requires=["pyyaml"], install_requires=[], cmdclass=cmdclass, classifiers=[ "Development Status :: 4 - Beta", "Environment :: Web Environment", "Intended Audience :: Developers", "Operating System :: OS Independent", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], ) uap-python-0.18.0/tox.ini000066400000000000000000000012521445224242000152160ustar00rootroot00000000000000[tox] envlist = py27, py36, 
py37, py38, py39, py310, pypy3.8, docs, flake8, black skipsdist = True [testenv] usedevelop = True deps = -rrequirements_dev.txt commands = pytest -Werror {posargs} python -mdoctest README.rst [testenv:py27] # no doctesting in 2.7 because of formatting divergences commands = pytest {posargs} [testenv:docs] skip_install = True deps = docutils Pygments commands = python setup.py check -s --restructuredtext --metadata [testenv:flake8] skip_install = True deps = flake8 commands = flake8 {posargs} [testenv:black] skip_install = True deps = black commands = black --check --diff . [flake8] max_line_length = 88 filename = ua_parser/ uap-python-0.18.0/ua_parser/000077500000000000000000000000001445224242000156645ustar00rootroot00000000000000uap-python-0.18.0/ua_parser/LICENSE000066400000000000000000000010461445224242000166720ustar00rootroot00000000000000Copyright 2008 Google Inc. Licensed under the Apache License, Version 2.0 (the 'License') you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.uap-python-0.18.0/ua_parser/__init__.py000066400000000000000000000000251445224242000177720ustar00rootroot00000000000000VERSION = (0, 16, 1) uap-python-0.18.0/ua_parser/user_agent_parser.py000066400000000000000000000462431445224242000217570ustar00rootroot00000000000000# Copyright 2009 Google Inc. # # Licensed under the Apache License, Version 2.0 (the 'License') # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Python implementation of the UA parser.""" from __future__ import absolute_import import os import re import sys import warnings __author__ = "Lindsey Simon " class UserAgentParser(object): def __init__( self, pattern, family_replacement=None, v1_replacement=None, v2_replacement=None ): """Initialize UserAgentParser. Args: pattern: a regular expression string family_replacement: a string to override the matched family (optional) v1_replacement: a string to override the matched v1 (optional) v2_replacement: a string to override the matched v2 (optional) """ self.pattern = pattern self.user_agent_re = re.compile(self.pattern) self.family_replacement = family_replacement self.v1_replacement = v1_replacement self.v2_replacement = v2_replacement def MatchSpans(self, user_agent_string): match_spans = [] match = self.user_agent_re.search(user_agent_string) if match: match_spans = [ match.span(group_index) for group_index in range(1, match.lastindex + 1) ] return match_spans def Parse(self, user_agent_string): family, v1, v2, v3 = None, None, None, None match = self.user_agent_re.search(user_agent_string) if match: if self.family_replacement: if re.search(r"\$1", self.family_replacement): family = re.sub(r"\$1", match.group(1), self.family_replacement) else: family = self.family_replacement else: family = match.group(1) if self.v1_replacement: v1 = self.v1_replacement elif match.lastindex and match.lastindex >= 2: v1 = match.group(2) or None if self.v2_replacement: v2 = self.v2_replacement elif match.lastindex and match.lastindex >= 3: v2 = match.group(3) or None if 
match.lastindex and match.lastindex >= 4: v3 = match.group(4) or None return family, v1, v2, v3 class OSParser(object): def __init__( self, pattern, os_replacement=None, os_v1_replacement=None, os_v2_replacement=None, os_v3_replacement=None, os_v4_replacement=None, ): """Initialize UserAgentParser. Args: pattern: a regular expression string os_replacement: a string to override the matched os (optional) os_v1_replacement: a string to override the matched v1 (optional) os_v2_replacement: a string to override the matched v2 (optional) os_v3_replacement: a string to override the matched v3 (optional) os_v4_replacement: a string to override the matched v4 (optional) """ self.pattern = pattern self.user_agent_re = re.compile(self.pattern) self.os_replacement = os_replacement self.os_v1_replacement = os_v1_replacement self.os_v2_replacement = os_v2_replacement self.os_v3_replacement = os_v3_replacement self.os_v4_replacement = os_v4_replacement def MatchSpans(self, user_agent_string): match_spans = [] match = self.user_agent_re.search(user_agent_string) if match: match_spans = [ match.span(group_index) for group_index in range(1, match.lastindex + 1) ] return match_spans def Parse(self, user_agent_string): os, os_v1, os_v2, os_v3, os_v4 = None, None, None, None, None match = self.user_agent_re.search(user_agent_string) if match: if self.os_replacement: os = MultiReplace(self.os_replacement, match) elif match.lastindex: os = match.group(1) if self.os_v1_replacement: os_v1 = MultiReplace(self.os_v1_replacement, match) elif match.lastindex and match.lastindex >= 2: os_v1 = match.group(2) if self.os_v2_replacement: os_v2 = MultiReplace(self.os_v2_replacement, match) elif match.lastindex and match.lastindex >= 3: os_v2 = match.group(3) if self.os_v3_replacement: os_v3 = MultiReplace(self.os_v3_replacement, match) elif match.lastindex and match.lastindex >= 4: os_v3 = match.group(4) if self.os_v4_replacement: os_v4 = MultiReplace(self.os_v4_replacement, match) elif 
match.lastindex and match.lastindex >= 5: os_v4 = match.group(5) return os, os_v1, os_v2, os_v3, os_v4 def MultiReplace(string, match): def _repl(m): index = int(m.group(1)) - 1 group = match.groups() if index < len(group): return group[index] return "" _string = re.sub(r"\$(\d)", _repl, string) _string = re.sub(r"^\s+|\s+$", "", _string) if _string == "": return None return _string class DeviceParser(object): def __init__( self, pattern, regex_flag=None, device_replacement=None, brand_replacement=None, model_replacement=None, ): """Initialize UserAgentParser. Args: pattern: a regular expression string device_replacement: a string to override the matched device (optional) """ self.pattern = pattern if regex_flag == "i": self.user_agent_re = re.compile(self.pattern, re.IGNORECASE) else: self.user_agent_re = re.compile(self.pattern) self.device_replacement = device_replacement self.brand_replacement = brand_replacement self.model_replacement = model_replacement def MatchSpans(self, user_agent_string): match_spans = [] match = self.user_agent_re.search(user_agent_string) if match: match_spans = [ match.span(group_index) for group_index in range(1, match.lastindex + 1) ] return match_spans def Parse(self, user_agent_string): device, brand, model = None, None, None match = self.user_agent_re.search(user_agent_string) if match: if self.device_replacement: device = MultiReplace(self.device_replacement, match) else: device = match.group(1) if self.brand_replacement: brand = MultiReplace(self.brand_replacement, match) if self.model_replacement: model = MultiReplace(self.model_replacement, match) elif len(match.groups()) > 0: model = match.group(1) return device, brand, model MAX_CACHE_SIZE = 200 _PARSE_CACHE = {} _UA_TYPES = str if sys.version_info < (3,): _UA_TYPES = (str, unicode) def _lookup(ua, args): if not isinstance(ua, _UA_TYPES): raise TypeError("Expected user agent to be a string, got %r" % ua) key = (ua, tuple(sorted(args.items()))) entry = _PARSE_CACHE.get(key) 
if entry is not None: return entry if len(_PARSE_CACHE) >= MAX_CACHE_SIZE: _PARSE_CACHE.clear() v = _PARSE_CACHE[key] = {"string": ua} return v def _cached(ua, args, key, fn): entry = _lookup(ua, args) r = entry.get(key) if not r: r = entry[key] = fn(ua, args) return r def Parse(user_agent_string, **jsParseBits): """Parse all the things Args: user_agent_string: the full user agent string Returns: A dictionary containing all parsed bits """ entry = _lookup(user_agent_string, jsParseBits) # entry is complete, return directly if len(entry) == 4: return entry # entry is partially or entirely empty if "user_agent" not in entry: entry["user_agent"] = _ParseUserAgent(user_agent_string, jsParseBits) if "os" not in entry: entry["os"] = _ParseOS(user_agent_string, jsParseBits) if "device" not in entry: entry["device"] = _ParseDevice(user_agent_string, jsParseBits) return entry def ParseUserAgent(user_agent_string, **jsParseBits): """Parses the user-agent string for user agent (browser) info. Args: user_agent_string: The full user-agent string. Returns: A dictionary containing parsed bits. """ return _cached(user_agent_string, jsParseBits, "user_agent", _ParseUserAgent) def _ParseUserAgent(user_agent_string, jsParseBits): if jsParseBits: warnings.warn( "javascript overrides are deprecated and will be removed next release", category=DeprecationWarning, stacklevel=2, ) if ( "js_user_agent_family" in jsParseBits and jsParseBits["js_user_agent_family"] != "" ): family = jsParseBits["js_user_agent_family"] v1 = jsParseBits.get("js_user_agent_v1") or None v2 = jsParseBits.get("js_user_agent_v2") or None v3 = jsParseBits.get("js_user_agent_v3") or None else: for uaParser in USER_AGENT_PARSERS: family, v1, v2, v3 = uaParser.Parse(user_agent_string) if family: break # Override for Chrome Frame IFF Chrome is enabled. 
if "js_user_agent_string" in jsParseBits: js_user_agent_string = jsParseBits["js_user_agent_string"] if ( js_user_agent_string and js_user_agent_string.find("Chrome/") > -1 and user_agent_string.find("chromeframe") > -1 ): jsOverride = {} jsOverride = ParseUserAgent(js_user_agent_string) family = "Chrome Frame (%s %s)" % (family, v1) v1 = jsOverride["major"] v2 = jsOverride["minor"] v3 = jsOverride["patch"] family = family or "Other" return { "family": family, "major": v1 or None, "minor": v2 or None, "patch": v3 or None, } def ParseOS(user_agent_string, **jsParseBits): """Parses the user-agent string for operating system info Args: user_agent_string: The full user-agent string. Returns: A dictionary containing parsed bits. """ return _cached(user_agent_string, jsParseBits, "os", _ParseOS) def _ParseOS(user_agent_string, jsParseBits): if jsParseBits: warnings.warn( "javascript overrides are deprecated and will be removed next release", category=DeprecationWarning, stacklevel=2, ) for osParser in OS_PARSERS: os, os_v1, os_v2, os_v3, os_v4 = osParser.Parse(user_agent_string) if os: break os = os or "Other" return { "family": os, "major": os_v1, "minor": os_v2, "patch": os_v3, "patch_minor": os_v4, } def ParseDevice(user_agent_string, **jsParseBits): """Parses the user-agent string for device info. Args: user_agent_string: The full user-agent string. Returns: A dictionary containing parsed bits. 
""" return _cached(user_agent_string, jsParseBits, "device", _ParseDevice) def _ParseDevice(user_agent_string, jsParseBits): if jsParseBits: warnings.warn( "javascript overrides are deprecated and will be removed next release", category=DeprecationWarning, stacklevel=2, ) for deviceParser in DEVICE_PARSERS: device, brand, model = deviceParser.Parse(user_agent_string) if device: break if device is None: device = "Other" return {"family": device, "brand": brand, "model": model} def PrettyUserAgent(family, v1=None, v2=None, v3=None): """Pretty user agent string.""" if v3: if v3[0].isdigit(): return "%s %s.%s.%s" % (family, v1, v2, v3) else: return "%s %s.%s%s" % (family, v1, v2, v3) elif v2: return "%s %s.%s" % (family, v1, v2) elif v1: return "%s %s" % (family, v1) return family def PrettyOS(os, os_v1=None, os_v2=None, os_v3=None, os_v4=None): """Pretty os string.""" if os_v4: return "%s %s.%s.%s.%s" % (os, os_v1, os_v2, os_v3, os_v4) if os_v3: if os_v3[0].isdigit(): return "%s %s.%s.%s" % (os, os_v1, os_v2, os_v3) else: return "%s %s.%s%s" % (os, os_v1, os_v2, os_v3) elif os_v2: return "%s %s.%s" % (os, os_v1, os_v2) elif os_v1: return "%s %s" % (os, os_v1) return os def ParseWithJSOverrides( user_agent_string, js_user_agent_string=None, js_user_agent_family=None, js_user_agent_v1=None, js_user_agent_v2=None, js_user_agent_v3=None, ): """backwards compatible. use one of the other Parse methods instead!""" warnings.warn( "Use Parse (or a specialised parser)", DeprecationWarning, stacklevel=2 ) # Override via JS properties. 
if js_user_agent_family is not None and js_user_agent_family != "": family = js_user_agent_family v1 = None v2 = None v3 = None if js_user_agent_v1 is not None: v1 = js_user_agent_v1 if js_user_agent_v2 is not None: v2 = js_user_agent_v2 if js_user_agent_v3 is not None: v3 = js_user_agent_v3 else: for parser in USER_AGENT_PARSERS: family, v1, v2, v3 = parser.Parse(user_agent_string) if family: break # Override for Chrome Frame IFF Chrome is enabled. if ( js_user_agent_string and js_user_agent_string.find("Chrome/") > -1 and user_agent_string.find("chromeframe") > -1 ): family = "Chrome Frame (%s %s)" % (family, v1) ua_dict = ParseUserAgent(js_user_agent_string) v1 = ua_dict["major"] v2 = ua_dict["minor"] v3 = ua_dict["patch"] return family or "Other", v1, v2, v3 def Pretty(family, v1=None, v2=None, v3=None): """backwards compatible. use PrettyUserAgent instead!""" warnings.warn("Use PrettyUserAgent", DeprecationWarning, stacklevel=2) if v3: if v3[0].isdigit(): return "%s %s.%s.%s" % (family, v1, v2, v3) else: return "%s %s.%s%s" % (family, v1, v2, v3) elif v2: return "%s %s.%s" % (family, v1, v2) elif v1: return "%s %s" % (family, v1) return family def GetFilters( user_agent_string, js_user_agent_string=None, js_user_agent_family=None, js_user_agent_v1=None, js_user_agent_v2=None, js_user_agent_v3=None, ): """Return the optional arguments that should be saved and used to query. js_user_agent_string is always returned if it is present. We really only need it for Chrome Frame. However, I added it in the generally case to find other cases when it is different. When the recording of js_user_agent_string was added, we created new records for all new user agents. Since we only added js_document_mode for the IE 9 preview case, it did not cause new user agent records the way js_user_agent_string did. js_document_mode has since been removed in favor of individual property overrides. Args: user_agent_string: The full user-agent string. 
js_user_agent_string: JavaScript ua string from client-side js_user_agent_family: This is an override for the family name to deal with the fact that IE platform preview (for instance) cannot be distinguished by user_agent_string, but only in javascript. js_user_agent_v1: v1 override - see above. js_user_agent_v2: v1 override - see above. js_user_agent_v3: v1 override - see above. Returns: {js_user_agent_string: '[...]', js_family_name: '[...]', etc...} """ filters = {} filterdict = { "js_user_agent_string": js_user_agent_string, "js_user_agent_family": js_user_agent_family, "js_user_agent_v1": js_user_agent_v1, "js_user_agent_v2": js_user_agent_v2, "js_user_agent_v3": js_user_agent_v3, } for key, value in filterdict.items(): if value is not None and value != "": filters[key] = value return filters # Build the list of user agent parsers from YAML UA_PARSER_YAML = os.environ.get("UA_PARSER_YAML") if UA_PARSER_YAML: # This will raise an ImportError if missing, obviously since it's no # longer a requirement import yaml try: # Try and use libyaml bindings if available since faster, # pyyaml doesn't do it by default (yaml/pyyaml#436) from yaml import CSafeLoader as SafeLoader except ImportError: from yaml import SafeLoader with open(UA_PARSER_YAML, "rb") as fp: regexes = yaml.load(fp, Loader=SafeLoader) USER_AGENT_PARSERS = [] for _ua_parser in regexes["user_agent_parsers"]: _regex = _ua_parser["regex"] _family_replacement = _ua_parser.get("family_replacement") _v1_replacement = _ua_parser.get("v1_replacement") _v2_replacement = _ua_parser.get("v2_replacement") USER_AGENT_PARSERS.append( UserAgentParser( _regex, _family_replacement, _v1_replacement, _v2_replacement ) ) OS_PARSERS = [] for _os_parser in regexes["os_parsers"]: _regex = _os_parser["regex"] _os_replacement = _os_parser.get("os_replacement") _os_v1_replacement = _os_parser.get("os_v1_replacement") _os_v2_replacement = _os_parser.get("os_v2_replacement") _os_v3_replacement = _os_parser.get("os_v3_replacement") 
_os_v4_replacement = _os_parser.get("os_v4_replacement") OS_PARSERS.append( OSParser( _regex, _os_replacement, _os_v1_replacement, _os_v2_replacement, _os_v3_replacement, _os_v4_replacement, ) ) DEVICE_PARSERS = [] for _device_parser in regexes["device_parsers"]: _regex = _device_parser["regex"] _regex_flag = _device_parser.get("regex_flag") _device_replacement = _device_parser.get("device_replacement") _brand_replacement = _device_parser.get("brand_replacement") _model_replacement = _device_parser.get("model_replacement") DEVICE_PARSERS.append( DeviceParser( _regex, _regex_flag, _device_replacement, _brand_replacement, _model_replacement, ) ) # Clean our our temporary vars explicitly # so they can't be reused or imported del regexes del yaml del SafeLoader else: # Just load our pre-compiled versions from ._regexes import USER_AGENT_PARSERS, DEVICE_PARSERS, OS_PARSERS uap-python-0.18.0/ua_parser/user_agent_parser_test.py000066400000000000000000000261711445224242000230140ustar00rootroot00000000000000# Copyright 2008 Google Inc. # # Licensed under the Apache License, Version 2.0 (the 'License') # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """User Agent Parser Unit Tests. 
class ParseTest(unittest.TestCase):
    """Run the parsers against the YAML fixtures under ``TEST_RESOURCES_DIR``.

    Each ``test*`` method hands one fixture file to one of the
    ``run*FromYAML`` drivers, which compare the parser output with the
    expected values stored next to every user agent string.
    """

    def testBrowserscopeStrings(self):
        self.runUserAgentTestsFromYAML(
            os.path.join(TEST_RESOURCES_DIR, "tests/test_ua.yaml")
        )

    def testBrowserscopeStringsOS(self):
        self.runOSTestsFromYAML(os.path.join(TEST_RESOURCES_DIR, "tests/test_os.yaml"))

    def testStringsOS(self):
        self.runOSTestsFromYAML(
            os.path.join(TEST_RESOURCES_DIR, "test_resources/additional_os_tests.yaml")
        )

    def testStringsDevice(self):
        self.runDeviceTestsFromYAML(
            os.path.join(TEST_RESOURCES_DIR, "tests/test_device.yaml")
        )

    def testMozillaStrings(self):
        self.runUserAgentTestsFromYAML(
            os.path.join(
                TEST_RESOURCES_DIR, "test_resources/firefox_user_agent_strings.yaml"
            )
        )

    # NOTE: The YAML file used here is one output by makePGTSComparisonYAML()
    # below, as opposed to the pgts_browser_list-orig.yaml file.  The -orig
    # file is by no means perfect, but identifies many browsers that we
    # classify as "Other". This test itself is mostly useful to know when
    # something in UA parsing changes. An effort should be made to try and
    # reconcile the differences between the two YAML files.
    def testPGTSStrings(self):
        self.runUserAgentTestsFromYAML(
            os.path.join(TEST_RESOURCES_DIR, "test_resources/pgts_browser_list.yaml")
        )

    def testParseAll(self):
        """Check the combined ``Parse`` result for one known user agent."""
        user_agent_string = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; fr; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5,gzip(gfe),gzip(gfe)"
        expected = {
            "device": {"family": "Mac", "brand": "Apple", "model": "Mac"},
            "os": {
                "family": "Mac OS X",
                "major": "10",
                "minor": "4",
                "patch": None,
                "patch_minor": None,
            },
            "user_agent": {
                "family": "Firefox",
                "major": "3",
                "minor": "5",
                "patch": "5",
            },
            "string": user_agent_string,
        }

        result = user_agent_parser.Parse(user_agent_string)
        self.assertEqual(
            result,
            expected,
            "UA: {0}\n expected<{1}> != actual<{2}>".format(
                user_agent_string, expected, result
            ),
        )

    def _loadTestCases(self, file_name):
        """Return the ``test_cases`` entry of the YAML fixture *file_name*.

        *file_name* is joined onto ``TEST_RESOURCES_DIR``; the callers in
        this class already pass a path built from that (absolute) directory,
        which ``os.path.join`` keeps unchanged.

        The file is opened in binary mode so that PyYAML decodes the stream
        itself instead of depending on the platform's default text encoding,
        and the ``with`` block closes the handle even when parsing fails.
        """
        path = os.path.join(TEST_RESOURCES_DIR, file_name)
        with open(path, "rb") as yaml_file:
            contents = yaml.load(yaml_file, Loader=SafeLoader)
        return contents["test_cases"]

    # Run a set of test cases from a YAML file
    def runUserAgentTestsFromYAML(self, file_name):
        """Check ``ParseUserAgent`` against every case in *file_name*."""
        for test_case in self._loadTestCases(file_name):
            # Inputs to Parse()
            user_agent_string = test_case["user_agent_string"]

            # The expected results
            expected = {
                "family": test_case["family"],
                "major": test_case["major"],
                "minor": test_case["minor"],
                "patch": test_case["patch"],
            }

            result = user_agent_parser.ParseUserAgent(user_agent_string)
            self.assertEqual(
                result,
                expected,
                "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format(
                    user_agent_string,
                    expected["family"],
                    expected["major"],
                    expected["minor"],
                    expected["patch"],
                    result["family"],
                    result["major"],
                    result["minor"],
                    result["patch"],
                ),
            )
            # Checked after every parse so growth past the limit is caught
            # as soon as it happens rather than only at the end of the file.
            self.assertLessEqual(
                len(user_agent_parser._PARSE_CACHE),
                user_agent_parser.MAX_CACHE_SIZE,
                "verify that the cache size never exceeds the configured setting",
            )

    def runOSTestsFromYAML(self, file_name):
        """Check ``ParseOS`` against every case in *file_name*."""
        for test_case in self._loadTestCases(file_name):
            # Inputs to Parse()
            user_agent_string = test_case["user_agent_string"]

            # The expected results
            expected = {
                "family": test_case["family"],
                "major": test_case["major"],
                "minor": test_case["minor"],
                "patch": test_case["patch"],
                "patch_minor": test_case["patch_minor"],
            }

            result = user_agent_parser.ParseOS(user_agent_string)
            self.assertEqual(
                result,
                expected,
                "UA: {0}\n expected<{1} {2} {3} {4} {5}> != actual<{6} {7} {8} {9} {10}>".format(
                    user_agent_string,
                    expected["family"],
                    expected["major"],
                    expected["minor"],
                    expected["patch"],
                    expected["patch_minor"],
                    result["family"],
                    result["major"],
                    result["minor"],
                    result["patch"],
                    result["patch_minor"],
                ),
            )

    def runDeviceTestsFromYAML(self, file_name):
        """Check ``ParseDevice`` against every case in *file_name*."""
        for test_case in self._loadTestCases(file_name):
            # Inputs to Parse()
            user_agent_string = test_case["user_agent_string"]

            # The expected results
            expected = {
                "family": test_case["family"],
                "brand": test_case["brand"],
                "model": test_case["model"],
            }

            result = user_agent_parser.ParseDevice(user_agent_string)
            self.assertEqual(
                result,
                expected,
                "UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format(
                    user_agent_string,
                    expected["family"],
                    expected["brand"],
                    expected["model"],
                    result["family"],
                    result["brand"],
                    result["model"],
                ),
            )
3.0.4506.2152; .NET CLR 3.5.30729)" ) filters = user_agent_parser.GetFilters( user_agent_string, js_user_agent_string="bar", js_user_agent_family="foo" ) self.assertEqual( {"js_user_agent_string": "bar", "js_user_agent_family": "foo"}, filters ) class TestDeprecationWarnings(unittest.TestCase): def setUp(self): """In Python 2.7, catch_warnings apparently does not do anything if the warning category is not active, whereas in 3(.6 and up) it seems to work out of the box. """ super(TestDeprecationWarnings, self).setUp() warnings.simplefilter("always", DeprecationWarning) def tearDown(self): # not ideal as it discards all other warnings updates from the # process, should really copy the contents of # `warnings.filters`, then reset-it. warnings.resetwarnings() super(TestDeprecationWarnings, self).tearDown() def test_parser_deprecation(self): with warnings.catch_warnings(record=True) as ws: user_agent_parser.ParseWithJSOverrides("") self.assertEqual(len(ws), 1) self.assertEqual(ws[0].category, DeprecationWarning) def test_printer_deprecation(self): with warnings.catch_warnings(record=True) as ws: user_agent_parser.Pretty("") self.assertEqual(len(ws), 1) self.assertEqual(ws[0].category, DeprecationWarning) def test_js_bits_deprecation(self): for parser, count in [ (user_agent_parser.Parse, 3), (user_agent_parser.ParseUserAgent, 1), (user_agent_parser.ParseOS, 1), (user_agent_parser.ParseDevice, 1), ]: user_agent_parser._PARSE_CACHE.clear() with warnings.catch_warnings(record=True) as ws: parser("some random thing", js_attribute=True) self.assertEqual(len(ws), count) for w in ws: self.assertEqual(w.category, DeprecationWarning) class ErrTest(unittest.TestCase): @unittest.skipIf( sys.version_info < (3,), "bytes and str are not differentiated in P2" ) def test_bytes(self): with self.assertRaises(TypeError): user_agent_parser.Parse(b"") def test_int(self): with self.assertRaises(TypeError): user_agent_parser.Parse(0) def test_list(self): with self.assertRaises(TypeError): 
class ErrTest(unittest.TestCase):
    """``Parse`` must raise ``TypeError`` for each non-text input tried here."""

    @unittest.skipIf(
        sys.version_info[0] < 3, "bytes and str are not differentiated in P2"
    )
    def test_bytes(self):
        self.assertRaises(TypeError, user_agent_parser.Parse, b"")

    def test_int(self):
        self.assertRaises(TypeError, user_agent_parser.Parse, 0)

    def test_list(self):
        self.assertRaises(TypeError, user_agent_parser.Parse, [])

    def test_tuple(self):
        self.assertRaises(TypeError, user_agent_parser.Parse, ())