pax_global_header00006660000000000000000000000064136256037470014527gustar00rootroot0000000000000052 comment=d40c2e38da1facf2458f0259daa58ba32d8d9fd9 uap-python-0.10.0/000077500000000000000000000000001362560374700137115ustar00rootroot00000000000000uap-python-0.10.0/.gitignore000066400000000000000000000001221362560374700156740ustar00rootroot00000000000000*.pyc *.egg-info/ .eggs/ .cache/ .tox/ build/ dist/ tmp/ regexes.yaml _regexes.py uap-python-0.10.0/.gitmodules000066400000000000000000000001461362560374700160670ustar00rootroot00000000000000[submodule "uap-core"] path = uap-core url = https://github.com/ua-parser/uap-core branch = master uap-python-0.10.0/.travis.yml000066400000000000000000000007551362560374700160310ustar00rootroot00000000000000sudo: false language: python install: - pip install -r requirements_dev.txt matrix: include: - python: 2.7 env: - TOX_ENV=py27 - python: 2.7 env: - TOX_ENV=py27-flake8 - python: 2.7 env: - TOX_ENV=docs - python: 3.6 env: - TOX_ENV=py36 - python: 3.6 env: - TOX_ENV=py36-flake8 - python: 3.6 env: - TOX_ENV=py36-black - python: 3.7 env: - TOX_ENV=py37 - python: 3.8 env: - TOX_ENV=py38 script: tox -e $TOX_ENV uap-python-0.10.0/MANIFEST.in000066400000000000000000000000771362560374700154530ustar00rootroot00000000000000include README.rst include ua_parser/LICENSE global-exclude *~ uap-python-0.10.0/Makefile000066400000000000000000000010411362560374700153450ustar00rootroot00000000000000all: test test: clean @mkdir -p tmp @PYTHONPATH=tmp python setup.py develop -d tmp @# run all tests @PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py @# run a single test @#PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py ParseTest.testStringsDeviceBrandModel clean: @find . -name '*.pyc' -delete @rm -rf tmp \ ua_parser.egg-info \ dist \ build \ ua_parser/_regexes.py format: @black . 
release: clean python setup.py sdist bdist_wheel twine upload -s dist/* .PHONY: all test clean format release uap-python-0.10.0/README.rst000066400000000000000000000072641362560374700154110ustar00rootroot00000000000000uap-python ========== A python implementation of the UA Parser (https://github.com/ua-parser, formerly https://github.com/tobie/ua-parser) Build Status ------------ .. image:: https://travis-ci.org/ua-parser/uap-python.svg :target: https://travis-ci.org/ua-parser/uap-python Installing ---------- Install via pip ~~~~~~~~~~~~~~~ Just run: .. code-block:: sh $ pip install ua-parser Manual install ~~~~~~~~~~~~~~ In the top-level directory run: .. code-block:: sh $ python setup.py install Change Log --------------- Because this repo is mostly a python wrapper for the User Agent String Parser repo (https://github.com/ua-parser/uap-core), the changes made to this repo are best described by the update diffs in that project. Please see the diffs for this submodule (https://github.com/ua-parser/uap-core/releases) for a list of what has changed between versions of this package. Getting Started --------------- Retrieve data on a user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.Parse(ua_string) >>> pp.pprint(parsed_string) { 'device': { 'brand': None, 'family': 'Other', 'model': None}, 'os': { 'family': 'Mac OS X', 'major': '10', 'minor': '9', 'patch': '4', 'patch_minor': None}, 'string': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36', 'user_agent': { 'family': 'Chrome', 'major': '41', 'minor': '0', 'patch': '2272'}} Extract browser data from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseUserAgent(ua_string) >>> pp.pprint(parsed_string) { 'family': 'Chrome', 'major': '41', 'minor': '0', 'patch': '2272'} Extract OS information from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseOS(ua_string) >>> pp.pprint(parsed_string) { 'family': 'Mac OS X', 'major': '10', 'minor': '9', 'patch': '4', 'patch_minor': None} Extract Device information from user-agent string ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python >>> from ua_parser import user_agent_parser >>> import pprint >>> pp = pprint.PrettyPrinter(indent=4) >>> ua_string = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36' >>> parsed_string = user_agent_parser.ParseDevice(ua_string) >>> pp.pprint(parsed_string) { 'brand': None, 'family': 'Other', 'model': None} Copyright --------- Copyright 2008 Google Inc. See ua\_parser/LICENSE for more information uap-python-0.10.0/requirements.txt000066400000000000000000000000141362560374700171700ustar00rootroot00000000000000pyyaml==5.1 uap-python-0.10.0/requirements_dev.txt000066400000000000000000000000131362560374700200250ustar00rootroot00000000000000tox==3.9.0 uap-python-0.10.0/setup.cfg000066400000000000000000000000341362560374700155270ustar00rootroot00000000000000[bdist_wheel] universal = 1 uap-python-0.10.0/setup.py000066400000000000000000000202711362560374700154250ustar00rootroot00000000000000#!/usr/bin/env python # flake8: noqa import os from distutils import log from distutils.core import Command from distutils.command.build import build as _build from setuptools import setup from setuptools.command.develop import develop as _develop from setuptools.command.sdist import sdist as _sdist from setuptools.command.install import install as _install def check_output(*args, **kwargs): from subprocess import Popen proc = Popen(*args, **kwargs) output, _ = proc.communicate() rv = proc.poll() assert rv == 0, output class build_regexes(Command): description = "build supporting regular expressions from uap-core" user_options = [ ("work-path=", "w", "The working directory for source files. Defaults to ."), ("build-lib=", "b", "directory for script runtime modules"), ( "inplace", "i", "ignore build-lib and put compiled javascript files into the source " + "directory alongside your pure Python modules", ), ( "force", "f", "Force rebuilding of static content. 
Defaults to rebuilding on version " "change detection.", ), ] boolean_options = ["force"] def initialize_options(self): self.build_lib = None self.force = None self.work_path = None self.inplace = None def finalize_options(self): install = self.distribution.get_command_obj("install") sdist = self.distribution.get_command_obj("sdist") build_ext = self.distribution.get_command_obj("build_ext") if self.inplace is None: self.inplace = ( (build_ext.inplace or install.finalized or sdist.finalized) and 1 or 0 ) if self.inplace: self.build_lib = "." else: self.set_undefined_options("build", ("build_lib", "build_lib")) if self.work_path is None: self.work_path = os.path.realpath(os.path.join(os.path.dirname(__file__))) def run(self): work_path = self.work_path if not os.path.exists(os.path.join(work_path, ".git")): return log.info("initializing git submodules") check_output(["git", "submodule", "init"], cwd=work_path) check_output(["git", "submodule", "update"], cwd=work_path) yaml_src = os.path.join(work_path, "uap-core", "regexes.yaml") if not os.path.exists(yaml_src): raise RuntimeError( "Unable to find regexes.yaml, should be at %r" % yaml_src ) def force_bytes(text): if text is None: return text return text.encode("utf8") import yaml py_dest = os.path.join(self.build_lib, "ua_parser", "_regexes.py") log.info("compiling regexes.yaml -> _regexes.py") with open(yaml_src, "rb") as fp: regexes = yaml.safe_load(fp) with open(py_dest, "wb") as fp: # fmt: off fp.write(b"# -*- coding: utf-8 -*-\n") fp.write(b"############################################\n") fp.write(b"# NOTICE: This file is autogenerated from #\n") fp.write(b"# regexes.yaml. 
Do not edit by hand, #\n") fp.write(b"# instead, re-run `setup.py build_regexes` #\n") fp.write(b"############################################\n") fp.write(b"\n") fp.write(b"from __future__ import absolute_import, unicode_literals\n") fp.write(b"from .user_agent_parser import (\n") fp.write(b" UserAgentParser, DeviceParser, OSParser,\n") fp.write(b")\n") fp.write(b"\n") fp.write(b"__all__ = (\n") fp.write(b" 'USER_AGENT_PARSERS', 'DEVICE_PARSERS', 'OS_PARSERS',\n") fp.write(b")\n") fp.write(b"\n") fp.write(b"USER_AGENT_PARSERS = [\n") for device_parser in regexes["user_agent_parsers"]: fp.write(b" UserAgentParser(\n") fp.write(force_bytes(" %r,\n" % device_parser["regex"])) fp.write(force_bytes(" %r,\n" % device_parser.get("family_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("v1_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("v2_replacement"))) fp.write(b" ),\n") fp.write(b"]\n") fp.write(b"\n") fp.write(b"DEVICE_PARSERS = [\n") for device_parser in regexes["device_parsers"]: fp.write(b" DeviceParser(\n") fp.write(force_bytes(" %r,\n" % device_parser["regex"])) fp.write(force_bytes(" %r,\n" % device_parser.get("regex_flag"))) fp.write(force_bytes(" %r,\n" % device_parser.get("device_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("brand_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("model_replacement"))) fp.write(b" ),\n") fp.write(b"]\n") fp.write(b"\n") fp.write(b"OS_PARSERS = [\n") for device_parser in regexes["os_parsers"]: fp.write(b" OSParser(\n") fp.write(force_bytes(" %r,\n" % device_parser["regex"])) fp.write(force_bytes(" %r,\n" % device_parser.get("os_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("os_v1_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("os_v2_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("os_v3_replacement"))) fp.write(force_bytes(" %r,\n" % device_parser.get("os_v4_replacement"))) fp.write(b" ),\n") 
fp.write(b"]\n") # fmt: on self.update_manifest() def update_manifest(self): sdist = self.distribution.get_command_obj("sdist") if not sdist.finalized: return sdist.filelist.files.append("ua_parser/_regexes.py") class develop(_develop): def run(self): self.run_command("build_regexes") _develop.run(self) class install(_install): def run(self): self.run_command("build_regexes") _install.run(self) class build(_build): def run(self): self.run_command("build_regexes") _build.run(self) class sdist(_sdist): sub_commands = _sdist.sub_commands + [("build_regexes", None)] cmdclass = { "sdist": sdist, "develop": develop, "build": build, "install": install, "build_regexes": build_regexes, } setup( name="ua-parser", version="0.10.0", description="Python port of Browserscope's user agent parser", author="PBS", author_email="no-reply@pbs.org", packages=["ua_parser"], package_dir={"": "."}, license="Apache 2.0", zip_safe=False, url="https://github.com/ua-parser/uap-python", include_package_data=True, setup_requires=["pyyaml"], install_requires=[], cmdclass=cmdclass, classifiers=[ "Development Status :: 4 - Beta", "Environment :: Web Environment", "Intended Audience :: Developers", "Operating System :: OS Independent", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], ) 
uap-python-0.10.0/tox.ini000066400000000000000000000012751362560374700152310ustar00rootroot00000000000000[tox] envlist = py27, py36, py37, py38, docs, py27-flake8, py36-flake8, py36-black [testenv] deps = -rrequirements.txt commands = python setup.py develop python ua_parser/user_agent_parser_test.py [testenv:docs] basepython = python2.7 deps = docutils Pygments commands = python setup.py check -s --restructuredtext --metadata [testenv:py27-flake8] basepython = python2.7 deps = flake8 commands = flake8 {posargs} [testenv:py36-flake8] basepython = python3.6 deps = flake8 commands = flake8 {posargs} [testenv:py36-black] basepython = python3.6 deps = black commands = black --check . [flake8] max_line_length = 88 exclude = .git,.tox,dist,docs,_regexes.py,*_test.py,.eggs uap-python-0.10.0/ua_parser/000077500000000000000000000000001362560374700156725ustar00rootroot00000000000000uap-python-0.10.0/ua_parser/LICENSE000066400000000000000000000010461362560374700167000ustar00rootroot00000000000000Copyright 2008 Google Inc. Licensed under the Apache License, Version 2.0 (the 'License') you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.uap-python-0.10.0/ua_parser/__init__.py000066400000000000000000000000251362560374700200000ustar00rootroot00000000000000VERSION = (0, 10, 0) uap-python-0.10.0/ua_parser/user_agent_parser.py000066400000000000000000000430201362560374700217530ustar00rootroot00000000000000# Copyright 2009 Google Inc. # # Licensed under the Apache License, Version 2.0 (the 'License') # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Python implementation of the UA parser."""

from __future__ import absolute_import

import os
import re

__author__ = "Lindsey Simon "


def _GroupSpans(compiled_re, user_agent_string):
    """Return the span of every capture group up to the last one that matched.

    Args:
      compiled_re: a compiled regular expression
      user_agent_string: the string to search
    Returns:
      A list of (start, end) tuples for groups 1..lastindex of the first
      match, or an empty list when nothing matches or no group took part.
    """
    match = compiled_re.search(user_agent_string)
    if not match:
        return []
    # lastindex is None when the pattern has no group that participated in
    # the match; treat that as "zero groups" instead of failing on None + 1.
    last_group = match.lastindex or 0
    return [match.span(group_index) for group_index in range(1, last_group + 1)]


class UserAgentParser(object):
    """Extracts browser family and version from a string using one regex."""

    def __init__(
        self, pattern, family_replacement=None, v1_replacement=None, v2_replacement=None
    ):
        """Initialize UserAgentParser.

        Args:
          pattern: a regular expression string
          family_replacement: a string to override the matched family (optional)
          v1_replacement: a string to override the matched v1 (optional)
          v2_replacement: a string to override the matched v2 (optional)
        """
        self.pattern = pattern
        self.user_agent_re = re.compile(self.pattern)
        self.family_replacement = family_replacement
        self.v1_replacement = v1_replacement
        self.v2_replacement = v2_replacement

    def MatchSpans(self, user_agent_string):
        """Return the (start, end) spans of the matched capture groups."""
        return _GroupSpans(self.user_agent_re, user_agent_string)

    def Parse(self, user_agent_string):
        """Parse a user-agent string with this parser's pattern.

        Args:
          user_agent_string: the full user-agent string
        Returns:
          A (family, v1, v2, v3) tuple; every element is None when the
          pattern does not match.
        """
        family, v1, v2, v3 = None, None, None, None
        match = self.user_agent_re.search(user_agent_string)
        if match:
            group_count = match.lastindex or 0
            # Guarded so that a pattern without capture groups yields None
            # instead of raising IndexError.
            first_group = match.group(1) if group_count else None

            if self.family_replacement:
                # Literal substitution: the matched text comes from the
                # user-agent string and must not be interpreted as a regex
                # replacement template (a backslash in it would otherwise
                # be treated as an escape sequence by re.sub).
                family = self.family_replacement.replace("$1", first_group or "")
            else:
                family = first_group

            if self.v1_replacement:
                v1 = self.v1_replacement
            elif group_count >= 2:
                v1 = match.group(2) or None

            if self.v2_replacement:
                v2 = self.v2_replacement
            elif group_count >= 3:
                v2 = match.group(3) or None

            if group_count >= 4:
                v3 = match.group(4) or None

        return family, v1, v2, v3


class OSParser(object):
    """Extracts operating system family and version using one regex."""

    def __init__(
        self,
        pattern,
        os_replacement=None,
        os_v1_replacement=None,
        os_v2_replacement=None,
        os_v3_replacement=None,
        os_v4_replacement=None,
    ):
        """Initialize OSParser.

        Args:
          pattern: a regular expression string
          os_replacement: a string to override the matched os (optional)
          os_v1_replacement: a string to override the matched v1 (optional)
          os_v2_replacement: a string to override the matched v2 (optional)
          os_v3_replacement: a string to override the matched v3 (optional)
          os_v4_replacement: a string to override the matched v4 (optional)
        """
        self.pattern = pattern
        self.user_agent_re = re.compile(self.pattern)
        self.os_replacement = os_replacement
        self.os_v1_replacement = os_v1_replacement
        self.os_v2_replacement = os_v2_replacement
        self.os_v3_replacement = os_v3_replacement
        self.os_v4_replacement = os_v4_replacement

    def MatchSpans(self, user_agent_string):
        """Return the (start, end) spans of the matched capture groups."""
        return _GroupSpans(self.user_agent_re, user_agent_string)

    def Parse(self, user_agent_string):
        """Parse a user-agent string with this parser's pattern.

        Args:
          user_agent_string: the full user-agent string
        Returns:
          An (os, os_v1, os_v2, os_v3, os_v4) tuple; every element is None
          when the pattern does not match.
        """
        # Locals deliberately avoid the name "os" so the imported module is
        # not shadowed inside this method.
        os_family, os_v1, os_v2, os_v3, os_v4 = None, None, None, None, None
        match = self.user_agent_re.search(user_agent_string)
        if match:
            group_count = match.lastindex or 0

            if self.os_replacement:
                os_family = MultiReplace(self.os_replacement, match)
            elif group_count >= 1:
                os_family = match.group(1)

            if self.os_v1_replacement:
                os_v1 = MultiReplace(self.os_v1_replacement, match)
            elif group_count >= 2:
                os_v1 = match.group(2)

            if self.os_v2_replacement:
                os_v2 = MultiReplace(self.os_v2_replacement, match)
            elif group_count >= 3:
                os_v2 = match.group(3)

            if self.os_v3_replacement:
                os_v3 = MultiReplace(self.os_v3_replacement, match)
            elif group_count >= 4:
                os_v3 = match.group(4)

            if self.os_v4_replacement:
                os_v4 = MultiReplace(self.os_v4_replacement, match)
            elif group_count >= 5:
                os_v4 = match.group(5)

        return os_family, os_v1, os_v2, os_v3, os_v4


def MultiReplace(string, match):
    """Expand "$N" placeholders in a replacement string from a regex match.

    Args:
      string: the replacement template; each "$" followed by one digit N is
        substituted with capture group N of match
      match: the match object supplying the capture groups
    Returns:
      The expanded string with leading and trailing whitespace removed, or
      None when the result is empty.
    """
    groups = match.groups()

    def _repl(m):
        index = int(m.group(1)) - 1
        if index < len(groups):
            # May be None for a group that did not take part in the match;
            # re.sub treats a None result as an empty substitution.
            return groups[index]
        return ""

    _string = re.sub(r"\$(\d)", _repl, string)
    _string = re.sub(r"^\s+|\s+$", "", _string)
    if _string == "":
        return None
    return _string


class DeviceParser(object):
    """Extracts device family, brand and model using one regex."""

    def __init__(
        self,
        pattern,
        regex_flag=None,
        device_replacement=None,
        brand_replacement=None,
        model_replacement=None,
    ):
        """Initialize DeviceParser.

        Args:
          pattern: a regular expression string
          regex_flag: "i" compiles the pattern case-insensitively; any other
            value compiles it without flags (optional)
          device_replacement: a string to override the matched device (optional)
          brand_replacement: a string to supply the brand (optional)
          model_replacement: a string to override the matched model (optional)
        """
        self.pattern = pattern
        if regex_flag == "i":
            self.user_agent_re = re.compile(self.pattern, re.IGNORECASE)
        else:
            self.user_agent_re = re.compile(self.pattern)
        self.device_replacement = device_replacement
        self.brand_replacement = brand_replacement
        self.model_replacement = model_replacement

    def MatchSpans(self, user_agent_string):
        """Return the (start, end) spans of the matched capture groups."""
        return _GroupSpans(self.user_agent_re, user_agent_string)

    def Parse(self, user_agent_string):
        """Parse a user-agent string with this parser's pattern.

        Args:
          user_agent_string: the full user-agent string
        Returns:
          A (device, brand, model) tuple; every element is None when the
          pattern does not match.
        """
        device, brand, model = None, None, None
        match = self.user_agent_re.search(user_agent_string)
        if match:
            has_groups = len(match.groups()) > 0

            if self.device_replacement:
                device = MultiReplace(self.device_replacement, match)
            elif has_groups:
                # Guarded so a pattern without capture groups yields None
                # instead of raising IndexError.
                device = match.group(1)

            if self.brand_replacement:
                brand = MultiReplace(self.brand_replacement, match)

            if self.model_replacement:
                model = MultiReplace(self.model_replacement, match)
            elif has_groups:
                model = match.group(1)

        return device, brand, model


MAX_CACHE_SIZE = 20
_parse_cache = {}


def Parse(user_agent_string, **jsParseBits):
    """ Parse all the things

    Results are memoized in a small module-level cache that is emptied once
    it holds MAX_CACHE_SIZE entries. The cached dictionary itself is
    returned, so callers should treat the result as read-only.

    Args:
      user_agent_string: the full user agent string
      jsParseBits: javascript override bits
    Returns:
      A dictionary containing all parsed bits
    """
    jsParseBits = jsParseBits or {}
    # Sort the overrides so the same keyword arguments given in a different
    # order share one cache entry.
    key = (user_agent_string, repr(sorted(jsParseBits.items())))
    cached = _parse_cache.get(key)
    if cached is not None:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        _parse_cache.clear()
    v = {
        "user_agent": ParseUserAgent(user_agent_string, **jsParseBits),
        "os": ParseOS(user_agent_string, **jsParseBits),
        "device": ParseDevice(user_agent_string, **jsParseBits),
        "string": user_agent_string,
    }
    _parse_cache[key] = v
    return v


def ParseUserAgent(user_agent_string, **jsParseBits):
    """ Parses the user-agent string for user agent (browser) info.

    Args:
      user_agent_string: The full user-agent string.
      jsParseBits: javascript override bits.
    Returns:
      A dictionary with the keys "family", "major", "minor" and "patch".
      "family" is "Other" when no parser matches.
    """
    # Pre-set so an empty USER_AGENT_PARSERS list cannot leave them unbound.
    family, v1, v2, v3 = None, None, None, None
    if (
        "js_user_agent_family" in jsParseBits
        and jsParseBits["js_user_agent_family"] != ""
    ):
        family = jsParseBits["js_user_agent_family"]
        v1 = jsParseBits.get("js_user_agent_v1") or None
        v2 = jsParseBits.get("js_user_agent_v2") or None
        v3 = jsParseBits.get("js_user_agent_v3") or None
    else:
        for ua_parser in USER_AGENT_PARSERS:
            family, v1, v2, v3 = ua_parser.Parse(user_agent_string)
            if family:
                break

    # Override for Chrome Frame IFF Chrome is enabled.
    if "js_user_agent_string" in jsParseBits:
        js_user_agent_string = jsParseBits["js_user_agent_string"]
        if (
            js_user_agent_string
            and js_user_agent_string.find("Chrome/") > -1
            and user_agent_string.find("chromeframe") > -1
        ):
            js_override = ParseUserAgent(js_user_agent_string)
            family = "Chrome Frame (%s %s)" % (family, v1)
            v1 = js_override["major"]
            v2 = js_override["minor"]
            v3 = js_override["patch"]

    family = family or "Other"
    return {
        "family": family,
        "major": v1 or None,
        "minor": v2 or None,
        "patch": v3 or None,
    }


def ParseOS(user_agent_string, **jsParseBits):
    """ Parses the user-agent string for operating system info

    Args:
      user_agent_string: The full user-agent string.
      jsParseBits: javascript override bits (accepted but not used here).
    Returns:
      A dictionary with the keys "family", "major", "minor", "patch" and
      "patch_minor". "family" is "Other" when no parser matches.
    """
    # Pre-set so an empty OS_PARSERS list cannot leave them unbound; the
    # locals avoid the name "os" so the imported module is not shadowed.
    os_family, os_v1, os_v2, os_v3, os_v4 = None, None, None, None, None
    for os_parser in OS_PARSERS:
        os_family, os_v1, os_v2, os_v3, os_v4 = os_parser.Parse(user_agent_string)
        if os_family:
            break
    os_family = os_family or "Other"
    return {
        "family": os_family,
        "major": os_v1,
        "minor": os_v2,
        "patch": os_v3,
        "patch_minor": os_v4,
    }


def ParseDevice(user_agent_string, **jsParseBits):
    """ Parses the user-agent string for device info.

    Args:
      user_agent_string: The full user-agent string.
      jsParseBits: javascript override bits. Accepted (and ignored) so that
        Parse() can forward its overrides to every Parse* function.
    Returns:
      A dictionary with the keys "family", "brand" and "model". "family" is
      "Other" when no parser matches.
    """
    # Pre-set so an empty DEVICE_PARSERS list cannot leave them unbound.
    device, brand, model = None, None, None
    for device_parser in DEVICE_PARSERS:
        device, brand, model = device_parser.Parse(user_agent_string)
        if device:
            break
    if device is None:
        device = "Other"
    return {"family": device, "brand": brand, "model": model}


def PrettyUserAgent(family, v1=None, v2=None, v3=None):
    """Pretty user agent string."""
    if v3:
        if v3[0].isdigit():
            return "%s %s.%s.%s" % (family, v1, v2, v3)
        else:
            # A non-numeric patch (e.g. "b1") is appended without a dot.
            return "%s %s.%s%s" % (family, v1, v2, v3)
    elif v2:
        return "%s %s.%s" % (family, v1, v2)
    elif v1:
        return "%s %s" % (family, v1)
    return family


def PrettyOS(os, os_v1=None, os_v2=None, os_v3=None, os_v4=None):
    """Pretty os string."""
    if os_v4:
        return "%s %s.%s.%s.%s" % (os, os_v1, os_v2, os_v3, os_v4)
    if os_v3:
        if os_v3[0].isdigit():
            return "%s %s.%s.%s" % (os, os_v1, os_v2, os_v3)
        else:
            # A non-numeric patch is appended without a dot.
            return "%s %s.%s%s" % (os, os_v1, os_v2, os_v3)
    elif os_v2:
        return "%s %s.%s" % (os, os_v1, os_v2)
    elif os_v1:
        return "%s %s" % (os, os_v1)
    return os


def ParseWithJSOverrides(
    user_agent_string,
    js_user_agent_string=None,
    js_user_agent_family=None,
    js_user_agent_v1=None,
    js_user_agent_v2=None,
    js_user_agent_v3=None,
):
    """ backwards compatible. use one of the other Parse methods instead!

    Returns:
      A (family, v1, v2, v3) tuple; family is "Other" when nothing matches.
    """
    # Pre-set so an empty USER_AGENT_PARSERS list cannot leave them unbound.
    family, v1, v2, v3 = None, None, None, None

    # Override via JS properties.
    if js_user_agent_family is not None and js_user_agent_family != "":
        family = js_user_agent_family
        v1 = js_user_agent_v1
        v2 = js_user_agent_v2
        v3 = js_user_agent_v3
    else:
        for parser in USER_AGENT_PARSERS:
            family, v1, v2, v3 = parser.Parse(user_agent_string)
            if family:
                break

    # Override for Chrome Frame IFF Chrome is enabled.
    if (
        js_user_agent_string
        and js_user_agent_string.find("Chrome/") > -1
        and user_agent_string.find("chromeframe") > -1
    ):
        family = "Chrome Frame (%s %s)" % (family, v1)
        ua_dict = ParseUserAgent(js_user_agent_string)
        v1 = ua_dict["major"]
        v2 = ua_dict["minor"]
        v3 = ua_dict["patch"]

    return family or "Other", v1, v2, v3


def Pretty(family, v1=None, v2=None, v3=None):
    """ backwards compatible. use PrettyUserAgent instead! """
    return PrettyUserAgent(family, v1, v2, v3)


def GetFilters(
    user_agent_string,
    js_user_agent_string=None,
    js_user_agent_family=None,
    js_user_agent_v1=None,
    js_user_agent_v2=None,
    js_user_agent_v3=None,
):
    """Return the optional arguments that should be saved and used to query.

    js_user_agent_string is always returned if it is present. We really only
    need it for Chrome Frame. However, I added it in the general case to find
    other cases when it is different. When the recording of
    js_user_agent_string was added, we created new records for all new user
    agents.

    Since we only added js_document_mode for the IE 9 preview case, it did
    not cause new user agent records the way js_user_agent_string did.

    js_document_mode has since been removed in favor of individual property
    overrides.

    Args:
      user_agent_string: The full user-agent string.
      js_user_agent_string: JavaScript ua string from client-side
      js_user_agent_family: This is an override for the family name to deal
          with the fact that IE platform preview (for instance) cannot be
          distinguished by user_agent_string, but only in javascript.
      js_user_agent_v1: v1 override - see above.
      js_user_agent_v2: v2 override - see above.
      js_user_agent_v3: v3 override - see above.
    Returns:
      {js_user_agent_string: '[...]', js_family_name: '[...]', etc...}
      Only arguments that are neither None nor "" are included.
    """
    filters = {}
    filterdict = {
        "js_user_agent_string": js_user_agent_string,
        "js_user_agent_family": js_user_agent_family,
        "js_user_agent_v1": js_user_agent_v1,
        "js_user_agent_v2": js_user_agent_v2,
        "js_user_agent_v3": js_user_agent_v3,
    }
    for key, value in filterdict.items():
        if value is not None and value != "":
            filters[key] = value
    return filters


# Build the list of user agent parsers from YAML
UA_PARSER_YAML = os.environ.get("UA_PARSER_YAML")
if UA_PARSER_YAML:
    # This will raise an ImportError if missing, obviously since it's no
    # longer a requirement
    import yaml

    try:
        # Try and use libyaml bindings if available since faster
        from yaml import CSafeLoader as SafeLoader
    except ImportError:
        from yaml import SafeLoader

    # Binary mode hands the raw bytes to the YAML reader so decoding does
    # not depend on the platform's default text encoding.
    with open(UA_PARSER_YAML, "rb") as fp:
        regexes = yaml.load(fp, Loader=SafeLoader)

    USER_AGENT_PARSERS = []
    for _ua_parser in regexes["user_agent_parsers"]:
        _regex = _ua_parser["regex"]

        _family_replacement = _ua_parser.get("family_replacement")
        _v1_replacement = _ua_parser.get("v1_replacement")
        _v2_replacement = _ua_parser.get("v2_replacement")

        USER_AGENT_PARSERS.append(
            UserAgentParser(
                _regex, _family_replacement, _v1_replacement, _v2_replacement
            )
        )

    OS_PARSERS = []
    for _os_parser in regexes["os_parsers"]:
        _regex = _os_parser["regex"]

        _os_replacement = _os_parser.get("os_replacement")
        _os_v1_replacement = _os_parser.get("os_v1_replacement")
        _os_v2_replacement = _os_parser.get("os_v2_replacement")
        _os_v3_replacement = _os_parser.get("os_v3_replacement")
        _os_v4_replacement = _os_parser.get("os_v4_replacement")
OS_PARSERS.append( OSParser( _regex, _os_replacement, _os_v1_replacement, _os_v2_replacement, _os_v3_replacement, _os_v4_replacement, ) ) DEVICE_PARSERS = [] for _device_parser in regexes["device_parsers"]: _regex = _device_parser["regex"] _regex_flag = _device_parser.get("regex_flag") _device_replacement = _device_parser.get("device_replacement") _brand_replacement = _device_parser.get("brand_replacement") _model_replacement = _device_parser.get("model_replacement") DEVICE_PARSERS.append( DeviceParser( _regex, _regex_flag, _device_replacement, _brand_replacement, _model_replacement, ) ) # Clean our our temporary vars explicitly # so they can't be reused or imported del regexes del yaml del SafeLoader else: # Just load our pre-compiled versions from ._regexes import USER_AGENT_PARSERS, DEVICE_PARSERS, OS_PARSERS uap-python-0.10.0/ua_parser/user_agent_parser_test.py000066400000000000000000000240631362560374700230200ustar00rootroot00000000000000#!/usr/bin/python2.5 # # Copyright 2008 Google Inc. # # Licensed under the Apache License, Version 2.0 (the 'License') # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """User Agent Parser Unit Tests. 
Run: # python -m user_agent_parser_test (runs all the tests, takes awhile) or like: # python -m user_agent_parser_test ParseTest.testBrowserscopeStrings """ from __future__ import unicode_literals, absolute_import __author__ = "slamm@google.com (Stephen Lamm)" import os import re import unittest import yaml try: # Try and use libyaml bindings if available since faster from yaml import CSafeLoader as SafeLoader except ImportError: from yaml import SafeLoader from ua_parser import user_agent_parser TEST_RESOURCES_DIR = os.path.join( os.path.abspath(os.path.dirname(__file__)), "../uap-core" ) class ParseTest(unittest.TestCase): def testBrowserscopeStrings(self): self.runUserAgentTestsFromYAML( os.path.join(TEST_RESOURCES_DIR, "tests/test_ua.yaml") ) def testBrowserscopeStringsOS(self): self.runOSTestsFromYAML(os.path.join(TEST_RESOURCES_DIR, "tests/test_os.yaml")) def testStringsOS(self): self.runOSTestsFromYAML( os.path.join(TEST_RESOURCES_DIR, "test_resources/additional_os_tests.yaml") ) def testStringsDevice(self): self.runDeviceTestsFromYAML( os.path.join(TEST_RESOURCES_DIR, "tests/test_device.yaml") ) def testMozillaStrings(self): self.runUserAgentTestsFromYAML( os.path.join( TEST_RESOURCES_DIR, "test_resources/firefox_user_agent_strings.yaml" ) ) # NOTE: The YAML file used here is one output by makePGTSComparisonYAML() # below, as opposed to the pgts_browser_list-orig.yaml file. The -orig # file is by no means perfect, but identifies many browsers that we # classify as "Other". This test itself is mostly useful to know when # somthing in UA parsing changes. An effort should be made to try and # reconcile the differences between the two YAML files. 
def testPGTSStrings(self): self.runUserAgentTestsFromYAML( os.path.join(TEST_RESOURCES_DIR, "test_resources/pgts_browser_list.yaml") ) def testParseAll(self): user_agent_string = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; fr; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5,gzip(gfe),gzip(gfe)" expected = { "device": {"family": "Mac", "brand": "Apple", "model": "Mac"}, "os": { "family": "Mac OS X", "major": "10", "minor": "4", "patch": None, "patch_minor": None, }, "user_agent": { "family": "Firefox", "major": "3", "minor": "5", "patch": "5", }, "string": user_agent_string, } result = user_agent_parser.Parse(user_agent_string) self.assertEqual( result, expected, "UA: {0}\n expected<{1}> != actual<{2}>".format( user_agent_string, expected, result ), ) # Make a YAML file for manual comparsion with pgts_browser_list-orig.yaml def makePGTSComparisonYAML(self): import codecs outfile = codecs.open("outfile.yaml", "w", "utf-8") print >> outfile, "test_cases:" yamlFile = open(os.path.join(TEST_RESOURCES_DIR, "pgts_browser_list.yaml")) yamlContents = yaml.load(yamlFile, Loader=SafeLoader) yamlFile.close() for test_case in yamlContents["test_cases"]: user_agent_string = test_case["user_agent_string"] kwds = {} if "js_ua" in test_case: kwds = eval(test_case["js_ua"]) (family, major, minor, patch) = user_agent_parser.ParseUserAgent( user_agent_string, **kwds ) # Escape any double-quotes in the UA string user_agent_string = re.sub(r'"', '\\"', user_agent_string) print >> outfile, ' - user_agent_string: "' + user_agent_string + '"' + "\n" + ' family: "' + family + '"\n' + " major: " + ( "" if (major is None) else "'" + major + "'" ) + "\n" + " minor: " + ( "" if (minor is None) else "'" + minor + "'" ) + "\n" + " patch: " + ( "" if (patch is None) else "'" + patch + "'" ) outfile.close() # Run a set of test cases from a YAML file def runUserAgentTestsFromYAML(self, file_name): yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name)) yamlContents = yaml.load(yamlFile, 
Loader=SafeLoader) yamlFile.close() for test_case in yamlContents["test_cases"]: # Inputs to Parse() user_agent_string = test_case["user_agent_string"] kwds = {} if "js_ua" in test_case: kwds = eval(test_case["js_ua"]) # The expected results expected = { "family": test_case["family"], "major": test_case["major"], "minor": test_case["minor"], "patch": test_case["patch"], } result = {} result = user_agent_parser.ParseUserAgent(user_agent_string, **kwds) self.assertEqual( result, expected, "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format( user_agent_string, expected["family"], expected["major"], expected["minor"], expected["patch"], result["family"], result["major"], result["minor"], result["patch"], ), ) def runOSTestsFromYAML(self, file_name): yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name)) yamlContents = yaml.load(yamlFile, Loader=SafeLoader) yamlFile.close() for test_case in yamlContents["test_cases"]: # Inputs to Parse() user_agent_string = test_case["user_agent_string"] kwds = {} if "js_ua" in test_case: kwds = eval(test_case["js_ua"]) # The expected results expected = { "family": test_case["family"], "major": test_case["major"], "minor": test_case["minor"], "patch": test_case["patch"], "patch_minor": test_case["patch_minor"], } result = user_agent_parser.ParseOS(user_agent_string, **kwds) self.assertEqual( result, expected, "UA: {0}\n expected<{1} {2} {3} {4} {5}> != actual<{6} {7} {8} {9} {10}>".format( user_agent_string, expected["family"], expected["major"], expected["minor"], expected["patch"], expected["patch_minor"], result["family"], result["major"], result["minor"], result["patch"], result["patch_minor"], ), ) def runDeviceTestsFromYAML(self, file_name): yamlFile = open(os.path.join(TEST_RESOURCES_DIR, file_name)) yamlContents = yaml.load(yamlFile, Loader=SafeLoader) yamlFile.close() for test_case in yamlContents["test_cases"]: # Inputs to Parse() user_agent_string = test_case["user_agent_string"] kwds = {} if 
"js_ua" in test_case: kwds = eval(test_case["js_ua"]) # The expected results expected = { "family": test_case["family"], "brand": test_case["brand"], "model": test_case["model"], } result = user_agent_parser.ParseDevice(user_agent_string, **kwds) self.assertEqual( result, expected, "UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format( user_agent_string, expected["family"], expected["brand"], expected["model"], result["family"], result["brand"], result["model"], ), ) class GetFiltersTest(unittest.TestCase): def testGetFiltersNoMatchesGiveEmptyDict(self): user_agent_string = "foo" filters = user_agent_parser.GetFilters( user_agent_string, js_user_agent_string=None ) self.assertEqual({}, filters) def testGetFiltersJsUaPassedThrough(self): user_agent_string = "foo" filters = user_agent_parser.GetFilters( user_agent_string, js_user_agent_string="bar" ) self.assertEqual({"js_user_agent_string": "bar"}, filters) def testGetFiltersJsUserAgentFamilyAndVersions(self): user_agent_string = ( "Mozilla/4.0 (compatible; MSIE 8.0; " "Windows NT 5.1; Trident/4.0; GTB6; .NET CLR 2.0.50727; " ".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)" ) filters = user_agent_parser.GetFilters( user_agent_string, js_user_agent_string="bar", js_user_agent_family="foo" ) self.assertEqual( {"js_user_agent_string": "bar", "js_user_agent_family": "foo"}, filters ) if __name__ == "__main__": unittest.main() uap-python-0.10.0/uap-core/000077500000000000000000000000001362560374700154245ustar00rootroot00000000000000