MechanicalSoup-0.10.0/0000775000175000017500000000000013235454600014335 5ustar danhdanh00000000000000MechanicalSoup-0.10.0/mechanicalsoup/0000775000175000017500000000000013235454600017330 5ustar danhdanh00000000000000MechanicalSoup-0.10.0/mechanicalsoup/stateful_browser.py0000664000175000017500000003445113235454401023302 0ustar danhdanh00000000000000from __future__ import print_function from six.moves import urllib from .browser import Browser from .utils import LinkNotFoundError from .form import Form import sys import re import bs4 class _BrowserState: def __init__(self, page=None, url=None, form=None, request=None): self.page = page self.url = url self.form = form self.request = request class StatefulBrowser(Browser): """An extension of :class:`Browser` that stores the browser's state and provides many convenient functions for interacting with HTML elements. It is the primary tool in MechanicalSoup for interfacing with websites. :param session: Attach a pre-existing requests Session instead of constructing a new one. :param soup_config: Configuration passed to BeautifulSoup to affect the way HTML is parsed. Defaults to ``{'features': 'lxml'}``. If overriden, it is highly recommended to `specify a parser `__. Otherwise, BeautifulSoup will issue a warning and pick one for you, but the parser it chooses may be different on different machines. :param requests_adapters: Configuration passed to requests, to affect the way HTTP requests are performed. :param raise_on_404: If True, raise :class:`LinkNotFoundError` when visiting a page triggers a 404 Not Found error. :param user_agent: Set the user agent header to this value. All arguments are forwarded to :func:`Browser`. Examples :: browser = mechanicalsoup.StatefulBrowser( soup_config={'features': 'lxml'}, # Use the lxml HTML parser raise_on_404=True, user_agent='MyBot/0.1: mysite.example.com/bot_info', ) browser.open(url) # ... 
browser.close() Once not used anymore, the browser can be closed using :func:`~Browser.close`. """ def __init__(self, *args, **kwargs): super(StatefulBrowser, self).__init__(*args, **kwargs) self.__debug = False self.__verbose = 0 self.__state = _BrowserState() def set_debug(self, debug): """Set the debug mode (off by default). Set to True to enable debug mode. When active, some actions will launch a browser on the current page on failure to let you inspect the page content. """ self.__debug = debug def get_debug(self): """Get the debug mode (off by default).""" return self.__debug def set_verbose(self, verbose): """Set the verbosity level (an integer). * 0 means no verbose output. * 1 shows one dot per visited page (looks like a progress bar) * >= 1 shows each visited URL. """ self.__verbose = verbose def get_verbose(self): """Get the verbosity level. See :func:`set_verbose()`.""" return self.__verbose def get_url(self): """Get the URL of the currently visited page.""" return self.__state.url def get_current_form(self): """Get the currently selected form as a :class:`Form` object. See :func:`select_form`. """ return self.__state.form def __setitem__(self, name, value): """Call item assignment on the currently selected form. See :func:`Form.__setitem__`. """ self.get_current_form()[name] = value def new_control(self, type, name, value, **kwargs): """Call :func:`Form.new_control` on the currently selected form.""" return self.get_current_form().new_control(type, name, value, **kwargs) def get_current_page(self): """Get the current page as a soup object.""" return self.__state.page def absolute_url(self, url): """Return the absolute URL made from the current URL and ``url``. The current URL is only used to provide any missing components of ``url``, as in the `.urljoin() method of urllib.parse `__. """ return urllib.parse.urljoin(self.get_url(), url) def open(self, url, *args, **kwargs): """Open the URL and store the Browser's state in this object. 
All arguments are forwarded to :func:`Browser.get`. :return: Forwarded from :func:`Browser.get`. """ if self.__verbose == 1: sys.stdout.write('.') sys.stdout.flush() elif self.__verbose >= 2: print(url) resp = self.get(url, *args, **kwargs) self.__state = _BrowserState(page=resp.soup, url=resp.url, request=resp.request) return resp def open_fake_page(self, page_text, url=None, soup_config=None): """Mock version of :func:`open`. Behave as if opening a page whose text is ``page_text``, but do not perform any network access. If ``url`` is set, pretend it is the page's URL. Useful mainly for testing. """ soup_config = soup_config or self.soup_config self.__state = _BrowserState( page=bs4.BeautifulSoup(page_text, **soup_config), url=url) def open_relative(self, url, *args, **kwargs): """Like :func:`open`, but ``url`` can be relative to the currently visited page. """ return self.open(self.absolute_url(url), *args, **kwargs) def refresh(self): """Reload the current page with the same request as originally done. Any change (`select_form`, or any value filled-in in the form) made to the current page before refresh is discarded. :raise ValueError: Raised if no refreshable page is loaded, e.g., when using the shallow ``Browser`` wrapper functions. :return: Response of the request.""" old_request = self.__state.request if old_request is None: raise ValueError('The current page is not refreshable. Either no ' 'page is opened or low-level browser methods ' 'were used to do so') resp = self.session.send(old_request) Browser.add_soup(resp, self.soup_config) self.__state = _BrowserState(page=resp.soup, url=resp.url, request=resp.request) return resp def select_form(self, selector="form", nr=0): """Select a form in the current page. :param selector: CSS selector or a bs4.element.Tag object to identify the form to select. If not specified, ``selector`` defaults to "form", which is useful if, e.g., there is only one form on the page. 
For ``selector`` syntax, see the `.select() method in BeautifulSoup `__. :param nr: A zero-based index specifying which form among those that match ``selector`` will be selected. Useful when one or more forms have the same attributes as the form you want to select, and its position on the page is the only way to uniquely identify it. Default is the first matching form (``nr=0``). :return: The selected form as a soup object. It can also be retrieved later with :func:`get_current_form`. """ if isinstance(selector, bs4.element.Tag): if selector.name != "form": raise LinkNotFoundError() self.__state.form = Form(selector) else: # nr is a 0-based index for consistency with mechanize found_forms = self.get_current_page().select(selector, limit=nr + 1) if len(found_forms) != nr + 1: if self.__debug: print('select_form failed for', selector) self.launch_browser() raise LinkNotFoundError() self.__state.form = Form(found_forms[-1]) return self.get_current_form() def submit_selected(self, btnName=None, *args, **kwargs): """Submit the form that was selected with :func:`select_form`. :return: Forwarded from :func:`Browser.submit`. If there are multiple submit input/button elements, passes ``btnName`` to :func:`Form.choose_submit` on the current form to choose between them. All other arguments are forwarded to :func:`Browser.submit`. """ if btnName is not None: self.get_current_form().choose_submit(btnName) referer = self.get_url() if referer is not None: if 'headers' in kwargs: kwargs['headers']['Referer'] = referer else: kwargs['headers'] = {'Referer': referer} resp = self.submit(self.__state.form, url=self.__state.url, *args, **kwargs) self.__state = _BrowserState(page=resp.soup, url=resp.url, request=resp.request) return resp def list_links(self, *args, **kwargs): """Display the list of links in the current page. Arguments are forwarded to :func:`links`. 
""" print("Links in the current page:") for l in self.links(*args, **kwargs): print(" ", l) def links(self, url_regex=None, link_text=None, *args, **kwargs): """Return links in the page, as a list of bs4.element.Tag objects. To return links matching specific criteria, specify ``url_regex`` to match the *href*-attribute, or ``link_text`` to match the *text*-attribute of the Tag. All other arguments are forwarded to the `.find_all() method in BeautifulSoup `__. """ all_links = self.get_current_page().find_all( 'a', href=True, *args, **kwargs) if url_regex is not None: all_links = [a for a in all_links if re.search(url_regex, a['href'])] if link_text is not None: all_links = [a for a in all_links if a.text == link_text] return all_links def find_link(self, *args, **kwargs): """Find and return a link, as a bs4.element.Tag object. The search can be refined by specifying any argument that is accepted by :func:`links`. If several links match, return the first one found. If no link is found, raise :class:`LinkNotFoundError`. """ links = self.links(*args, **kwargs) if len(links) == 0: raise LinkNotFoundError() else: return links[0] def _find_link_internal(self, link, args, kwargs): """Wrapper around find_link that deals with convenience special-cases: * If ``link`` has an *href*-attribute, then return it. If not, consider it as a ``url_regex`` argument. * If searching for the link fails and debug is active, launch a browser. """ if hasattr(link, 'attrs') and 'href' in link.attrs: return link # Check if "link" parameter should be treated as "url_regex" # but reject obtaining it from both places. 
if link and 'url_regex' in kwargs: raise ValueError('link parameter cannot be treated as ' 'url_regex because url_regex is already ' 'present in keyword arguments') else: kwargs['url_regex'] = link try: return self.find_link(*args, **kwargs) except LinkNotFoundError: if self.get_debug(): print('find_link failed for', kwargs) self.list_links() self.launch_browser() raise def follow_link(self, link=None, *args, **kwargs): """Follow a link. If ``link`` is a bs4.element.Tag (i.e. from a previous call to :func:`links` or :func:`find_link`), then follow the link. If ``link`` doesn't have a *href*-attribute or is None, treat ``link`` as a url_regex and look it up with :func:`find_link`. Any additional arguments specified are forwarded to this function. If the link is not found, raise :class:`LinkNotFoundError`. Before raising, if debug is activated, list available links in the page and launch a browser. :return: Forwarded from :func:`open_relative`. """ link = self._find_link_internal(link, args, kwargs) referer = self.get_url() headers = {'Referer': referer} if referer else None return self.open_relative(link['href'], headers=headers) def download_link(self, link=None, file=None, *args, **kwargs): """Downloads the contents of a link to a file. This function behaves similarly to :func:`follow_link`, but the browser state will not change when calling this function. :param file: Filesystem path where the page contents will be downloaded. If the file already exists, it will be overwritten. Other arguments are the same as :func:`follow_link` (``link`` can either be a bs4.element.Tag or a URL regex, other arguments are forwarded to :func:`find_link`). :return: `requests.Response `__ object. 
""" link = self._find_link_internal(link, args, kwargs) url = self.absolute_url(link['href']) referer = self.get_url() headers = {'Referer': referer} if referer else None response = self.session.get(url, headers=headers) if self.raise_on_404 and response.status_code == 404: raise LinkNotFoundError() # Save the response content to file if file is not None: with open(file, 'wb') as f: f.write(response.content) return response def launch_browser(self, soup=None): """Launch a browser to display a page, for debugging purposes. :param: soup: Page contents to display, supplied as a bs4 soup object. Defaults to the current page of the ``StatefulBrowser`` instance. """ if soup is None: soup = self.get_current_page() super(StatefulBrowser, self).launch_browser(soup) MechanicalSoup-0.10.0/mechanicalsoup/utils.py0000664000175000017500000000103613235454401021041 0ustar danhdanh00000000000000class LinkNotFoundError(BaseException): """Exception raised when mechanicalsoup fails to find something. This happens in situations like (non-exhaustive list): * :func:`~mechanicalsoup.StatefulBrowser.find_link` is called, but no link is found. * The browser was configured with raise_on_404=True and a 404 error is triggered while browsing. * The user tried to fill-in a field which doesn't exist in a form (e.g. browser["name"] = "val" with browser being a StatefulBrowser). """ pass MechanicalSoup-0.10.0/mechanicalsoup/form.py0000664000175000017500000003242413235454401020651 0ustar danhdanh00000000000000from __future__ import print_function import copy from .utils import LinkNotFoundError from bs4 import BeautifulSoup class InvalidFormMethod(LinkNotFoundError): """This exception is raised when a method of :class:`Form` is used for an HTML element that is of the wrong type (or is malformed). It is caught within :func:`Form.set` to perform element type deduction. It is derived from :class:`LinkNotFoundError` so that a single base class can be used to catch all exceptions specific to this module. 
""" pass class Form(object): """Build a fillable form. :param form: A bs4.element.Tag corresponding to an HTML form element. The Form class is responsible for preparing HTML forms for submission. It handles the following types of elements: input (text, checkbox, radio), select, and textarea. Each type is set by a method named after the type (e.g. :func:`~Form.set_select`), and then there are convenience methods (e.g. :func:`~Form.set`) that do type-deduction and set the value using the appropriate method. It also handles submit-type elements using :func:`~Form.choose_submit`. """ def __init__(self, form): self.form = form # Aliases for backwards compatibility # (Included specifically in __init__ to suppress them in Sphinx docs) self.attach = self.set_input self.input = self.set_input self.textarea = self.set_textarea def set_input(self, data): """Fill-in a set of fields in a form. Example: filling-in a login/password form .. code-block:: python form.set_input({"login": username, "password": password}) This will find the input element named "login" and give it the value ``username``, and the input element named "password" and give it the value ``password``. """ for (name, value) in data.items(): i = self.form.find("input", {"name": name}) if not i: raise InvalidFormMethod("No input field named " + name) i["value"] = value def uncheck_all(self, name): """Remove the *checked*-attribute of all input elements with a *name*-attribute given by ``name``. """ for option in self.form.find_all("input", {"name": name}): if "checked" in option.attrs: del option.attrs["checked"] def check(self, data): """For backwards compatibility, this method handles checkboxes and radio buttons in a single call. It will not uncheck any checkboxes unless explicitly specified by ``data``, in contrast with the default behavior of :func:`~Form.set_checkbox`. 
""" for (name, value) in data.items(): try: self.set_checkbox({name: value}, uncheck_other_boxes=False) continue except InvalidFormMethod: pass try: self.set_radio({name: value}) continue except InvalidFormMethod: pass raise LinkNotFoundError("No input checkbox/radio named " + name) def set_checkbox(self, data, uncheck_other_boxes=True): """Set the *checked*-attribute of input elements of type "checkbox" specified by ``data`` (i.e. check boxes). :param data: Dict of ``{name: value, ...}``. In the family of checkboxes whose *name*-attribute is ``name``, check the box whose *value*-attribute is ``value``. All boxes in the family can be checked (unchecked) if ``value`` is True (False). To check multiple specific boxes, let ``value`` be a tuple or list. :param uncheck_other_boxes: If True (default), before checking any boxes specified by ``data``, uncheck the entire checkbox family. Consider setting to False if some boxes are checked by default when the HTML is served. """ for (name, value) in data.items(): checkboxes = self.form.find_all("input", {"name": name}, type="checkbox") if not checkboxes: raise InvalidFormMethod("No input checkbox named " + name) # uncheck if requested if uncheck_other_boxes: self.uncheck_all(name) # Wrap individual values (e.g. int, str) in a 1-element tuple. 
if not isinstance(value, list) and not isinstance(value, tuple): value = (value,) # Check or uncheck one or more boxes for choice in value: choice_str = str(choice) # Allow for example literal numbers for checkbox in checkboxes: if checkbox.attrs.get("value", "on") == choice_str: checkbox["checked"] = "" break # Allow specifying True or False to check/uncheck elif choice is True: checkbox["checked"] = "" break elif choice is False: if "checked" in checkbox.attrs: del checkbox.attrs["checked"] break else: raise LinkNotFoundError( "No input checkbox named %s with choice %s" % (name, choice) ) def set_radio(self, data): """Set the *checked*-attribute of input elements of type "radio" specified by ``data`` (i.e. select radio buttons). :param data: Dict of ``{name: value, ...}``. In the family of radio buttons whose *name*-attribute is ``name``, check the radio button whose *value*-attribute is ``value``. Only one radio button in the family can be checked. """ for (name, value) in data.items(): radios = self.form.find_all("input", {"name": name}, type="radio") if not radios: raise InvalidFormMethod("No input radio named " + name) # only one radio button can be checked self.uncheck_all(name) # Check the appropriate radio button (value cannot be a list/tuple) for radio in radios: if radio.attrs.get("value", "on") == str(value): radio["checked"] = "" break else: raise LinkNotFoundError( "No input radio named %s with choice %s" % (name, value) ) def set_textarea(self, data): """Set the *string*-attribute of the first textarea element specified by ``data`` (i.e. set the text of a textarea). :param data: Dict of ``{name: value, ...}``. The textarea whose *name*-attribute is ``name`` will have its *string*-attribute set to ``value``. 
""" for (name, value) in data.items(): t = self.form.find("textarea", {"name": name}) if not t: raise InvalidFormMethod("No textarea named " + name) t.string = value def set_select(self, data): """Set the *selected*-attribute of the first option element specified by ``data`` (i.e. select an option from a dropdown). :param data: Dict of ``{name: value, ...}``. Find the select element whose *name*-attribute is ``name``. Then select from among its children the option element whose *value*-attribute is ``value``. If the select element's *multiple*-attribute is set, then ``value`` can be a list or tuple to select multiple options. """ for (name, value) in data.items(): select = self.form.find("select", {"name": name}) if not select: raise InvalidFormMethod("No select named " + name) # Deselect all options first for option in select.find_all("option"): if "selected" in option.attrs: del option.attrs["selected"] # Wrap individual values in a 1-element tuple. # If value is a list/tuple, select must be a ``) will be added using :func:`~Form.new_control`. Example: filling-in a login/password form with EULA checkbox .. code-block:: python form.set("login", username) form.set("password", password) form.set("eula-checkbox", True) Example: uploading a file through a ```` field (provide the path to the local file, and its content will be uploaded): .. code-block:: python form.set("tagname") = path_to_local_file """ for func in ("checkbox", "radio", "input", "textarea", "select"): try: getattr(self, "set_" + func)({name: value}) return except InvalidFormMethod: pass if force: self.new_control('text', name, value=value) return raise LinkNotFoundError("No valid element named " + name) def new_control(self, type, name, value, **kwargs): """Add a new input element to the form. The arguments set the attributes of the new element. 
""" old_input = self.form.find_all('input', {'name': name}) for old in old_input: old.decompose() old_textarea = self.form.find_all('textarea', {'name': name}) for old in old_textarea: old.decompose() # We don't have access to the original soup object (just the # Tag), so we instantiate a new BeautifulSoup() to call # new_tag(). We're only building the soup object, not parsing # anything, so the parser doesn't matter. Specify the one # included in Python to avoid having dependency issue. control = BeautifulSoup("", "html.parser").new_tag('input') control['type'] = type control['name'] = name control['value'] = value for k, v in kwargs.items(): control[k] = v self.form.append(control) return control def choose_submit(self, submit): """Selects the input (or button) element to use for form submission. :param submit: The bs4.element.Tag (or just its *name*-attribute) that identifies the submit element to use. To simulate a normal web browser, only one submit element must be sent. Therefore, this does not need to be called if there is only one submit element in the form. If the element is not found or if multiple elements match, raise a :class:`LinkNotFoundError` exception. Example: :: browser = mechanicalsoup.StatefulBrowser() browser.open(url) form = browser.select_form() form.choose_submit('form_name_attr') browser.submit_selected() """ found = False inps = self.form.select('input[type="submit"], button[type="submit"]') for inp in inps: if inp == submit or (inp.has_attr('name') and inp['name'] == submit): if found: raise LinkNotFoundError( "Multiple submit elements match: {0}".format(submit) ) found = True continue del inp['name'] if not found: raise LinkNotFoundError( "Specified submit element not found: {0}".format(submit) ) def print_summary(self): """Print a summary of the form. May help finding which fields need to be filled-in. 
""" for input in self.form.find_all( ("input", "textarea", "select")): input_copy = copy.copy(input) # Text between the opening tag and the closing tag often # contains a lot of spaces that we don't want here. for subtag in input_copy.find_all() + [input_copy]: if subtag.string: subtag.string = subtag.string.strip() print(input_copy) MechanicalSoup-0.10.0/mechanicalsoup/__version__.py0000664000175000017500000000041513235454505022167 0ustar danhdanh00000000000000__title__ = 'MechanicalSoup' __description__ = 'A Python library for automating interaction with websites' __url__ = 'https://mechanicalsoup.readthedocs.io/' __github_url__ = 'https://github.com/MechanicalSoup/MechanicalSoup' __version__ = '0.10.0' __license__ = 'MIT' MechanicalSoup-0.10.0/mechanicalsoup/__init__.py0000664000175000017500000000046513235454401021445 0ustar danhdanh00000000000000from .utils import LinkNotFoundError from .browser import Browser from .form import Form, InvalidFormMethod from .stateful_browser import StatefulBrowser from .__version__ import __version__ __all__ = ['StatefulBrowser', 'LinkNotFoundError', 'Browser', 'Form', 'InvalidFormMethod', '__version__'] MechanicalSoup-0.10.0/mechanicalsoup/browser.py0000664000175000017500000002224513235454401021371 0ustar danhdanh00000000000000import requests import bs4 from six.moves import urllib from six import string_types from .form import Form import webbrowser import tempfile from .utils import LinkNotFoundError from .__version__ import __version__, __title__ import weakref class Browser(object): """Builds a Browser. :param session: Attach a pre-existing requests Session instead of constructing a new one. :param soup_config: Configuration passed to BeautifulSoup to affect the way HTML is parsed. Defaults to ``{'features': 'lxml'}``. If overriden, it is highly recommended to `specify a parser `__. Otherwise, BeautifulSoup will issue a warning and pick one for you, but the parser it chooses may be different on different machines. 
:param requests_adapters: Configuration passed to requests, to affect the way HTTP requests are performed. :param raise_on_404: If True, raise :class:`LinkNotFoundError` when visiting a page triggers a 404 Not Found error. :param user_agent: Set the user agent header to this value. See also: :func:`StatefulBrowser` """ def __init__(self, session=None, soup_config={'features': 'lxml'}, requests_adapters=None, raise_on_404=False, user_agent=None): self.raise_on_404 = raise_on_404 self.session = session or requests.Session() if hasattr(weakref, 'finalize'): self._finalize = weakref.finalize(self.session, self.close) else: # pragma: no cover # Python < 3 does not have weakref.finalize, but these # versions accept calling session.close() within __del__ self._finalize = self.close self.set_user_agent(user_agent) if requests_adapters is not None: for adaptee, adapter in requests_adapters.items(): self.session.mount(adaptee, adapter) self.soup_config = soup_config or dict() @staticmethod def add_soup(response, soup_config): """Attaches a soup object to a requests response.""" if "text/html" in response.headers.get("Content-Type", ""): response.soup = bs4.BeautifulSoup(response.content, **soup_config) else: response.soup = None def set_cookiejar(self, cookiejar): """Replaces the current cookiejar in the requests session. Since the session handles cookies automatically without calling this function, only use this when default cookie handling is insufficient. :param cookiejar: Any `cookielib.CookieJar `__ compatible object. 
""" self.session.cookies = cookiejar def get_cookiejar(self): """Gets the cookiejar from the requests session.""" return self.session.cookies def set_user_agent(self, user_agent): """Replaces the current user agent in the requests session headers.""" # set a default user_agent if not specified if user_agent is None: requests_ua = requests.utils.default_user_agent() user_agent = '%s (%s/%s)' % (requests_ua, __title__, __version__) # the requests module uses a case-insensitive dict for session headers self.session.headers['User-agent'] = user_agent def request(self, *args, **kwargs): """Straightforward wrapper around `requests.Session.request `__. :return: `requests.Response `__ object with a *soup*-attribute added by :func:`add_soup`. This is a low-level function that should not be called for basic usage (use :func:`get` or :func:`post` instead). Use it if you need an HTTP verb that MechanicalSoup doesn't manage (e.g. MKCOL) for example. """ response = self.session.request(*args, **kwargs) Browser.add_soup(response, self.soup_config) return response def get(self, *args, **kwargs): """Straightforward wrapper around `requests.Session.get `__. :return: `requests.Response `__ object with a *soup*-attribute added by :func:`add_soup`. """ response = self.session.get(*args, **kwargs) if self.raise_on_404 and response.status_code == 404: raise LinkNotFoundError() Browser.add_soup(response, self.soup_config) return response def post(self, *args, **kwargs): """Straightforward wrapper around `requests.Session.post `__. :return: `requests.Response `__ object with a *soup*-attribute added by :func:`add_soup`. """ response = self.session.post(*args, **kwargs) Browser.add_soup(response, self.soup_config) return response def _request(self, form, url=None, **kwargs): method = str(form.get("method", "get")) action = form.get("action") url = urllib.parse.urljoin(url, action) if url is None: # This happens when both `action` and `url` are None. 
raise ValueError('no URL to submit to') # read http://www.w3.org/TR/html5/forms.html data = kwargs.pop("data", dict()) files = kwargs.pop("files", dict()) for input in form.select("input[name], button[name]"): name = input.get("name") if input.get("type") in ("radio", "checkbox"): if "checked" not in input.attrs: continue value = input.get("value", "on") else: # web browsers use empty string for inputs with missing values value = input.get("value", "") if input.get("type") == "checkbox": data.setdefault(name, []).append(value) elif input.get("type") == "file": # read http://www.cs.tut.fi/~jkorpela/forms/file.html # in web browsers, file upload only happens if the form"s (or # submit button"s) enctype attribute is set to # "multipart/form-data". we don"t care, simplify. if not value: continue if isinstance(value, string_types): value = open(value, "rb") files[name] = value else: data[name] = value for textarea in form.select("textarea"): name = textarea.get("name") if not name: continue data[name] = textarea.text for select in form.select("select"): name = select.get("name") if not name: continue multiple = "multiple" in select.attrs values = [] for i, option in enumerate(select.select("option")): if (i == 0 and not multiple) or "selected" in option.attrs: values.append(option.get("value", "")) if multiple: data[name] = values elif values: data[name] = values[-1] if method.lower() == "get": kwargs["params"] = data else: kwargs["data"] = data return self.session.request(method, url, files=files, **kwargs) def submit(self, form, url=None, **kwargs): """Prepares and sends a form request. :param form: The filled-out form. :param url: URL of the page the form is on. If the form action is a relative path, then this must be specified. :param \*\*kwargs: Arguments forwarded to `requests.Session.request `__. :return: `requests.Response `__ object with a *soup*-attribute added by :func:`add_soup`. 
""" if isinstance(form, Form): form = form.form response = self._request(form, url, **kwargs) Browser.add_soup(response, self.soup_config) return response def launch_browser(self, soup): """Launch a browser to display a page, for debugging purposes. :param: soup: Page contents to display, supplied as a bs4 soup object. """ with tempfile.NamedTemporaryFile(delete=False) as file: file.write(soup.encode()) webbrowser.open('file://' + file.name) def close(self): """Close the current session, if still open.""" if self.session is not None: self.session.cookies.clear() self.session.close() self.session = None def __del__(self): self._finalize() def __enter__(self): return self def __exit__(self, *args): self.close() MechanicalSoup-0.10.0/tests/0000775000175000017500000000000013235454600015477 5ustar danhdanh00000000000000MechanicalSoup-0.10.0/tests/test_browser.py0000664000175000017500000001214313235454401020573 0ustar danhdanh00000000000000import setpath # noqa:F401, must come before 'import mechanicalsoup' import mechanicalsoup import sys from bs4 import BeautifulSoup import tempfile from requests.cookies import RequestsCookieJar import pytest def test_submit_online(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.Browser() page = browser.get(httpbin + "/forms/post") form = page.soup.form form.find("input", {"name": "custname"})["value"] = "Philip J. 
Fry" # leave custtel blank without value assert "value" not in form.find("input", {"name": "custtel"}).attrs form.find("input", {"name": "size", "value": "medium"})["checked"] = "" form.find("input", {"name": "topping", "value": "cheese"})["checked"] = "" form.find("input", {"name": "topping", "value": "onion"})["checked"] = "" form.find("textarea", {"name": "comments"}).insert(0, "freezer") response = browser.submit(form, page.url) # helpfully the form submits to http://httpbin.org/post which simply # returns the request headers in json format json = response.json() data = json["form"] assert data["custname"] == "Philip J. Fry" assert data["custtel"] == "" # web browser submits "" for input left blank assert data["size"] == "medium" assert data["topping"] == ["cheese", "onion"] assert data["comments"] == "freezer" assert json["headers"]["User-Agent"].startswith('python-requests/') assert 'MechanicalSoup' in json["headers"]["User-Agent"] form_html = """
Pizza Size

Small

Medium

Large

Pizza Toppings

Bacon

Extra Cheese

Onion

Mushroom

""" def test__request(): form = BeautifulSoup(form_html, "lxml").form browser = mechanicalsoup.Browser() response = browser._request(form) data = response.json()['form'] assert data["customer"] == "Philip J. Fry" assert data["telephone"] == "555" assert data["comments"] == "freezer" assert data["size"] == "medium" assert data["topping"] == ["cheese", "onion"] assert data["shape"] == "square" assert "application/x-www-form-urlencoded" in response.request.headers[ "Content-Type"] def test__request_file(): form = BeautifulSoup(form_html, "lxml").form # create a temporary file for testing file upload pic_path = tempfile.mkstemp()[1] with open(pic_path, "w") as f: f.write(":-)") form.find("input", {"name": "pic"})["value"] = pic_path browser = mechanicalsoup.Browser() response = browser._request(form) # Check that only "files" includes a "pic" keyword in the response for key, value in response.json().items(): if key == "files": assert value["pic"] == ":-)" else: assert (value is None) or ("pic" not in value) assert "multipart/form-data" in response.request.headers["Content-Type"] def test_no_404(httpbin): browser = mechanicalsoup.Browser() resp = browser.get(httpbin + "/nosuchpage") assert resp.status_code == 404 def test_404(httpbin): browser = mechanicalsoup.Browser(raise_on_404=True) with pytest.raises(mechanicalsoup.LinkNotFoundError): resp = browser.get(httpbin + "/nosuchpage") resp = browser.get(httpbin.url) assert resp.status_code == 200 def test_set_cookiejar(httpbin): """Set cookies locally and test that they are received remotely.""" # construct a phony cookiejar and attach it to the session jar = RequestsCookieJar() jar.set('field', 'value') assert jar.get('field') == 'value' browser = mechanicalsoup.Browser() browser.set_cookiejar(jar) resp = browser.get(httpbin + "/cookies") assert resp.json() == {'cookies': {'field': 'value'}} def test_get_cookiejar(httpbin): """Test that cookies set by the remote host update our session.""" browser = 
mechanicalsoup.Browser() resp = browser.get(httpbin + "/cookies/set?k1=v1&k2=v2") assert resp.json() == {'cookies': {'k1': 'v1', 'k2': 'v2'}} jar = browser.get_cookiejar() assert jar.get('k1') == 'v1' assert jar.get('k2') == 'v2' def test_post(httpbin): browser = mechanicalsoup.Browser() data = {'color': 'blue', 'colorblind': 'True'} resp = browser.post(httpbin + "/post", data) assert(resp.status_code == 200 and resp.json()['form'] == data) if __name__ == '__main__': pytest.main(sys.argv) MechanicalSoup-0.10.0/tests/conftest.py0000664000175000017500000000031013235454401017667 0ustar danhdanh00000000000000import pytest # This file is automatically discovered by pytest to define # shared fixtures only once. @pytest.fixture def httpbin(): from utils import HttpbinRemote return HttpbinRemote() MechanicalSoup-0.10.0/tests/utils.py0000664000175000017500000000447313235454401017220 0ustar danhdanh00000000000000import mechanicalsoup import requests_mock try: from urllib.parse import parse_qsl except ImportError: from urlparse import parse_qsl """ Utilities for testing MechanicalSoup. """ choose_submit_form = '''
''' def setup_mock_browser(expected_post=None, text=choose_submit_form): url = 'mock://form.com' browser, mock = prepare_mock_browser() mock_get(mock, url, text) if expected_post is not None: mock_post(mock, url + '/post', expected_post) return browser, url def prepare_mock_browser(scheme='mock'): mock = requests_mock.Adapter() browser = mechanicalsoup.StatefulBrowser(requests_adapters={scheme: mock}) return browser, mock def mock_get(mocked_adapter, url, reply, content_type='text/html', **kwargs): headers = {'Content-Type': content_type} mocked_adapter.register_uri('GET', url, headers=headers, text=reply, **kwargs) def mock_post(mocked_adapter, url, expected, reply='Success!'): def text_callback(request, context): # Python 2's parse_qsl doesn't like None argument query = parse_qsl(request.text) if request.text else () assert (set(query) == set(expected)) return reply mocked_adapter.register_uri('POST', url, text=text_callback) class HttpbinRemote: """Drop-in replacement for pytest-httpbin's httpbin fixture that uses the remote httpbin server instead of a local one.""" def __init__(self): self.url = "http://httpbin.org" def __add__(self, x): return self.url + x MechanicalSoup-0.10.0/tests/test_stateful_browser.py0000664000175000017500000005233613235454401022512 0ustar danhdanh00000000000000import os import tempfile import json import setpath # noqa:F401, must come before 'import mechanicalsoup' import mechanicalsoup import sys import re from bs4 import BeautifulSoup from utils import setup_mock_browser, prepare_mock_browser, mock_get import pytest import webbrowser def test_request_forward(): browser, url = setup_mock_browser(expected_post=[('var1', 'val1'), ('var2', 'val2')]) r = browser.request('POST', url + '/post', data={'var1': 'val1', 'var2': 'val2'}) assert r.text == 'Success!' 
def test_submit_online(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.StatefulBrowser() browser.set_user_agent('testing MechanicalSoup') browser.open(httpbin.url) for link in browser.links(): if link["href"] == "/": browser.follow_link(link) break browser.follow_link("forms/post") assert browser.get_url() == httpbin + "/forms/post" browser.select_form("form") browser["custname"] = "Customer Name Here" browser["size"] = "medium" browser["topping"] = ("cheese", "bacon") # Change our mind to make sure old boxes are unticked browser["topping"] = ("cheese", "onion") browser["comments"] = "Some comment here" browser.get_current_form().set("nosuchfield", "new value", True) response = browser.submit_selected() json = response.json() data = json["form"] assert data["custname"] == "Customer Name Here" assert data["custtel"] == "" # web browser submits "" for input left blank assert data["size"] == "medium" assert set(data["topping"]) == set(("cheese", "onion")) assert data["comments"] == "Some comment here" assert data["nosuchfield"] == "new value" assert json["headers"]["User-Agent"] == 'testing MechanicalSoup' # Ensure we haven't blown away any regular headers expected_headers = ('Content-Length', 'Host', 'Content-Type', 'Connection', 'Accept', 'User-Agent', 'Accept-Encoding') assert set(expected_headers).issubset(json["headers"].keys()) def test_no_404(httpbin): browser = mechanicalsoup.StatefulBrowser() resp = browser.open(httpbin + "/nosuchpage") assert resp.status_code == 404 def test_404(httpbin): browser = mechanicalsoup.StatefulBrowser(raise_on_404=True) with pytest.raises(mechanicalsoup.LinkNotFoundError): resp = browser.open(httpbin + "/nosuchpage") resp = browser.open(httpbin.url) assert resp.status_code == 200 def test_user_agent(httpbin): browser = mechanicalsoup.StatefulBrowser(user_agent='007') resp = browser.open(httpbin + "/user-agent") assert resp.json() == {'user-agent': '007'} def 
test_open_relative(httpbin): # Open an arbitrary httpbin page to set the current URL browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin + "/html") # Open a relative page and make sure remote host and browser agree on URL resp = browser.open_relative("/get") assert resp.json()['url'] == httpbin + "/get" assert browser.get_url() == httpbin + "/get" # Test passing additional kwargs to the session resp = browser.open_relative("/basic-auth/me/123", auth=('me', '123')) assert browser.get_url() == httpbin + "/basic-auth/me/123" assert resp.json() == {"authenticated": True, "user": "me"} def test_links(): browser = mechanicalsoup.StatefulBrowser() html = '''A Blue Link A Red Link''' expected = [BeautifulSoup(html, "lxml").a] browser.open_fake_page(html) # Test StatefulBrowser.links url_regex argument assert browser.links(url_regex="bl") == expected assert browser.links(url_regex="bluish") == [] # Test StatefulBrowser.links link_text argument assert browser.links(link_text="A Blue Link") == expected assert browser.links(link_text="Blue") == [] # Test StatefulBrowser.links kwargs passed to BeautifulSoup.find_all assert browser.links(string=re.compile('Blue')) == expected assert browser.links(class_="bluelink") == expected assert browser.links(id="blue_link") == expected assert browser.links(id="blue") == [] # Test returning a non-singleton two_links = browser.links(id=re.compile('_link')) assert len(two_links) == 2 assert two_links == BeautifulSoup(html, "lxml").find_all('a') @pytest.mark.parametrize("expected_post", [ pytest.param( [ ('comment', 'Selecting an input submit'), ('diff', 'Review Changes'), ('text', 'Setting some text!') ], id='input'), pytest.param( [ ('comment', 'Selecting a button submit'), ('cancel', 'Cancel'), ('text', '= Heading =\n\nNew page here!\n') ], id='button'), ]) def test_submit_btnName(expected_post): '''Tests that the btnName argument chooses the submit button.''' browser, url = setup_mock_browser(expected_post=expected_post) 
browser.open(url) browser.select_form('#choose-submit-form') browser['text'] = expected_post[2][1] browser['comment'] = expected_post[0][1] res = browser.submit_selected(btnName=expected_post[1][0]) assert(res.status_code == 200 and res.text == 'Success!') def test_get_set_debug(): browser = mechanicalsoup.StatefulBrowser() # Debug mode is off by default assert(not browser.get_debug()) browser.set_debug(True) assert(browser.get_debug()) def test_list_links(capsys): # capsys is a pytest fixture that allows us to inspect the std{err,out} browser = mechanicalsoup.StatefulBrowser() links = ''' Link #1 Link #2 ''' browser.open_fake_page('{0}'.format(links)) browser.list_links() out, err = capsys.readouterr() expected = 'Links in the current page:{0}'.format(links) assert out == expected def test_launch_browser(mocker): browser = mechanicalsoup.StatefulBrowser() browser.set_debug(True) browser.open_fake_page('') mocker.patch('webbrowser.open') with pytest.raises(mechanicalsoup.LinkNotFoundError): browser.follow_link('nosuchlink') # mock.assert_called_once() not available on some versions :-( assert webbrowser.open.call_count == 1 mocker.resetall() with pytest.raises(mechanicalsoup.LinkNotFoundError): browser.select_form('nosuchlink') # mock.assert_called_once() not available on some versions :-( assert webbrowser.open.call_count == 1 def test_find_link(): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page('') with pytest.raises(mechanicalsoup.LinkNotFoundError): browser.find_link('nosuchlink') def test_verbose(capsys): '''Tests that the btnName argument chooses the submit button.''' browser, url = setup_mock_browser() browser.open(url) out, err = capsys.readouterr() assert out == "" assert err == "" assert browser.get_verbose() == 0 browser.set_verbose(1) browser.open(url) out, err = capsys.readouterr() assert out == "." 
assert err == "" assert browser.get_verbose() == 1 browser.set_verbose(2) browser.open(url) out, err = capsys.readouterr() assert out == "mock://form.com\n" assert err == "" assert browser.get_verbose() == 2 def test_new_control(): browser = mechanicalsoup.StatefulBrowser() browser.open("http://httpbin.org/forms/post") browser.select_form("form") with pytest.raises(mechanicalsoup.LinkNotFoundError): # The control doesn't exist, yet. browser["temperature"] = "cold" browser["size"] = "large" # Existing radio browser["comments"] = "This is a comment" # Existing textarea browser.new_control("text", "temperature", "warm") browser.new_control("textarea", "size", "Sooo big !") browser.new_control("text", "comments", "This is an override comment") browser.new_control("checkbox", "foo", "valval", checked="checked") tag = browser.get_current_form().form.find("input", {"name": "foo"}) assert tag.attrs["checked"] == "checked" browser["temperature"] = "hot" response = browser.submit_selected() json = response.json() data = json["form"] print(data) assert data["temperature"] == "hot" assert data["size"] == "Sooo big !" assert data["comments"] == "This is an override comment" assert data["foo"] == "valval" submit_form_noaction = '''
''' def test_form_noaction(): browser, url = setup_mock_browser() browser.open_fake_page(submit_form_noaction) browser.select_form('#choose-submit-form') with pytest.raises(ValueError, message="no URL to submit to"): browser.submit_selected() submit_form_noname = '''
''' def test_form_noname(): browser, url = setup_mock_browser(expected_post=[]) browser.open_fake_page(submit_form_noname, url=url) browser.select_form('#choose-submit-form') response = browser.submit_selected() assert(response.status_code == 200 and response.text == 'Success!') submit_form_multiple = '''
''' def test_form_multiple(): browser, url = setup_mock_browser(expected_post=[('foo', 'tempeh'), ('foo', 'tofu')]) browser.open_fake_page(submit_form_multiple, url=url) browser.select_form('#choose-submit-form') response = browser.submit_selected() assert(response.status_code == 200 and response.text == 'Success!') def test_upload_file(httpbin): browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin + "/forms/post") # Create two temporary files to upload def make_file(content): path = tempfile.mkstemp()[1] with open(path, "w") as f: f.write(content) return path path1, path2 = (make_file(content) for content in ("first file content", "second file content")) # The form doesn't have a type=file field, but the target action # does show it => add the fields ourselves. browser.select_form() browser.new_control("file", "first", path1) browser.new_control("file", "second", "") browser["second"] = path2 browser.get_current_form().print_summary() response = browser.submit_selected() files = response.json()["files"] assert files["first"] == "first file content" assert files["second"] == "second file content" def test_with(): """Test that __enter__/__exit__ properly create/close the browser.""" with mechanicalsoup.StatefulBrowser() as browser: assert browser.session is not None assert browser.session is None def test_select_form_nr(): """Test the nr option of select_form.""" forms = """
""" with mechanicalsoup.StatefulBrowser() as browser: browser.open_fake_page(forms) form = browser.select_form() assert form.form['id'] == "a" form = browser.select_form(nr=1) assert form.form['id'] == "b" form = browser.select_form(nr=2) assert form.form['id'] == "c" with pytest.raises(mechanicalsoup.LinkNotFoundError): browser.select_form(nr=3) def test_select_form_tag_object(): """Test tag object as selector parameter type""" forms = """

""" soup = BeautifulSoup(forms, "lxml") with mechanicalsoup.StatefulBrowser() as browser: browser.open_fake_page(forms) form = browser.select_form(soup.find("form", {"id": "b"})) assert form.form['id'] == "b" with pytest.raises(mechanicalsoup.LinkNotFoundError): browser.select_form(soup.find("p")) def test_referer_follow_link(httpbin): browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin.url) response = browser.follow_link("/headers") referer = response.json()["headers"]["Referer"] actual_ref = re.sub('/*$', '', referer) expected_ref = re.sub('/*$', '', httpbin.url) assert actual_ref == expected_ref submit_form_headers = '''
''' def test_referer_submit(httpbin): browser = mechanicalsoup.StatefulBrowser() ref = "https://example.com/my-referer" page = submit_form_headers.format(httpbin.url + "/headers") browser.open_fake_page(page, url=ref) browser.select_form() response = browser.submit_selected() headers = response.json()["headers"] referer = headers["Referer"] actual_ref = re.sub('/*$', '', referer) assert actual_ref == ref def test_referer_submit_headers(httpbin): browser = mechanicalsoup.StatefulBrowser() ref = "https://example.com/my-referer" page = submit_form_headers.format(httpbin.url + "/headers") browser.open_fake_page(page, url=ref) browser.select_form() response = browser.submit_selected( headers={'X-Test-Header': 'x-test-value'}) headers = response.json()["headers"] referer = headers["Referer"] actual_ref = re.sub('/*$', '', referer) assert actual_ref == ref assert headers['X-Test-Header'] == 'x-test-value' def test_link_arg_text(httpbin): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page('Link', httpbin.url) browser.follow_link(link_text='Link') assert browser.get_url() == httpbin + '/get' def test_link_arg_regex(httpbin): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page('Link', httpbin.url) browser.follow_link(url_regex='.*') assert browser.get_url() == httpbin + '/get' def test_link_arg_multiregex(httpbin): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page('Link', httpbin.url) with pytest.raises(ValueError, match="link parameter cannot be .*"): browser.follow_link('foo', url_regex='bar') def file_get_contents(filename): with open(filename, "rb") as f: return f.read() def test_download_link(httpbin): """Test downloading the contents of a link to file.""" browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin.url) tmpdir = tempfile.mkdtemp() tmpfile = tmpdir + '/nosuchfile.png' current_url = browser.get_url() current_page = browser.get_current_page() response = browser.download_link(file=tmpfile, link='image/png') # 
Check that the browser state has not changed assert browser.get_url() == current_url assert browser.get_current_page() == current_page # Check that the file was downloaded assert os.path.isfile(tmpfile) assert file_get_contents(tmpfile) == response.content # Check that we actually downloaded a PNG file assert response.content[:4] == b'\x89PNG' def test_download_link_nofile(httpbin): """Test downloading the contents of a link without saving it.""" browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin.url) current_url = browser.get_url() current_page = browser.get_current_page() response = browser.download_link(link='image/png') # Check that the browser state has not changed assert browser.get_url() == current_url assert browser.get_current_page() == current_page # Check that we actually downloaded a PNG file assert response.content[:4] == b'\x89PNG' def test_download_link_to_existing_file(httpbin): """Test downloading the contents of a link to an existing file.""" browser = mechanicalsoup.StatefulBrowser() browser.open(httpbin.url) tmpdir = tempfile.mkdtemp() tmpfile = tmpdir + '/existing.png' with open(tmpfile, "w") as f: f.write("initial content") current_url = browser.get_url() current_page = browser.get_current_page() response = browser.download_link('image/png', tmpfile) # Check that the browser state has not changed assert browser.get_url() == current_url assert browser.get_current_page() == current_page # Check that the file was downloaded assert os.path.isfile(tmpfile) assert file_get_contents(tmpfile) == response.content # Check that we actually downloaded a PNG file assert response.content[:4] == b'\x89PNG' def test_download_link_404(httpbin): """Test downloading the contents of a broken link.""" browser = mechanicalsoup.StatefulBrowser(raise_on_404=True) browser.open_fake_page('Link', url=httpbin.url) tmpdir = tempfile.mkdtemp() tmpfile = tmpdir + '/nosuchfile.txt' current_url = browser.get_url() current_page = browser.get_current_page() with 
pytest.raises(mechanicalsoup.LinkNotFoundError): browser.download_link(file=tmpfile, link_text='Link') # Check that the browser state has not changed assert browser.get_url() == current_url assert browser.get_current_page() == current_page # Check that the file was not downloaded assert not os.path.exists(tmpfile) def test_download_link_referer(httpbin): """Test downloading the contents of a link to file.""" browser = mechanicalsoup.StatefulBrowser() ref = httpbin + "/my-referer" browser.open_fake_page('Link', url=ref) tmpfile = tempfile.NamedTemporaryFile() current_url = browser.get_url() current_page = browser.get_current_page() browser.download_link(file=tmpfile.name, link_text='Link') # Check that the browser state has not changed assert browser.get_url() == current_url assert browser.get_current_page() == current_page # Check that the file was downloaded with open(tmpfile.name) as f: json_data = json.load(f) headers = json_data["headers"] assert headers["Referer"] == ref def test_refresh_open(): url = 'mock://example.com' initial_page = BeautifulSoup('

Fake empty page

', 'lxml') reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') browser, adapter = prepare_mock_browser() mock_get(adapter, url=url, reply=str(initial_page)) browser.open(url) mock_get(adapter, url=url, reply=str(reload_page), additional_matcher=lambda r: 'Referer' not in r.headers) browser.refresh() assert browser.get_url() == url assert browser.get_current_page() == reload_page def test_refresh_follow_link(): url = 'mock://example.com' follow_url = 'mock://example.com/followed' initial_content = 'Link'.format(url=follow_url) initial_page = BeautifulSoup(initial_content, 'lxml') reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') browser, adapter = prepare_mock_browser() mock_get(adapter, url=url, reply=str(initial_page)) mock_get(adapter, url=follow_url, reply=str(initial_page)) browser.open(url) browser.follow_link() refer_header = {'Referer': url} mock_get(adapter, url=follow_url, reply=str(reload_page), request_headers=refer_header) browser.refresh() assert browser.get_url() == follow_url assert browser.get_current_page() == reload_page def test_refresh_form_not_retained(): url = 'mock://example.com' initial_content = '
Here comes the form
' initial_page = BeautifulSoup(initial_content, 'lxml') reload_page = BeautifulSoup('

Fake reloaded page

', 'lxml') browser, adapter = prepare_mock_browser() mock_get(adapter, url=url, reply=str(initial_page)) browser.open(url) browser.select_form() mock_get(adapter, url=url, reply=str(reload_page), additional_matcher=lambda r: 'Referer' not in r.headers) browser.refresh() assert browser.get_url() == url assert browser.get_current_page() == reload_page assert browser.get_current_form() is None def test_refresh_error(): browser = mechanicalsoup.StatefulBrowser() # Test no page with pytest.raises(ValueError): browser.refresh() # Test fake page with pytest.raises(ValueError): browser.open_fake_page('

Fake empty page

', url='http://fake.com') browser.refresh() if __name__ == '__main__': pytest.main(sys.argv) MechanicalSoup-0.10.0/tests/test_form.py0000664000175000017500000003054213235454401020056 0ustar danhdanh00000000000000import setpath # noqa:F401, must come before 'import mechanicalsoup' import mechanicalsoup from utils import setup_mock_browser import sys import pytest def test_submit_online(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.Browser() page = browser.get(httpbin + "/forms/post") form = mechanicalsoup.Form(page.soup.form) input_data = {"custname": "Philip J. Fry"} form.input(input_data) check_data = {"size": "large", "topping": ["cheese"]} form.check(check_data) check_data = {"size": "medium", "topping": "onion"} form.check(check_data) form.textarea({"comments": "warm"}) form.textarea({"comments": "actually, no, not warm"}) form.textarea({"comments": "freezer"}) response = browser.submit(form, page.url) # helpfully the form submits to http://httpbin.org/post which simply # returns the request headers in json format json = response.json() data = json["form"] assert data["custname"] == "Philip J. Fry" assert data["custtel"] == "" # web browser submits "" for input left blank assert data["size"] == "medium" assert data["topping"] == ["cheese", "onion"] assert data["comments"] == "freezer" def test_submit_set(httpbin): """Complete and submit the pizza form at http://httpbin.org/forms/post """ browser = mechanicalsoup.Browser() page = browser.get(httpbin + "/forms/post") form = mechanicalsoup.Form(page.soup.form) form["custname"] = "Philip J. Fry" form["size"] = "medium" form["topping"] = ("cheese", "onion") form["comments"] = "freezer" response = browser.submit(form, page.url) # helpfully the form submits to http://httpbin.org/post which simply # returns the request headers in json format json = response.json() data = json["form"] assert data["custname"] == "Philip J. 
Fry" assert data["custtel"] == "" # web browser submits "" for input left blank assert data["size"] == "medium" assert data["topping"] == ["cheese", "onion"] assert data["comments"] == "freezer" @pytest.mark.parametrize("expected_post", [ pytest.param( [ ('comment', 'Testing preview page'), ('preview', 'Preview Page'), ('text', 'Setting some text!') ], id='preview'), pytest.param( [ ('comment', 'Created new page'), ('save', 'Submit changes'), ('text', '= Heading =\n\nNew page here!\n') ], id='save'), pytest.param( [ ('comment', 'Testing choosing cancel button'), ('cancel', 'Cancel'), ('text', '= Heading =\n\nNew page here!\n') ], id='cancel'), ]) def test_choose_submit(expected_post): browser, url = setup_mock_browser(expected_post=expected_post) browser.open(url) form = browser.select_form('#choose-submit-form') browser['text'] = expected_post[2][1] browser['comment'] = expected_post[0][1] form.choose_submit(expected_post[1][0]) res = browser.submit_selected() assert(res.status_code == 200 and res.text == 'Success!') choose_submit_fail_form = '''
''' @pytest.mark.parametrize("select_name", [ pytest.param({'name': 'does_not_exist', 'fails': True}, id='not found'), pytest.param({'name': 'test_submit', 'fails': False}, id='found'), ]) def test_choose_submit_fail(select_name): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page(choose_submit_fail_form) form = browser.select_form('#choose-submit-form') if select_name['fails']: with pytest.raises(mechanicalsoup.utils.LinkNotFoundError): form.choose_submit(select_name['name']) else: form.choose_submit(select_name['name']) choose_submit_multiple_match_form = '''
''' def test_choose_submit_multiple_match(): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page(choose_submit_multiple_match_form) form = browser.select_form('#choose-submit-form') with pytest.raises(mechanicalsoup.utils.LinkNotFoundError): form.choose_submit('test_submit') submit_form_noaction = '''
''' def test_form_noaction(): browser, url = setup_mock_browser() browser.open_fake_page(submit_form_noaction, url=url) form = browser.select_form('#choose-submit-form') form['text1'] = 'newText1' res = browser.submit_selected() assert(res.status_code == 200 and browser.get_url() == url) submit_form_action = '''
''' def test_form_action(): browser, url = setup_mock_browser() # for info about example.com see: https://tools.ietf.org/html/rfc2606 browser.open_fake_page(submit_form_action, url="http://example.com/invalid/") form = browser.select_form('#choose-submit-form') form['text1'] = 'newText1' res = browser.submit_selected() assert(res.status_code == 200 and browser.get_url() == url) set_select_form = '''
''' @pytest.mark.parametrize("option", [ pytest.param({'result': [('entree', 'tofu')], 'default': True}, id='default'), pytest.param({'result': [('entree', 'curry')], 'default': False}, id='selected'), ]) def test_set_select(option): '''Test the branch of Form.set that finds "select" elements.''' browser, url = setup_mock_browser(expected_post=option['result'], text=set_select_form) browser.open(url) browser.select_form('form') if not option['default']: browser[option['result'][0][0]] = option['result'][0][1] res = browser.submit_selected() assert(res.status_code == 200 and res.text == 'Success!') set_select_multiple_form = '''
''' @pytest.mark.parametrize("options", [ pytest.param('bass', id='select one (str)'), pytest.param(('bass',), id='select one (tuple)'), pytest.param(('piano', 'violin'), id='select two'), ]) def test_set_select_multiple(options): """Test a This is a checkbox ''' def test_form_check_uncheck(): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page(page_with_radio, url="http://example.com/invalid/") form = browser.select_form('form') assert "checked" not in form.form.find("input", {"name": "foo"}).attrs form["foo"] = True assert form.form.find("input", {"name": "foo"}).attrs["checked"] == "" # Test explicit unchecking (skipping the call to Form.uncheck_all) form.set_checkbox({"foo": False}, uncheck_other_boxes=False) assert "checked" not in form.form.find("input", {"name": "foo"}).attrs page_with_various_fields = '''
Pizza Toppings

Small

Medium

Large

''' def test_form_print_summary(capsys): browser = mechanicalsoup.StatefulBrowser() browser.open_fake_page(page_with_various_fields, url="http://example.com/invalid/") browser.select_form("form") browser.get_current_form().print_summary() out, err = capsys.readouterr() # Different versions of bs4 show either or # . Normalize before comparing. out = out.replace('>', '/>') assert out == """ """ assert err == "" def test_issue180(): """Test that a KeyError is not raised when Form.choose_submit is called on a form where a submit element is missing its name-attribute.""" browser = mechanicalsoup.StatefulBrowser() html = '''
''' browser.open_fake_page(html) form = browser.select_form() with pytest.raises(mechanicalsoup.utils.LinkNotFoundError): form.choose_submit('not_found') if __name__ == '__main__': pytest.main(sys.argv) MechanicalSoup-0.10.0/tests/setpath.py0000664000175000017500000000037613235454401017526 0ustar danhdanh00000000000000"""Add the main directory of the project to sys.path, so that uninstalled version is tested.""" import sys import os TEST_DIR = os.path.abspath(os.path.dirname(__file__)) PROJ_DIR = os.path.dirname(TEST_DIR) sys.path.insert(0, os.path.join(PROJ_DIR)) MechanicalSoup-0.10.0/tests/requirements.txt0000664000175000017500000000007213235454401020761 0ustar danhdanh00000000000000pytest pytest-cov pytest-flake8 pytest-mock requests_mock MechanicalSoup-0.10.0/README.rst0000664000175000017500000001056313235454401016030 0ustar danhdanh00000000000000MechanicalSoup ============== Home page --------- https://mechanicalsoup.readthedocs.io/ Overview -------- A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do JavaScript. MechanicalSoup was created by `M Hickford `__, who was a fond user of the `Mechanize `__ library. Unfortunately, Mechanize is `incompatible with Python 3 `__ and its development stalled for several years. MechanicalSoup provides a similar API, built on Python giants `Requests `__ (for HTTP sessions) and `BeautifulSoup `__ (for document navigation). Since 2017 it is a project actively maintained by a small team including `@hemberger `__ and `@moy `__. |Gitter Chat| Installation ------------ |Latest Version| |Supported Versions| PyPy and PyPy3 are also supported (and tested against). 
Download and install the latest released version from `PyPI `__:: pip install MechanicalSoup Download and install the development version from `GitHub `__:: pip install git+https://github.com/MechanicalSoup/MechanicalSoup Installing from source (installs the version in the current working directory):: python setup.py install (In all cases, add ``--user`` to the ``install`` command to install in the current user's home directory.) Documentation ------------- The full documentation is available on https://mechanicalsoup.readthedocs.io/. You may want to jump directly to the `automatically generated API documentation `__. Example ------- From ``__, code to get the results from a DuckDuckGo search: .. code:: python """Example usage of MechanicalSoup to get the results from DuckDuckGo.""" import mechanicalsoup # Connect to duckduckgo browser = mechanicalsoup.StatefulBrowser() browser.open("https://duckduckgo.com/") # Fill-in the search form browser.select_form('#search_form_homepage') browser["q"] = "MechanicalSoup" browser.submit_selected() # Display the results for link in browser.get_current_page().select('a.result__a'): print(link.text, '->', link.attrs['href']) More examples are available in ``__. For an example with a more complex form (checkboxes, radio buttons and textareas), read ``__ and ``__. Development ----------- |Build Status| |Coverage Status| |Requirements Status| |Documentation Status| |CII Best Practices| Instructions for building, testing and contributing to MechanicalSoup: see ``__. Common problems --------------- Read the `FAQ `__. .. |Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. 
|Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup .. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup .. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master .. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest .. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge :target: https://bestpractices.coreinfrastructure.org/projects/1334) .. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg :target: https://gitter.im/MechanicalSoup/Lobby MechanicalSoup-0.10.0/MANIFEST.in0000664000175000017500000000020513235454401016067 0ustar danhdanh00000000000000include LICENSE README.rst recursive-include tests *.py include examples/example*.py include requirements.txt tests/requirements.txt MechanicalSoup-0.10.0/setup.cfg0000664000175000017500000000050613235454600016157 0ustar danhdanh00000000000000[aliases] test = pytest [bdist_wheel] universal = 1 [tool:pytest] addopts = --cov --cov-config .coveragerc --flake8 -v flake8-ignore = docs/*.py ALL python_files = tests/*.py [build_sphinx] source-dir = docs/ build-dir = docs/_build all-files = 1 fresh-env = 1 [egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 MechanicalSoup-0.10.0/PKG-INFO0000664000175000017500000001453413235454600015441 0ustar danhdanh00000000000000Metadata-Version: 1.1 Name: MechanicalSoup Version: 0.10.0 Summary: A Python library for automating interaction with websites Home-page: https://mechanicalsoup.readthedocs.io/ Author: UNKNOWN 
Author-email: UNKNOWN License: MIT Description: MechanicalSoup ============== Home page --------- https://mechanicalsoup.readthedocs.io/ Overview -------- A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do JavaScript. MechanicalSoup was created by `M Hickford `__, who was a fond user of the `Mechanize `__ library. Unfortunately, Mechanize is `incompatible with Python 3 `__ and its development stalled for several years. MechanicalSoup provides a similar API, built on Python giants `Requests `__ (for HTTP sessions) and `BeautifulSoup `__ (for document navigation). Since 2017 it is a project actively maintained by a small team including `@hemberger `__ and `@moy `__. |Gitter Chat| Installation ------------ |Latest Version| |Supported Versions| PyPy and PyPy3 are also supported (and tested against). Download and install the latest released version from `PyPI `__:: pip install MechanicalSoup Download and install the development version from `GitHub `__:: pip install git+https://github.com/MechanicalSoup/MechanicalSoup Installing from source (installs the version in the current working directory):: python setup.py install (In all cases, add ``--user`` to the ``install`` command to install in the current user's home directory.) Documentation ------------- The full documentation is available on https://mechanicalsoup.readthedocs.io/. You may want to jump directly to the `automatically generated API documentation `__. Example ------- From `examples/expl_duck_duck_go.py `__, code to get the results from a DuckDuckGo search: .. 
code:: python """Example usage of MechanicalSoup to get the results from DuckDuckGo.""" import mechanicalsoup # Connect to duckduckgo browser = mechanicalsoup.StatefulBrowser() browser.open("https://duckduckgo.com/") # Fill-in the search form browser.select_form('#search_form_homepage') browser["q"] = "MechanicalSoup" browser.submit_selected() # Display the results for link in browser.get_current_page().select('a.result__a'): print(link.text, '->', link.attrs['href']) More examples are available in `examples/ `__. For an example with a more complex form (checkboxes, radio buttons and textareas), read `tests/test_browser.py `__ and `tests/test_form.py `__. Development ----------- |Build Status| |Coverage Status| |Requirements Status| |Documentation Status| |CII Best Practices| Instructions for building, testing and contributing to MechanicalSoup: see `CONTRIBUTING.rst `__. Common problems --------------- Read the `FAQ `__. .. |Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. |Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup .. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup .. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master .. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest .. 
|CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge :target: https://bestpractices.coreinfrastructure.org/projects/1334 .. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg :target: https://gitter.im/MechanicalSoup/Lobby Platform: UNKNOWN Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 MechanicalSoup-0.10.0/LICENSE0000664000175000017500000000205213235454401015340 0ustar danhdanh00000000000000The MIT License (MIT) Copyright (c) 2014 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
MechanicalSoup-0.10.0/setup.py0000664000175000017500000000432713235454401016054 0ustar danhdanh00000000000000from setuptools import setup # Always prefer setuptools over distutils from codecs import open # To use a consistent encoding from os import path import re def requirements_from_file(filename): """Parse a pip requirements file into a list of requirement strings. Each line is stripped of surrounding whitespace; blank lines and lines starting with ``--`` (e.g. pip options such as ``--index-url``) are skipped. NOTE(review): the file object is never closed explicitly; a ``with`` block would be safer. """ return [line.strip() for line in open(filename, 'r') if line.strip() and not line.strip().startswith('--')] def read(fname, URL): """Return the text of ``fname`` with its relative links made absolute. ``fname`` is resolved relative to the directory containing this script. Every text-less reST link of the form ```<target>`__`` is rewritten to ```target <URL/blob/master/target>`__``; links that already carry a link text are left untouched. NOTE(review): the file object is never closed explicitly; a ``with`` block would be safer. """ readme = open(path.join(path.dirname(__file__), fname)).read() if hasattr(readme, 'decode'): # Python 2 only: read() returned a byte string, so decode it to unicode (on Python 3 the text is already a str, which has no decode()). readme = readme.decode('utf8') # turn relative links into absolute ones readme = re.sub(r'`<([^>]*)>`__', r'`\1 <' + URL + r"/blob/master/\1>`__", readme) return readme here = path.abspath(path.dirname(__file__)) about = {} with open(path.join(here, 'mechanicalsoup', '__version__.py'), 'r', 'utf-8') as f: exec(f.read(), about) setup( name=about['__title__'], # useful: python setup.py sdist bdist_wheel upload version=about['__version__'], description=about['__description__'], long_description=read('README.rst', about['__github_url__']), url=about['__url__'], license=about['__license__'], classifiers=[ 'License :: OSI Approved :: MIT License', # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], packages=['mechanicalsoup'], # List run-time dependencies here. These will be installed by pip # when your project is installed.
For an analysis of # "install_requires" vs pip's requirements files see: # https://packaging.python.org/en/latest/requirements.html install_requires=requirements_from_file('requirements.txt'), setup_requires=['pytest-runner'], tests_require=requirements_from_file('tests/requirements.txt'), ) MechanicalSoup-0.10.0/requirements.txt0000664000175000017500000000005713235454401017622 0ustar danhdanh00000000000000requests >= 2.0 beautifulsoup4 six >= 1.4 lxml MechanicalSoup-0.10.0/examples/0000775000175000017500000000000013235454600016153 5ustar danhdanh00000000000000MechanicalSoup-0.10.0/examples/example.py0000664000175000017500000000241513235454401020161 0ustar danhdanh00000000000000"""Example app to login to GitHub using the StatefulBrowser class.""" from __future__ import print_function import argparse import mechanicalsoup from getpass import getpass parser = argparse.ArgumentParser(description="Login to GitHub.") parser.add_argument("username") args = parser.parse_args() args.password = getpass("Please enter your GitHub password: ") browser = mechanicalsoup.StatefulBrowser( soup_config={'features': 'lxml'}, raise_on_404=True, user_agent='MyBot/0.1: mysite.example.com/bot_info', ) # Uncomment for a more verbose output: # browser.set_verbose(2) browser.open("https://github.com") browser.follow_link("login") browser.select_form('#login form') browser["login"] = args.username browser["password"] = args.password resp = browser.submit_selected() # Uncomment to launch a web browser on the current page: # browser.launch_browser() # verify we are now logged in: print any flash messages, then check that # the page contains the logout form page = browser.get_current_page() messages = page.find("div", class_="flash-messages") if messages: print(messages.text) assert page.select(".logout-form") print(page.title.text) # verify we remain logged in (thanks to cookies) as we browse the rest of # the site page3 = browser.open("https://github.com/MechanicalSoup/MechanicalSoup") assert page3.soup.select(".logout-form")
MechanicalSoup-0.10.0/examples/example_manual.py0000664000175000017500000000273613235454401021524 0ustar danhdanh00000000000000"""Example app to login to GitHub, using the plain Browser class. See example.py for an example using the more advanced StatefulBrowser.""" import argparse import mechanicalsoup parser = argparse.ArgumentParser(description="Login to GitHub.") parser.add_argument("username") parser.add_argument("password") args = parser.parse_args() browser = mechanicalsoup.Browser(soup_config={'features': 'lxml'}) # Request the GitHub login page. The result is a requests.Response object # http://docs.python-requests.org/en/latest/user/quickstart/#response-content login_page = browser.get("https://github.com/login") # Similar to assert login_page.ok, but reports the full status code in case of # failure. login_page.raise_for_status() # login_page.soup is a BeautifulSoup object # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup # We grab the login form login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form')) # Specify username and password (NOTE(review): both come from the command # line, so the password ends up in shell history and process listings; # example.py prompts for it with getpass instead) login_form.input({"login": args.username, "password": args.password}) # Submit the form page2 = browser.submit(login_form, login_page.url) # Verify we are now logged in: print any flash messages, then check that the # page contains the logout form messages = page2.soup.find("div", class_="flash-messages") if messages: print(messages.text) assert page2.soup.select(".logout-form") print(page2.soup.title.text) # Verify we remain logged in (thanks to cookies) as we browse the rest of # the site page3 = browser.get("https://github.com/MechanicalSoup/MechanicalSoup") assert page3.soup.select(".logout-form") MechanicalSoup-0.10.0/MechanicalSoup.egg-info/0000775000175000017500000000000013235454600020722 5ustar danhdanh00000000000000MechanicalSoup-0.10.0/MechanicalSoup.egg-info/requires.txt0000664000175000017500000000005713235454600023324 0ustar danhdanh00000000000000requests >= 2.0 beautifulsoup4 six >= 1.4 lxml
MechanicalSoup-0.10.0/MechanicalSoup.egg-info/dependency_links.txt0000664000175000017500000000000113235454600024770 0ustar danhdanh00000000000000 MechanicalSoup-0.10.0/MechanicalSoup.egg-info/SOURCES.txt0000664000175000017500000000114413235454600022606 0ustar danhdanh00000000000000LICENSE MANIFEST.in README.rst requirements.txt setup.cfg setup.py MechanicalSoup.egg-info/PKG-INFO MechanicalSoup.egg-info/SOURCES.txt MechanicalSoup.egg-info/dependency_links.txt MechanicalSoup.egg-info/requires.txt MechanicalSoup.egg-info/top_level.txt examples/example.py examples/example_manual.py mechanicalsoup/__init__.py mechanicalsoup/__version__.py mechanicalsoup/browser.py mechanicalsoup/form.py mechanicalsoup/stateful_browser.py mechanicalsoup/utils.py tests/conftest.py tests/requirements.txt tests/setpath.py tests/test_browser.py tests/test_form.py tests/test_stateful_browser.py tests/utils.pyMechanicalSoup-0.10.0/MechanicalSoup.egg-info/PKG-INFO0000664000175000017500000001453413235454600022026 0ustar danhdanh00000000000000Metadata-Version: 1.1 Name: MechanicalSoup Version: 0.10.0 Summary: A Python library for automating interaction with websites Home-page: https://mechanicalsoup.readthedocs.io/ Author: UNKNOWN Author-email: UNKNOWN License: MIT Description: MechanicalSoup ============== Home page --------- https://mechanicalsoup.readthedocs.io/ Overview -------- A Python library for automating interaction with websites. MechanicalSoup automatically stores and sends cookies, follows redirects, and can follow links and submit forms. It doesn't do JavaScript. MechanicalSoup was created by `M Hickford `__, who was a fond user of the `Mechanize `__ library. Unfortunately, Mechanize is `incompatible with Python 3 `__ and its development stalled for several years. MechanicalSoup provides a similar API, built on Python giants `Requests `__ (for HTTP sessions) and `BeautifulSoup `__ (for document navigation). 
Since 2017 it is a project actively maintained by a small team including `@hemberger `__ and `@moy `__. |Gitter Chat| Installation ------------ |Latest Version| |Supported Versions| PyPy and PyPy3 are also supported (and tested against). Download and install the latest released version from `PyPI `__:: pip install MechanicalSoup Download and install the development version from `GitHub `__:: pip install git+https://github.com/MechanicalSoup/MechanicalSoup Installing from source (installs the version in the current working directory):: python setup.py install (In all cases, add ``--user`` to the ``install`` command to install in the current user's home directory.) Documentation ------------- The full documentation is available on https://mechanicalsoup.readthedocs.io/. You may want to jump directly to the `automatically generated API documentation `__. Example ------- From `examples/expl_duck_duck_go.py `__, code to get the results from a DuckDuckGo search: .. code:: python """Example usage of MechanicalSoup to get the results from DuckDuckGo.""" import mechanicalsoup # Connect to duckduckgo browser = mechanicalsoup.StatefulBrowser() browser.open("https://duckduckgo.com/") # Fill-in the search form browser.select_form('#search_form_homepage') browser["q"] = "MechanicalSoup" browser.submit_selected() # Display the results for link in browser.get_current_page().select('a.result__a'): print(link.text, '->', link.attrs['href']) More examples are available in `examples/ `__. For an example with a more complex form (checkboxes, radio buttons and textareas), read `tests/test_browser.py `__ and `tests/test_form.py `__. Development ----------- |Build Status| |Coverage Status| |Requirements Status| |Documentation Status| |CII Best Practices| Instructions for building, testing and contributing to MechanicalSoup: see `CONTRIBUTING.rst `__. Common problems --------------- Read the `FAQ `__. .. 
|Latest Version| image:: https://img.shields.io/pypi/v/MechanicalSoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. |Supported Versions| image:: https://img.shields.io/pypi/pyversions/mechanicalsoup.svg :target: https://pypi.python.org/pypi/MechanicalSoup/ .. |Build Status| image:: https://travis-ci.org/MechanicalSoup/MechanicalSoup.svg?branch=master :target: https://travis-ci.org/MechanicalSoup/MechanicalSoup .. |Coverage Status| image:: https://codecov.io/gh/MechanicalSoup/MechanicalSoup/branch/master/graph/badge.svg :target: https://codecov.io/gh/MechanicalSoup/MechanicalSoup .. |Requirements Status| image:: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements.svg?branch=master :target: https://requires.io/github/MechanicalSoup/MechanicalSoup/requirements/?branch=master .. |Documentation Status| image:: https://readthedocs.org/projects/mechanicalsoup/badge/?version=latest :target: https://mechanicalsoup.readthedocs.io/en/latest/?badge=latest .. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/1334/badge :target: https://bestpractices.coreinfrastructure.org/projects/1334 .. |Gitter Chat| image:: https://badges.gitter.im/MechanicalSoup/MechanicalSoup.svg :target: https://gitter.im/MechanicalSoup/Lobby Platform: UNKNOWN Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 MechanicalSoup-0.10.0/MechanicalSoup.egg-info/top_level.txt0000664000175000017500000000001713235454600023452 0ustar danhdanh00000000000000mechanicalsoup