pax_global_header00006660000000000000000000000064136461573650014532gustar00rootroot0000000000000052 comment=7d369e41fb00127ac278a911dd8025906be46738 elastalert-0.2.4/000077500000000000000000000000001364615736500136755ustar00rootroot00000000000000elastalert-0.2.4/.editorconfig000066400000000000000000000003301364615736500163460ustar00rootroot00000000000000root = true [*] end_of_line = lf insert_final_newline = true charset = utf-8 [*.py] indent_style = space indent_size = 4 [Makefile] indent_style = tab [{*.json,*.yml,*.yaml}] indent_style = space indent_size = 2 elastalert-0.2.4/.gitignore000066400000000000000000000002401364615736500156610ustar00rootroot00000000000000config.yaml .tox/ .coverage .idea/* .cache/ __pycache__/ *.pyc virtualenv_run/ *.egg-info/ dist/ venv/ env/ docs/build/ build/ .pytest_cache/ my_rules *.swp *~ elastalert-0.2.4/.pre-commit-config.yaml000066400000000000000000000013161364615736500201570ustar00rootroot00000000000000repos: - repo: git://github.com/pre-commit/pre-commit-hooks sha: v1.1.1 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: autopep8-wrapper args: - -i - --ignore=E265,E309,E501 - id: flake8 - id: check-yaml - id: debug-statements - id: requirements-txt-fixer - id: name-tests-test - repo: git://github.com/asottile/reorder_python_imports sha: v0.3.5 hooks: - id: reorder-python-imports - repo: git://github.com/Yelp/detect-secrets sha: 0.9.1 hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] exclude: .*tests/.*|.*yelp/testing/.*|\.pre-commit-config\.yaml elastalert-0.2.4/.secrets.baseline000066400000000000000000000010761364615736500171330ustar00rootroot00000000000000{ "exclude_regex": ".*tests/.*|.*yelp/testing/.*|\\.pre-commit-config\\.yaml", "generated_at": "2018-07-06T22:54:22Z", "plugins_used": [ { "base64_limit": 4.5, "name": "Base64HighEntropyString" }, { "hex_limit": 3, "name": "HexHighEntropyString" }, { "name": "PrivateKeyDetector" } ], "results": { ".travis.yml": [ { "hashed_secret": "4f7a1ea04dafcbfee994ee1d08857b8aaedf8065", "line_number": 14, "type": "Base64 High Entropy String" } ] }, "version": "0.9.1" } elastalert-0.2.4/.travis.yml000066400000000000000000000023561364615736500160140ustar00rootroot00000000000000language: python python: - '3.6' env: - TOXENV=docs - TOXENV=py36 install: - pip install tox - > if [[ -n "${ES_VERSION}" ]] ; then wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz mkdir elasticsearch-${ES_VERSION} && tar -xzf elasticsearch-${ES_VERSION}.tar.gz -C elasticsearch-${ES_VERSION} --strip-components=1 ./elasticsearch-${ES_VERSION}/bin/elasticsearch & fi script: - > if [[ -n "${ES_VERSION}" ]] ; then wget -q --waitretry=1 --retry-connrefused --tries=30 -O - http://127.0.0.1:9200 make test-elasticsearch else make test fi jobs: include: - stage: 'Elasticsearch test' env: TOXENV=py36 ES_VERSION=7.0.0-linux-x86_64 - env: TOXENV=py36 ES_VERSION=6.6.2 - env: TOXENV=py36 ES_VERSION=6.3.2 - env: TOXENV=py36 ES_VERSION=6.2.4 - env: TOXENV=py36 ES_VERSION=6.0.1 - env: TOXENV=py36 ES_VERSION=5.6.16 deploy: provider: pypi user: yelplabs password: secure: TpSTlFu89tciZzboIfitHhU5NhAB1L1/rI35eQTXstiqzYg2mweOuip+MPNx9AlX3Swg7MhaFYnSUvRqPljuoLjLD0EQ7BHLVSBFl92ukkAMTeKvM6LbB9HnGOwzmAvTR5coegk8IHiegudODWvnhIj4hp7/0EA+gVX7E55kEAw= on: tags: true distributions: sdist bdist_wheel repo: Yelp/elastalert branch: master elastalert-0.2.4/Dockerfile-test000066400000000000000000000003641364615736500166470ustar00rootroot00000000000000FROM ubuntu:latest RUN apt-get update && apt-get 
upgrade -y RUN apt-get -y install build-essential python3.6 python3.6-dev python3-pip libssl-dev git WORKDIR /home/elastalert ADD requirements*.txt ./ RUN pip3 install -r requirements-dev.txt elastalert-0.2.4/LICENSE000066400000000000000000000261371364615736500147130ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

elastalert-0.2.4/Makefile

.PHONY: all production test docs clean

all: production

production:
	@true

docs:
	tox -e docs

dev: $(LOCAL_CONFIG_DIR) $(LOGS_DIR) install-hooks

install-hooks:
	pre-commit install -f --install-hooks

test:
	tox

test-elasticsearch:
	tox -- --runelasticsearch

test-docker:
	docker-compose --project-name elastalert build tox
	docker-compose --project-name elastalert run tox

clean:
	make -C docs clean
	find . -name '*.pyc' -delete
	find . -name '__pycache__' -delete
	rm -rf virtualenv_run .tox .coverage *.egg-info build

elastalert-0.2.4/README.md

Recent changes: As of Elastalert 0.2.0, you must use Python 3.6. Python 2 will no longer be supported.

[![Build Status](https://travis-ci.org/Yelp/elastalert.svg)](https://travis-ci.org/Yelp/elastalert)
[![Join the chat at https://gitter.im/Yelp/elastalert](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Yelp/elastalert?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

## ElastAlert - [Read the Docs](http://elastalert.readthedocs.org).

### Easy & Flexible Alerting With Elasticsearch

ElastAlert is a simple framework for alerting on anomalies, spikes, or other patterns of interest from data in Elasticsearch. ElastAlert works with all versions of Elasticsearch.

At Yelp, we use Elasticsearch, Logstash and Kibana for managing our ever-increasing amount of data and logs. Kibana is great for visualizing and querying data, but we quickly realized that it needed a companion tool for alerting on inconsistencies in our data. Out of this need, ElastAlert was created.

If you have data being written into Elasticsearch in near real time and want to be alerted when that data matches certain patterns, ElastAlert is the tool for you. If you can see it in Kibana, ElastAlert can alert on it.

## Overview

We designed ElastAlert to be reliable, highly modular, and easy to set up and configure.

It works by combining Elasticsearch with two types of components, rule types and alerts. Elasticsearch is periodically queried and the data is passed to the rule type, which determines when a match is found. When a match occurs, it is given to one or more alerts, which take action based on the match.

This is configured by a set of rules, each of which defines a query, a rule type, and a set of alerts.
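To make that concrete, here is a minimal sketch of what a single rule file might look like, loosely modeled on the frequency example described under "Example rules" below. The index, filter, and email values are placeholders, not part of this repository.

```
# Illustrative rule file (sketch): a "frequency" rule that emails
# when 50 matching documents appear within 4 hours.
name: example-frequency-rule      # unique name for this rule
type: frequency                   # the rule type that decides when a match occurs
index: logstash-*                 # which Elasticsearch index pattern to query

num_events: 50                    # match condition used by the frequency type
timeframe:
  hours: 4

filter:                           # the Elasticsearch query this rule runs
- query:
    query_string:
      query: "some_field: some_value"

alert:                            # one or more alerters to fire on a match
- "email"
email:
- "elastalert@example.com"
```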
Several rule types with common monitoring paradigms are included with ElastAlert:

- "Match where there are at least X events in Y time" (``frequency`` type)
- "Match when the rate of events increases or decreases" (``spike`` type)
- "Match when there are less than X events in Y time" (``flatline`` type)
- "Match when a certain field matches a blacklist/whitelist" (``blacklist`` and ``whitelist`` type)
- "Match on any event matching a given filter" (``any`` type)
- "Match when a field has two different values within some time" (``change`` type)
- "Match when a never before seen term appears in a field" (``new_term`` type)
- "Match when the number of unique values for a field is above or below a threshold" (``cardinality`` type)

Currently, we have built-in support for the following alert types:

- Email
- JIRA
- OpsGenie
- Commands
- HipChat
- MS Teams
- Slack
- Telegram
- GoogleChat
- AWS SNS
- VictorOps
- PagerDuty
- PagerTree
- Exotel
- Twilio
- Gitter
- Line Notify
- Zabbix

Additional rule types and alerts can be easily imported or written.

In addition to this basic usage, there are many other features that make alerts more useful:

- Alerts link to Kibana dashboards
- Aggregate counts for arbitrary fields
- Combine alerts into periodic reports
- Separate alerts by using a unique key field
- Intercept and enhance match data

To get started, check out `Running ElastAlert For The First Time` in the [documentation](http://elastalert.readthedocs.org).

## Running ElastAlert

You can either install the latest released version of ElastAlert using pip:

```pip install elastalert```

or you can clone the ElastAlert repository for the most recent changes:

```git clone https://github.com/Yelp/elastalert.git```

Install the module:

```pip install "setuptools>=11.3"```

```python setup.py install```

The following invocation can be used to run ElastAlert after installing:

``$ elastalert [--debug] [--verbose] [--start <timestamp>] [--end <timestamp>] [--rule <rule.yaml>] [--config <config.yaml>]``

``--debug`` will print additional information to the screen, suppress alerts, and instead print the alert body. Not compatible with `--verbose`.

``--verbose`` will print additional information without suppressing alerts. Not compatible with `--debug`.

``--start`` will begin querying at the given timestamp. By default, ElastAlert will begin querying from the present. Timestamp format is ``YYYY-MM-DDTHH:MM:SS[-/+HH:MM]`` (note the T between date and hour). Eg: ``--start 2014-09-26T12:00:00`` (UTC) or ``--start 2014-10-01T07:30:00-05:00``

``--end`` will cause ElastAlert to stop querying at the given timestamp. By default, ElastAlert will continue to query indefinitely.

``--rule`` will allow you to run only one rule. It must still be in the rules folder. Eg: ``--rule this_rule.yaml``

``--config`` allows you to specify the location of the configuration file. By default, it will look for config.yaml in the current directory.

## Third Party Tools And Extras

### Kibana plugin

![img](https://raw.githubusercontent.com/bitsensor/elastalert-kibana-plugin/master/showcase.gif)

Available at the [ElastAlert Kibana plugin repository](https://github.com/bitsensor/elastalert-kibana-plugin).

### Docker

A [Dockerized version](https://github.com/bitsensor/elastalert) of ElastAlert, including a REST API, is built from `master` and published as `bitsensor/elastalert:latest`.
```bash
git clone https://github.com/bitsensor/elastalert.git; cd elastalert

docker run -d -p 3030:3030 \
    -v `pwd`/config/elastalert.yaml:/opt/elastalert/config.yaml \
    -v `pwd`/config/config.json:/opt/elastalert-server/config/config.json \
    -v `pwd`/rules:/opt/elastalert/rules \
    -v `pwd`/rule_templates:/opt/elastalert/rule_templates \
    --net="host" \
    --name elastalert bitsensor/elastalert:latest
```

## Documentation

Read the documentation at [Read the Docs](http://elastalert.readthedocs.org).

To build an HTML version of the docs locally:

```
pip install sphinx_rtd_theme sphinx
cd docs
make html
```

View in browser at build/html/index.html

## Configuration

See config.yaml.example for details on configuration.

## Example rules

Examples of different types of rules can be found in example_rules/.

- ``example_spike.yaml`` is an example of the "spike" rule type, which allows you to alert when the rate of events, averaged over a time period, increases by a given factor. This example will send an email alert when there are 3 times more events matching a filter occurring within the last 2 hours than the number of events in the previous 2 hours.

- ``example_frequency.yaml`` is an example of the "frequency" rule type, which will alert when there are a given number of events occurring within a time period. This example will send an email when 50 documents matching a given filter occur within a 4 hour timeframe.

- ``example_change.yaml`` is an example of the "change" rule type, which will alert when a certain field in two documents changes. In this example, the alert email is sent when two documents with the same 'username' field but a different value of the 'country_name' field occur within 24 hours of each other.

- ``example_new_term.yaml`` is an example of the "new term" rule type, which alerts when a new value appears in a field or fields. In this example, an email is sent when a new value of ("username", "computer") is encountered in example login logs.

## Frequently Asked Questions

### My rule is not getting any hits?

So you've managed to set up ElastAlert, write a rule, and run it, but nothing happens, or it says ``0 query hits``. First of all, we recommend using the command ``elastalert-test-rule rule.yaml`` to debug. It will show you how many documents match your filters for the last 24 hours (or more, see ``--help``), and then show you if any alerts would have fired. If you have a filter in your rule, remove it and try again. This will show you if the index is correct and that you have at least some documents. If you have a filter in Kibana and want to recreate it in ElastAlert, you probably want to use a query string. Your filter will look like

```
filter:
- query:
    query_string:
      query: "foo: bar AND baz: abc*"
```

If you receive an error that Elasticsearch is unable to parse it, it's likely the YAML is not spaced correctly, and the filter is not in the right format.

If you are using other types of filters, like ``term``, a common pitfall is not realizing that the field may be analyzed, which is the default if you are using Logstash. For example,

```
filter:
- term:
    foo: "Test Document"
```

will not match even if the original value for ``foo`` was exactly "Test Document". Instead, you want to use ``foo.raw``.

If you are still having trouble troubleshooting why your documents do not match, try running ElastAlert with ``--es_debug_trace /path/to/file.log``. This will log the queries made to Elasticsearch in full so that you can see exactly what is happening.
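As a concrete illustration of the ``.raw`` advice above, the corrected term filter would look like this (assuming a Logstash-style mapping where the non-analyzed subfield is ``.raw``):

```
filter:
- term:
    foo.raw: "Test Document"   # exact match against the non-analyzed subfield
```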
### I got hits, why didn't I get an alert?

If you got logs that had ``X query hits, 0 matches, 0 alerts sent``, the reason you didn't get any alerts depends on the rule ``type``. If ``type: any``, a match will occur for every hit. If you are using ``type: frequency``, ``num_events`` must occur within ``timeframe`` of each other for a match to occur. Different rules apply for different rule types.

If you see ``X matches, 0 alerts sent``, this may occur for several reasons. If you set ``aggregation``, the alert will not be sent until after that time has elapsed. If you have gotten an alert for this same rule before, that rule may be silenced for a period of time. The default is one minute between alerts. If a rule is silenced, you will see ``Ignoring match for silenced rule`` in the logs.

If you see ``X alerts sent`` but didn't get any alert, it's probably related to the alert configuration. If you are using the ``--debug`` flag, you will not receive any alerts. Instead, the alert text will be written to the console. Use ``--verbose`` to achieve the same effect without preventing alerts. If you are using the email alerter, make sure you have it configured for an SMTP server. By default, it will connect to localhost on port 25. It will also use the word "elastalert" as the "From:" address. Some SMTP servers will reject this because it does not have a domain while others will add their own domain automatically. See the email section in the documentation for how to configure this.

### Why did I only get one alert when I expected to get several?

There is a setting called ``realert`` which is the minimum time between two alerts for the same rule. Any alert that occurs within this time will simply be dropped. The default value for this is one minute. If you want to receive an alert for every single match, even if they occur right after each other, use

```
realert:
  minutes: 0
```

You can of course set it higher as well.

### How can I prevent duplicate alerts?

By setting ``realert``, you will prevent the same rule from alerting twice within a given amount of time.

```
realert:
  days: 1
```

You can also prevent duplicates based on a certain field by using ``query_key``. For example, to prevent multiple alerts for the same user, you might use

```
realert:
  hours: 8
query_key: user
```

Note that this will also affect the way many rule types work. If you are using ``type: frequency`` for example, ``num_events`` for a single value of ``query_key`` must occur before an alert will be sent. You can also use a compound of multiple fields for this key. For example, if you only wanted to receive an alert once for a specific error and hostname, you could use

```
query_key: [error, hostname]
```

Internally, this works by creating a new field for each document called ``field1,field2`` with a value of ``value1,value2`` and using that as the ``query_key``.

The data for when an alert will fire again is stored in Elasticsearch in the ``elastalert_status`` index, with a ``_type`` of ``silence`` and also cached in memory.

### How can I change what's in the alert?

You can use the field ``alert_text`` to add custom text to an alert. By setting ``alert_text_type: alert_text_only``, it will be the entirety of the alert. You can also add different fields from the alert by using Python style string formatting and ``alert_text_args``.
For example

```
alert_text: "Something happened with {0} at {1}"
alert_text_type: alert_text_only
alert_text_args: ["username", "@timestamp"]
```

You can also limit the alert to only contain certain fields from the document by using ``include``.

```
include: ["ip_address", "hostname", "status"]
```

### My alert only contains data for one event, how can I see more?

If you are using ``type: frequency``, you can set the option ``attach_related: true`` and every document will be included in the alert. An alternative, which works for every type, is ``top_count_keys``. This will show the top counts for each value for certain fields. For example, if you have

```
top_count_keys: ["ip_address", "status"]
```

and 10 documents matched your alert, it may contain something like

```
ip_address:
  127.0.0.1: 7
  10.0.0.1: 2
  192.168.0.1: 1
status:
  200: 9
  500: 1
```

### How can I make the alert come at a certain time?

The ``aggregation`` feature will take every alert that has occurred over a period of time and send them together in one alert. You can use cron style syntax to send all alerts that have occurred since the last one by using

```
aggregation:
  schedule: '2 4 * * mon,fri'
```

### I have lots of documents and it's really slow, how can I speed it up?

There are several ways to potentially speed up queries. If you are using ``index: logstash-*``, Elasticsearch will query all shards, even if they do not possibly contain data with the correct timestamp. Instead, you can use Python time format strings and set ``use_strftime_index``

```
index: logstash-%Y.%m
use_strftime_index: true
```

Another thing you could change is ``buffer_time``. By default, ElastAlert will query large overlapping windows in order to ensure that it does not miss any events, even if they are indexed in real time. In config.yaml, you can adjust ``buffer_time`` to a smaller number to only query the most recent few minutes.

```
buffer_time:
  minutes: 5
```

By default, ElastAlert will download every document in full before processing them. Instead, you can have ElastAlert simply get a count of the number of documents that have occurred in between each query. To do this, set ``use_count_query: true``. This cannot be used if you use ``query_key``, because ElastAlert will not know the contents of each document, just the total number of them. This also reduces the precision of alerts, because all events that occur between each query will be rounded to a single timestamp.

If you are using ``query_key`` (a single key, not multiple keys) you can use ``use_terms_query``. This will make ElastAlert perform a terms aggregation to get the counts for each value of a certain field.

Both ``use_terms_query`` and ``use_count_query`` also require ``doc_type`` to be set to the ``_type`` of the documents. They may not be compatible with all rule types.

### Can I perform aggregations?

The only aggregation supported currently is a terms aggregation, by setting ``use_terms_query``.

### I'm not using @timestamp, what do I do?

You can use ``timestamp_field`` to change which field ElastAlert will use as the timestamp. You can use ``timestamp_type`` to change it between ISO 8601 and unix timestamps. You must have some kind of timestamp for ElastAlert to work. If your events are not in real time, you can use ``query_delay`` and ``buffer_time`` to adjust when ElastAlert will look for documents.
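For example, a rule whose events carry a unix-epoch field and arrive a few minutes late might combine these options as in the following sketch (the field name ``ingest_time`` is made up for illustration):

```
timestamp_field: ingest_time   # use this field instead of @timestamp
timestamp_type: unix           # interpret it as a unix timestamp rather than ISO 8601
query_delay:                   # wait before querying, for late-arriving events
  minutes: 5
buffer_time:
  minutes: 30
```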
### I'm using flatline but I don't see any alerts

When using ``type: flatline``, ElastAlert must see at least one document before it will alert you that it has stopped seeing them.

### How can I get a "resolve" event?

ElastAlert does not currently support stateful alerts or resolve events.

### Can I set a warning threshold?

Currently, the only way to set a warning threshold is by creating a second rule with a lower threshold.

## License

ElastAlert is licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0

### Read the documentation at [Read the Docs](http://elastalert.readthedocs.org).

### Questions? Drop by #elastalert on Freenode IRC.

elastalert-0.2.4/changelog.md

# Change Log

# v0.2.4

### Added
- Added back customFields support for The Hive

# v0.2.3

### Added
- Added back TheHive alerter without TheHive4py library

# v0.2.2

### Added
- Integration with Kibana Discover app
- Added ability to specify opsgenie alert details

### Fixed
- Fix some encoding issues with command alerter
- Better error messages for missing config file
- Fixed an issue with run_every not applying per-rule
- Fixed an issue with rules not being removed
- Fixed an issue with top count keys and nested query keys
- Various documentation fixes
- Fixed an issue with not being able to use spike aggregation

### Removed
- Remove The Hive alerter

# v0.2.1

### Fixed
- Fixed an AttributeError introduced in 0.2.0

# v0.2.0

- Switched to Python 3

### Added
- Add rule loader class for customized rule loading
- Added thread based rules and limit_execution
- Run_every can now be customized per rule

### Fixed
- Various small fixes

# v0.1.39

### Added
- Added spike alerts for metric aggregations
- Allow SSL connections for Stomp
- Allow limits on alert text length
- Add optional min doc count for terms queries
- Add ability to index into arrays for alert_text_args, etc

### Fixed
- Fixed bug involving --config flag with create-index
- Fixed some settings not being inherited from the config properly
- Some fixes for Hive alerter
- Close SMTP connections properly
- Fix timestamps in Pagerduty v2 payload
- Fixed a bug causing aggregated alerts to mix up

# v0.1.38

### Added
- Added PagerTree alerter
- Added Line alerter
- Added more customizable logging
- Added new logic in test-rule to determine the default timeframe

### Fixed
- Fixed an issue causing buffer_time to sometimes be ignored

# v0.1.37

### Added
- Added more options for Opsgenie alerter
- Added more pagerduty options
- Added ability to add metadata to elastalert logs

### Fixed
- Fixed some documentation to be more clear
- Stop requiring doc_type for metric aggregations
- No longer puts quotes around regex terms in blacklists or whitelists

# v0.1.36

### Added
- Added a prefix "metric_" to the key used for metric aggregations to avoid possible conflicts
- Added option to skip Alerta certificate validation

### Fixed
- Fixed a typo in the documentation for spike rule

# v0.1.35

### Fixed
- Fixed an issue preventing new term rule from working with terms query

# v0.1.34

### Added
- Added prefix/suffix support for summary table
- Added support for ignoring SSL validation in Slack
- More visible exceptions during query parse failures

### Fixed
- Fixed top_count_keys when using compound query_key
- Fixed num_hits sometimes being reported too low
- Fixed an issue with setting ES_USERNAME via env
- Fixed an issue when using test script with custom timestamps
- Fixed a unicode
error when using Telegram - Fixed an issue with jsonschema version conflict - Fixed an issue with nested timestamps in cardinality type # v0.1.33 ### Added - Added ability to pipe alert text to a command - Add --start and --end support for elastalert-test-rule - Added ability to turn blacklist/whitelist files into queries for better performance - Allow setting of OpsGenie priority - Add ability to query the adjacent index if timestamp_field not used for index timestamping - Add support for pagerduty v2 - Add option to turn off .raw/.keyword field postfixing in new term rule - Added --use-downloaded feature for elastalert-test-rule ### Fixed - Fixed a bug that caused num_hits in matches to sometimes be erroneously small - Fixed an issue with HTTP Post alerter that could cause it to hang indefinitely - Fixed some issues with string formatting for various alerters - Fixed a couple of incorrect parts of the documentation # v0.1.32 ### Added - Add support for setting ES url prefix via environment var - Add support for using native Slack fields in alerts ### Fixed - Fixed a bug that would could scrolling queries to sometimes terminate early # v0.1.31 ### Added - Added ability to add start date to new term rule ### Fixed - Fixed a bug in create_index which would try to delete a nonexistent index - Apply filters to new term rule all terms query - Support Elasticsearch 6 for new term rule - Fixed is_enabled not working on rule changes # v0.1.30 ### Added - Alerta alerter - Added support for transitioning JIRA issues - Option to recreate index in elastalert-create-index ### Fixed - Update jira_ custom fields before each alert if they were modified - Use json instead of simplejson - Allow for relative path for smtp_auth_file - Fixed some grammar issues - Better code formatting of index mappings - Better formatting and size limit for HipChat HTML - Fixed gif link in readme for kibana plugin - Fixed elastalert-test-rule with Elasticsearch > 4 - Added documentation for is_enabled option ## v0.1.29 ### Added - Added a feature forget_keys to prevent realerting when using flatline with query_key - Added a new alert_text_type, aggregation_summary_only ### Fixed - Fixed incorrect documentation about es_conn_timeout default ## v0.1.28 ### Added - Added support for Stride formatting of simple HTML tags - Added support for custom titles in Opsgenie alerts - Added a denominator to percentage match based alerts ### Fixed - Fixed a bug with Stomp alerter connections - Removed escaping of some characaters in Slack messages ## v0.1.27 # Added - Added support for a value other than in formatted alerts ### Fixed - Fixed a failed creation of elastalert indicies when using Elasticsearch 6 - Truncate Telegram alerts to avoid API errors ## v0.1.26 ### Added - Added support for Elasticsearch 6 - Added support for mentions in Hipchat ### Fixed - Fixed an issue where a nested field lookup would crash if one of the intermediate fields was null ## v0.1.25 ### Fixed - Fixed a bug causing new term rule to break unless you passed a start time - Add a slight clarification on the localhost:9200 reported in es_debug_trace ## v0.1.24 ### Fixed - Pinned pytest - create-index reads index name from config.yaml - top_count_keys now works for context on a flatline rule type - Fixed JIRA behavior for issues with statuses that have spaces in the name ## v0.1.22 ### Added - Added Stride alerter - Allow custom string formatters for aggregation percentage - Added a field to disable rules from config - Added support for subaggregations for the 
metric rule type ### Fixed - Fixed a bug causing create-index to fail if missing config.yaml - Fixed a bug when using ES5 with query_key and top_count_keys - Allow enhancements to set and clear arbitrary JIRA fields - Fixed a bug causing timestamps to be formatted in scientific notation - Stop attempting to initialize alerters in debug mode - Changed default alert ordering so that JIRA tickets end up in other alerts - Fixed a bug when using Stomp alerter with complex query_key - Fixed a bug preventing hipchat room ID from being an integer - Fixed a bug causing duplicate alerts when using spike with alert_on_new_data - Minor fixes to summary table formatting - Fixed elastalert-test-rule when using new term rule type ## v0.1.21 ### Fixed - Fixed an incomplete bug fix for preventing duplicate enhancement runs ## v0.1.20 ### Added - Added support for client TLS keys ### Fixed - Fixed the formatting of summary tables in Slack - Fixed ES_USE_SSL env variable - Fixed the unique value count printed by new_term rule type - Jira alerter no longer uses the non-existent json code formatter ## v0.1.19 ### Added - Added support for populating JIRA fields via fields in the match - Added support for using a TLS certificate file for SMTP connections - Allow a custom suffix for non-analyzed Elasticsearch fields, like ".raw" or ".keyword" - Added match_time to Elastalert alert documents in Elasticsearch ### Fixed - Fixed an error in the documentation for rule importing - Prevent enhancements from re-running on retried alerts - Fixed a bug when using custom timestamp formats and new term rule - Lowered jira_bump_after_inactivity default to 0 days ## v0.1.18 ### Added - Added a new alerter "post" based on "simple" which makes POSTS JSON to HTTP endpoints - Added an option jira_bump_after_inacitivty to prevent ElastAlert commenting on active JIRA tickets ### Removed - Removed "simple" alerter, replaced by "post" ## v0.1.17 ### Added - Added a --patience flag to allow Elastalert to wait for Elasticsearch to become available - Allow custom PagerDuty alert titles via alert_subject ## v0.1.16 ### Fixed - Fixed a bug where JIRA titles might not use query_key values - Fixed a bug where flatline alerts don't respect query_key for realert - Fixed a typo "twilio_accout_sid" ### Added - Added support for env variables in kibana4 dashboard links - Added ca_certs option for custom CA support ## v0.1.15 ### Fixed - Fixed a bug where Elastalert would crash on connection error during startup - Fixed some typos in documentation - Fixed a bug in metric bucket offset calculation - Fixed a TypeError in Service Now alerter ### Added - Added support for compound compare key in change rules - Added support for absolute paths in rule config imports - Added Microsoft Teams alerter - Added support for markdown in Slack alerts - Added error codes to test script - Added support for lists in email_from_field ## v0.1.14 - 2017-05-11 ### Fixed - Twilio alerter uses the from number appropriately - Fixed a TypeError in SNS alerter - Some changes to requirements.txt and setup.py - Fixed a TypeError in new term rule ### Added - Set a custom pagerduty incident key - Preserve traceback in most exceptions ## v0.1.12 - 2017-04-21 ### Fixed - Fixed a bug causing filters to be ignored when using Elasticsearch 5 ## v0.1.11 - 2017-04-19 ### Fixed - Fixed an issue that would cause filters starting with "query" to sometimes throw errors in ES5 - Fixed a bug with multiple versions of ES on different rules - Fixed a possible KeyError when using 
use_terms_query with ES5 ## v0.1.10 - 2017-04-17 ### Fixed - Fixed an AttributeError occuring with older versions of Elasticsearch library - Made example rules more consistent and with unique names - Fixed an error caused by a typo when es_username is used ## v0.1.9 - 2017-04-14 ### Added - Added a changelog - Added metric aggregation rule type - Added percentage match rule type - Added default doc style and improved the instructions - Rule names will default to the filename - Added import keyword in rules to include sections from other files - Added email_from_field option to derive the recipient from a field in the match - Added simple HTTP alerter - Added Exotel SMS alerter - Added a readme link to third party Kibana plugin - Added option to use env variables to configure some settings - Added duplicate hits count in log line ### Fixed - Fixed a bug in change rule where a boolean false would be ignored - Clarify documentation on format of alert_text_args and alert_text_kw - Fixed a bug preventing new silence stashes from being loaded after a rule has previous alerted - Changed the default es_host in elastalert-test-rule to localhost - Fixed a bug preventing ES <5.0 formatted queries working in elastalert-test-rule - Fixed top_count_keys adding .raw on ES >5.0, uses .keyword instead - Fixed a bug causing compound aggregation keys not to work - Better error reporting for the Jira alerter - AWS request signing now refreshes credentials, uses boto3 - Support multiple ES versions on different rules - Added documentation for percentage match rule type ### Removed - Removed a feature that would disable writeback_es on errors, causing various issues elastalert-0.2.4/config.yaml.example000066400000000000000000000063711364615736500174670ustar00rootroot00000000000000# This is the folder that contains the rule yaml files # Any .yaml file will be loaded as a rule rules_folder: example_rules # How often ElastAlert will query Elasticsearch # The unit can be anything from weeks to seconds run_every: minutes: 1 # ElastAlert will buffer results from the most recent # period of time, in case some log sources are not in real time buffer_time: minutes: 15 # The Elasticsearch hostname for metadata writeback # Note that every rule can have its own Elasticsearch host es_host: elasticsearch.example.com # The Elasticsearch port es_port: 9200 # The AWS region to use. Set this when using AWS-managed elasticsearch #aws_region: us-east-1 # The AWS profile to use. Use this if you are using an aws-cli profile. # See http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html # for details #profile: test # Optional URL prefix for Elasticsearch #es_url_prefix: elasticsearch # Connect with TLS to Elasticsearch #use_ssl: True # Verify TLS certificates #verify_certs: True # GET request with body is the default option for Elasticsearch. # If it fails for some reason, you can pass 'GET', 'POST' or 'source'. 
# See http://elasticsearch-py.readthedocs.io/en/master/connection.html?highlight=send_get_body_as#transport # for details #es_send_get_body_as: GET # Option basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # Use SSL authentication with client certificates client_cert must be # a pem file containing both cert and key for client #verify_certs: True #ca_certs: /path/to/cacert.pem #client_cert: /path/to/client_cert.pem #client_key: /path/to/client_key.key # The index on es_host which is used for metadata storage # This can be a unmapped index, but it is recommended that you run # elastalert-create-index to set a mapping writeback_index: elastalert_status writeback_alias: elastalert_alerts # If an alert fails for some reason, ElastAlert will retry # sending the alert until this time period has elapsed alert_time_limit: days: 2 # Custom logging configuration # If you want to setup your own logging configuration to log into # files as well or to Logstash and/or modify log levels, use # the configuration below and adjust to your needs. # Note: if you run ElastAlert with --verbose/--debug, the log level of # the "elastalert" logger is changed to INFO, if not already INFO/DEBUG. #logging: # version: 1 # incremental: false # disable_existing_loggers: false # formatters: # logline: # format: '%(asctime)s %(levelname)+8s %(name)+20s %(message)s' # # handlers: # console: # class: logging.StreamHandler # formatter: logline # level: DEBUG # stream: ext://sys.stderr # # file: # class : logging.FileHandler # formatter: logline # level: DEBUG # filename: elastalert.log # # loggers: # elastalert: # level: WARN # handlers: [] # propagate: true # # elasticsearch: # level: WARN # handlers: [] # propagate: true # # elasticsearch.trace: # level: WARN # handlers: [] # propagate: true # # '': # root logger # level: WARN # handlers: # - console # - file # propagate: false elastalert-0.2.4/docker-compose.yml000066400000000000000000000004051364615736500173310ustar00rootroot00000000000000version: '2' services: tox: build: context: ./ dockerfile: Dockerfile-test command: tox container_name: elastalert_tox working_dir: /home/elastalert volumes: - ./:/home/elastalert/ elastalert-0.2.4/docs/000077500000000000000000000000001364615736500146255ustar00rootroot00000000000000elastalert-0.2.4/docs/Makefile000066400000000000000000000060741364615736500162740ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/monitor.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/monitor.qhc" latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." elastalert-0.2.4/docs/source/000077500000000000000000000000001364615736500161255ustar00rootroot00000000000000elastalert-0.2.4/docs/source/_static/000077500000000000000000000000001364615736500175535ustar00rootroot00000000000000elastalert-0.2.4/docs/source/_static/.gitkeep000066400000000000000000000000001364615736500211720ustar00rootroot00000000000000elastalert-0.2.4/docs/source/conf.py000066400000000000000000000144061364615736500174310ustar00rootroot00000000000000import sphinx_rtd_theme # -*- coding: utf-8 -*- # # ElastAlert documentation build configuration file, created by # sphinx-quickstart on Thu Jul 11 15:45:31 2013. # # This file is execfile()d with the current directory set to its containing dir. 
# # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.append(os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # General information about the project. project = u'ElastAlert' copyright = u'2014, Yelp' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.0.1' # The full version, including alpha/beta/rc tags. release = '0.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. # unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. 
# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_use_modindex = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'elastalertdoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). # latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). # latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'elastalert.tex', u'ElastAlert Documentation', u'Quentin Long', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # Additional stuff for the LaTeX preamble. # latex_preamble = '' # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_use_modindex = True elastalert-0.2.4/docs/source/elastalert.rst000077500000000000000000000352241364615736500210300ustar00rootroot00000000000000ElastAlert - Easy & Flexible Alerting With Elasticsearch ******************************************************** ElastAlert is a simple framework for alerting on anomalies, spikes, or other patterns of interest from data in Elasticsearch. At Yelp, we use Elasticsearch, Logstash and Kibana for managing our ever increasing amount of data and logs. Kibana is great for visualizing and querying data, but we quickly realized that it needed a companion tool for alerting on inconsistencies in our data. Out of this need, ElastAlert was created. If you have data being written into Elasticsearch in near real time and want to be alerted when that data matches certain patterns, ElastAlert is the tool for you. 
Overview ======== We designed ElastAlert to be :ref:`reliable `, highly :ref:`modular `, and easy to :ref:`set up ` and :ref:`configure `. It works by combining Elasticsearch with two types of components, rule types and alerts. Elasticsearch is periodically queried and the data is passed to the rule type, which determines when a match is found. When a match occurs, it is given to one or more alerts, which take action based on the match. This is configured by a set of rules, each of which defines a query, a rule type, and a set of alerts. Several rule types with common monitoring paradigms are included with ElastAlert: - "Match where there are X events in Y time" (``frequency`` type) - "Match when the rate of events increases or decreases" (``spike`` type) - "Match when there are less than X events in Y time" (``flatline`` type) - "Match when a certain field matches a blacklist/whitelist" (``blacklist`` and ``whitelist`` type) - "Match on any event matching a given filter" (``any`` type) - "Match when a field has two different values within some time" (``change`` type) Currently, we have support built in for these alert types: - Command - Email - JIRA - OpsGenie - SNS - HipChat - Slack - Telegram - GoogleChat - Debug - Stomp - TheHive Additional rule types and alerts can be easily imported or written. (See :ref:`Writing rule types ` and :ref:`Writing alerts `) In addition to this basic usage, there are many other features that make alerts more useful: - Alerts link to Kibana dashboards - Aggregate counts for arbitrary fields - Combine alerts into periodic reports - Separate alerts by using a unique key field - Intercept and enhance match data To get started, check out :ref:`Running ElastAlert For The First Time `. .. _reliability: Reliability =========== ElastAlert has several features to make it more reliable in the event of restarts or Elasticsearch unavailability: - ElastAlert :ref:`saves its state to Elasticsearch ` and, when started, will resume where previously stopped - If Elasticsearch is unresponsive, ElastAlert will wait until it recovers before continuing - Alerts which throw errors may be automatically retried for a period of time .. _modularity: Modularity ========== ElastAlert has three main components that may be imported as a module or customized: Rule types ---------- The rule type is responsible for processing the data returned from Elasticsearch. It is initialized with the rule configuration, passed data that is returned from querying Elasticsearch with the rule's filters, and outputs matches based on this data. See :ref:`Writing rule types ` for more information. Alerts ------ Alerts are responsible for taking action based on a match. A match is generally a dictionary containing values from a document in Elasticsearch, but may contain arbitrary data added by the rule type. See :ref:`Writing alerts ` for more information. Enhancements ------------ Enhancements are a way of intercepting an alert and modifying or enhancing it in some way. They are passed the match dictionary before it is given to the alerter. See :ref:`Enhancements` for more information. .. _configuration: Configuration ============= ElastAlert has a global configuration file, ``config.yaml``, which defines several aspects of its operation: ``buffer_time``: ElastAlert will continuously query against a window from the present to ``buffer_time`` ago. This way, logs can be back filled up to a certain extent and ElastAlert will still process the events. This may be overridden by individual rules. 
This option is ignored for rules where ``use_count_query`` or ``use_terms_query`` is set to true. Note that back filled data may not always trigger count based alerts as if it was queried in real time. ``es_host``: The host name of the Elasticsearch cluster where ElastAlert records metadata about its searches. When ElastAlert is started, it will query for information about the time that it was last run. This way, even if ElastAlert is stopped and restarted, it will never miss data or look at the same events twice. It will also specify the default cluster for each rule to run on. The environment variable ``ES_HOST`` will override this field. ``es_port``: The port corresponding to ``es_host``. The environment variable ``ES_PORT`` will override this field. ``use_ssl``: Optional; whether or not to connect to ``es_host`` using TLS; set to ``True`` or ``False``. The environment variable ``ES_USE_SSL`` will override this field. ``verify_certs``: Optional; whether or not to verify TLS certificates; set to ``True`` or ``False``. The default is ``True``. ``client_cert``: Optional; path to a PEM certificate to use as the client certificate. ``client_key``: Optional; path to a private key file to use as the client key. ``ca_certs``: Optional; path to a CA cert bundle to use to verify SSL connections ``es_username``: Optional; basic-auth username for connecting to ``es_host``. The environment variable ``ES_USERNAME`` will override this field. ``es_password``: Optional; basic-auth password for connecting to ``es_host``. The environment variable ``ES_PASSWORD`` will override this field. ``es_url_prefix``: Optional; URL prefix for the Elasticsearch endpoint. The environment variable ``ES_URL_PREFIX`` will override this field. ``es_send_get_body_as``: Optional; Method for querying Elasticsearch - ``GET``, ``POST`` or ``source``. The default is ``GET`` ``es_conn_timeout``: Optional; sets timeout for connecting to and reading from ``es_host``; defaults to ``20``. ``rules_loader``: Optional; sets the loader class to be used by ElastAlert to retrieve rules and hashes. Defaults to ``FileRulesLoader`` if not set. ``rules_folder``: The name of the folder which contains rule configuration files. ElastAlert will load all files in this folder, and all subdirectories, that end in .yaml. If the contents of this folder change, ElastAlert will load, reload or remove rules based on their respective config files. (only required when using ``FileRulesLoader``). ``scan_subdirectories``: Optional; Sets whether or not ElastAlert should recursively descend the rules directory - ``true`` or ``false``. The default is ``true`` ``run_every``: How often ElastAlert should query Elasticsearch. ElastAlert will remember the last time it ran the query for a given rule, and periodically query from that time until the present. The format of this field is a nested unit of time, such as ``minutes: 5``. This is how time is defined in every ElastAlert configuration. ``writeback_index``: The index on ``es_host`` to use. ``max_query_size``: The maximum number of documents that will be downloaded from Elasticsearch in a single query. The default is 10,000, and if you expect to get near this number, consider using ``use_count_query`` for the rule. If this limit is reached, ElastAlert will `scroll `_ using the size of ``max_query_size`` through the set amount of pages, when ``max_scrolling_count`` is set or until processing all results. ``max_scrolling_count``: The maximum amount of pages to scroll through. 
The default is ``0``, which means the scrolling has no limit. For example, if this value is set to ``5`` and the ``max_query_size`` is set to ``10000`` then ``50000`` documents will be downloaded at most. ``scroll_keepalive``: The maximum time (formatted in `Time Units `_) the scrolling context should be kept alive. Avoid using high values as it abuses resources in Elasticsearch, but be mindful to allow sufficient time to finish processing all the results. ``max_aggregation``: The maximum number of alerts to aggregate together. If a rule has ``aggregation`` set, all alerts occurring within a timeframe will be sent together. The default is 10,000. ``old_query_limit``: The maximum time between queries for ElastAlert to start at the most recently run query. When ElastAlert starts, for each rule, it will search ``elastalert_metadata`` for the most recently run query and start from that time, unless it is older than ``old_query_limit``, in which case it will start from the present time. The default is one week. ``disable_rules_on_error``: If true, ElastAlert will disable rules which throw uncaught (not EAException) exceptions. It will upload a traceback message to ``elastalert_metadata`` and if ``notify_email`` is set, send an email notification. The rule will no longer be run until either ElastAlert restarts or the rule file has been modified. This defaults to True. ``show_disabled_rules``: If true, ElastAlert will show the list of disabled rules when it finishes execution. This defaults to True. ``notify_email``: An email address, or list of email addresses, to which notification emails will be sent. Currently, only an uncaught exception will send a notification email. The from address, SMTP host, and reply-to header can be set using ``from_addr``, ``smtp_host``, and ``email_reply_to`` options, respectively. By default, no emails will be sent. ``from_addr``: The address to use as the from header in email notifications. This value will be used for email alerts as well, unless overwritten in the rule config. The default value is "ElastAlert". ``smtp_host``: The SMTP host used to send email notifications. This value will be used for email alerts as well, unless overwritten in the rule config. The default is "localhost". ``email_reply_to``: This sets the Reply-To header in emails. The default is the recipient address. ``aws_region``: This makes ElastAlert sign HTTP requests when using Amazon Elasticsearch Service. It'll use instance role keys to sign the requests. The environment variable ``AWS_DEFAULT_REGION`` will override this field. ``boto_profile``: Deprecated! Boto profile to use when signing requests to Amazon Elasticsearch Service, if you don't want to use the instance role keys. ``profile``: AWS profile to use when signing requests to Amazon Elasticsearch Service, if you don't want to use the instance role keys. The environment variable ``AWS_DEFAULT_PROFILE`` will override this field. ``replace_dots_in_field_names``: If ``True``, ElastAlert replaces any dots in field names with an underscore before writing documents to Elasticsearch. The default value is ``False``. Elasticsearch 2.0 - 2.3 does not support dots in field names. ``string_multi_field_name``: If set, the suffix to use for the subfield for string multi-fields in Elasticsearch. The default value is ``.raw`` for Elasticsearch 2 and ``.keyword`` for Elasticsearch 5. ``add_metadata_alert``: If set, alerts will include metadata described in rules (``category``, ``description``, ``owner`` and ``priority``); set to ``True`` or ``False``.
The default is ``False``. ``skip_invalid``: If ``True``, skip invalid files instead of exiting. Logging ------- By default, ElastAlert uses a basic logging configuration to print log messages to standard error. You can change the log level to ``INFO`` by using the ``--verbose`` or ``--debug`` command line options. If you need a more sophisticated logging configuration, you can provide a full logging configuration in the config file. This way you can also configure logging to a file or to Logstash, and adjust the logging format. For details, see the end of ``config.yaml.example`` where you can find an example logging configuration. .. _runningelastalert: Running ElastAlert ================== ``$ python elastalert/elastalert.py`` Several arguments are available when running ElastAlert: ``--config`` will specify the configuration file to use. The default is ``config.yaml``. ``--debug`` will run ElastAlert in debug mode. This will increase the logging verbosity, change all alerts to ``DebugAlerter``, which prints alerts and suppresses their normal action, and skips writing search and alert metadata back to Elasticsearch. Not compatible with `--verbose`. ``--verbose`` will increase the logging verbosity, which allows you to see information about the state of queries. Not compatible with `--debug`. ``--start <timestamp>`` will force ElastAlert to begin querying from the given time, instead of the default, querying from the present. The timestamp should be ISO8601, e.g. ``YYYY-MM-DDTHH:MM:SS`` (UTC) or with timezone ``YYYY-MM-DDTHH:MM:SS-08:00`` (PST). Note that if querying over a large date range, no alerts will be sent until that rule has finished querying over the entire time period. To force querying from the current time, use "NOW". ``--end <timestamp>`` will force ElastAlert to stop querying at the given timestamp instead of querying up to the present time indefinitely, which is the default. This really only makes sense when running standalone over a fixed time period. The timestamp format is the same as for ``--start``. ``--rule <rule.yaml>`` will only run the given rule. The rule file may be a complete file path or a filename in ``rules_folder`` or its subdirectories. ``--silence <unit>=<number>`` will silence the alerts for a given rule for a period of time. The rule must be specified using ``--rule``. ``<unit>`` is one of days, weeks, hours, minutes or seconds and ``<number>`` is an integer. For example, ``--rule noisy_rule.yaml --silence hours=4`` will stop noisy_rule from generating any alerts for 4 hours. ``--es_debug`` will enable logging for all queries made to Elasticsearch. ``--es_debug_trace <trace.log>`` will enable logging curl commands for all queries made to Elasticsearch to the specified log file. ``--es_debug_trace`` is passed through to `elasticsearch.py `_ which logs `localhost:9200` instead of the actual ``es_host``:``es_port``. ``--pin_rules`` will stop ElastAlert from loading, reloading or removing rules based on changes to their config files. elastalert-0.2.4/docs/source/elastalert_status.rst000066400000000000000000000116241364615736500224260ustar00rootroot00000000000000.. _metadata: ElastAlert Metadata Index ========================= ElastAlert uses Elasticsearch to store various information about its state.
This not only allows for some level of auditing and debugging of ElastAlert's operation, but also to avoid loss of data or duplication of alerts when ElastAlert is shut down, restarted, or crashes. This cluster and index information is defined in the global config file with ``es_host``, ``es_port`` and ``writeback_index``. ElastAlert must be able to write to this index. The script, ``elastalert-create-index`` will create the index with the correct mapping for you, and optionally copy the documents from an existing ElastAlert writeback index. Run it and it will prompt you for the cluster information. ElastAlert will create three different types of documents in the writeback index: elastalert_status ~~~~~~~~~~~~~~~~~ ``elastalert_status`` is a log of the queries performed for a given rule and contains: - ``@timestamp``: The time when the document was uploaded to Elasticsearch. This is after a query has been run and the results have been processed. - ``rule_name``: The name of the corresponding rule. - ``starttime``: The beginning of the timestamp range the query searched. - ``endtime``: The end of the timestamp range the query searched. - ``hits``: The number of results from the query. - ``matches``: The number of matches that the rule returned after processing the hits. Note that this does not necessarily mean that alerts were triggered. - ``time_taken``: The number of seconds it took for this query to run. ``elastalert_status`` is what ElastAlert will use to determine what time range to query when it first starts to avoid duplicating queries. For each rule, it will start querying from the most recent endtime. If ElastAlert is running in debug mode, it will still attempt to base its start time by looking for the most recent search performed, but it will not write the results of any query back to Elasticsearch. elastalert ~~~~~~~~~~ ``elastalert`` is a log of information about every alert triggered and contains: - ``@timestamp``: The time when the document was uploaded to Elasticsearch. This is not the same as when the alert was sent, but rather when the rule outputs a match. - ``rule_name``: The name of the corresponding rule. - ``alert_info``: This contains the output of Alert.get_info, a function that alerts implement to give some relevant context to the alert type. This may contain alert_info.type, alert_info.recipient, or any number of other sub fields. - ``alert_sent``: A boolean value as to whether this alert was actually sent or not. It may be false in the case of an exception or if it is part of an aggregated alert. - ``alert_time``: The time that the alert was or will be sent. Usually, this is the same as @timestamp, but may be some time in the future, indicating when an aggregated alert will be sent. - ``match_body``: This is the contents of the match dictionary that is used to create the alert. The subfields may include a number of things containing information about the alert. - ``alert_exception``: This field is only present when the alert failed because of an exception occurring, and will contain the exception information. - ``aggregate_id``: This field is only present when the rule is configured to use aggregation. The first alert of the aggregation period will contain an alert_time set to the aggregation time into the future, and subsequent alerts will contain the document ID of the first. When the alert_time is reached, all alerts with that aggregate_id will be sent together. 
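To make the document layout concrete, a hypothetical ``elastalert`` entry (all field values below are invented purely for illustration) might look roughly like::

    rule_name: "Example frequency rule"
    "@timestamp": "2019-01-01T00:00:05Z"
    alert_time: "2019-01-01T00:00:05Z"
    alert_sent: true
    alert_info:
      type: "email"
      recipient: "ops@example.com"
    match_body:
      "@timestamp": "2019-01-01T00:00:00Z"
      some_field: "some value"
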
elastalert_error ~~~~~~~~~~~~~~~~ When an error occurs in ElastAlert, it is written to both Elasticsearch and to stderr. The ``elastalert_error`` type contains: - ``@timestamp``: The time when the error occurred. - ``message``: The error or exception message. - ``traceback``: The traceback from when the error occurred. - ``data``: Extra information about the error. This often contains the name of the rule which caused the error. silence ~~~~~~~ ``silence`` is a record of when alerts for a given rule will be suppressed, either because of a ``realert`` setting or from using --silence. When an alert with ``realert`` is triggered, a ``silence`` record will be written with ``until`` set to the alert time plus ``realert``. - ``@timestamp``: The time when the document was uploaded to Elasticsearch. - ``rule_name``: The name of the corresponding rule. - ``until``: The timestamp when alerts will begin being sent again. - ``exponent``: The exponential factor which multiplies ``realert``. The length of this silence is equal to ``realert`` * 2**exponent. This will be 0 unless ``exponential_realert`` is set. Whenever an alert is triggered, ElastAlert will check for a matching ``silence`` document, and if the ``until`` timestamp is in the future, it will ignore the alert completely. See the :ref:`Running ElastAlert ` section for information on how to silence an alert. elastalert-0.2.4/docs/source/index.rst000066400000000000000000000012741364615736500177720ustar00rootroot00000000000000.. ElastAlert documentation master file, created by sphinx-quickstart on Thu Jul 11 15:45:31 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. ElastAlert - Easy & Flexible Alerting With Elasticsearch ======================================================== Contents: .. toctree:: :maxdepth: 2 elastalert running_elastalert ruletypes elastalert_status recipes/adding_rules recipes/adding_alerts recipes/writing_filters recipes/adding_enhancements recipes/adding_loaders recipes/signing_requests Indices and Tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` elastalert-0.2.4/docs/source/recipes/000077500000000000000000000000001364615736500175575ustar00rootroot00000000000000elastalert-0.2.4/docs/source/recipes/adding_alerts.rst000066400000000000000000000111021364615736500231040ustar00rootroot00000000000000.. _writingalerts: Adding a New Alerter ==================== Alerters are subclasses of ``Alerter``, found in ``elastalert/alerts.py``. They are given matches and perform some action based on that. Your alerter needs to implement two member functions, and will look something like this: .. code-block:: python class AwesomeNewAlerter(Alerter): required_options = set(['some_config_option']) def alert(self, matches): ... def get_info(self): ... You can import alert types by specifying the type as ``module.file.AlertName``, where module is the name of a python module, and file is the name of the python file containing a ``Alerter`` subclass named ``AlertName``. Basics ------ The alerter class will be instantiated when ElastAlert starts, and be periodically passed matches through the ``alert`` method. ElastAlert also writes back info about the alert into Elasticsearch that it obtains through ``get_info``. Several important member properties: ``self.required_options``: This is a set containing names of configuration options that must be present. ElastAlert will not instantiate the alert if any are missing. 
``self.rule``: The dictionary containing the rule configuration. All options specific to the alert should be in the rule configuration file and can be accessed here. ``self.pipeline``: This is a dictionary object that serves to transfer information between alerts. When an alert is triggered, a new empty pipeline object will be created and each alerter can add or receive information from it. Note that alerters are called in the order they are defined in the rule file. For example, the JIRA alerter will add its ticket number to the pipeline and the email alerter will add that link if it's present in the pipeline. alert(self, match): ------------------- ElastAlert will call this function to send an alert. ``matches`` is a list of dictionary objects with information about the match. You can get a nice string representation of the match by calling ``self.rule['type'].get_match_str(match, self.rule)``. If this method raises an exception, it will be caught by ElastAlert and the alert will be marked as unsent and saved for later. get_info(self): --------------- This function is called to get information about the alert to save back to Elasticsearch. It should return a dictionary, which is uploaded directly to Elasticsearch, and should contain useful information about the alert such as the type, recipients, parameters, etc. Tutorial -------- Let's create a new alert that will write alerts to a local output file. First, create a modules folder in the base ElastAlert folder: .. code-block:: console $ mkdir elastalert_modules $ cd elastalert_modules $ touch __init__.py Now, in a file named ``my_alerts.py``, add .. code-block:: python from elastalert.alerts import Alerter, BasicMatchString class AwesomeNewAlerter(Alerter): # By setting required_options to a set of strings # You can ensure that the rule config file specifies all # of the options. Otherwise, ElastAlert will throw an exception # when trying to load the rule. required_options = set(['output_file_path']) # Alert is called def alert(self, matches): # Matches is a list of match dictionaries. # It contains more than one match when the alert has # the aggregation option set for match in matches: # Config options can be accessed with self.rule with open(self.rule['output_file_path'], "a") as output_file: # basic_match_string will transform the match into the default # human readable string format match_string = str(BasicMatchString(self.rule, match)) output_file.write(match_string) # get_info is called after an alert is sent to get data that is written back # to Elasticsearch in the field "alert_info" # It should return a dict of information relevant to what the alert does def get_info(self): return {'type': 'Awesome Alerter', 'output_file': self.rule['output_file_path']} In the rule configuration file, we are going to specify the alert by writing .. code-block:: yaml alert: "elastalert_modules.my_alerts.AwesomeNewAlerter" output_file_path: "/tmp/alerts.log" ElastAlert will attempt to import the alert with ``from elastalert_modules.my_alerts import AwesomeNewAlerter``. This means that the folder must be in a location where it can be imported as a python module. elastalert-0.2.4/docs/source/recipes/adding_enhancements.rst000066400000000000000000000041211364615736500242650ustar00rootroot00000000000000.. _enhancements: Enhancements ============ Enhancements are modules which let you modify a match before an alert is sent. They should subclass ``BaseEnhancement``, found in ``elastalert/enhancements.py``. 
They can be added to rules using the ``match_enhancements`` option:: match_enhancements: - module.file.MyEnhancement where module is the name of a Python module, or folder containing ``__init__.py``, and file is the name of the Python file containing a ``BaseEnhancement`` subclass named ``MyEnhancement``. A special exception class ```DropMatchException``` can be used in enhancements to drop matches if custom conditions are met. For example: .. code-block:: python class MyEnhancement(BaseEnhancement): def process(self, match): # Drops a match if "field_1" == "field_2" if match['field_1'] == match['field_2']: raise DropMatchException() Example ------- As an example enhancement, let's add a link to a whois website. The match must contain a field named domain and it will add an entry named domain_whois_link. First, create a modules folder for the enhancement in the ElastAlert directory. .. code-block:: console $ mkdir elastalert_modules $ cd elastalert_modules $ touch __init__.py Now, in a file named ``my_enhancements.py``, add .. code-block:: python from elastalert.enhancements import BaseEnhancement class MyEnhancement(BaseEnhancement): # The enhancement is run against every match # The match is passed to the process function where it can be modified in any way # ElastAlert will do this for each enhancement linked to a rule def process(self, match): if 'domain' in match: url = "http://who.is/whois/%s" % (match['domain']) match['domain_whois_link'] = url Enhancements will not automatically be run. Inside the rule configuration file, you need to point it to the enhancement(s) that it should run by setting the ``match_enhancements`` option:: match_enhancements: - "elastalert_modules.my_enhancements.MyEnhancement" elastalert-0.2.4/docs/source/recipes/adding_loaders.rst000066400000000000000000000052101364615736500232460ustar00rootroot00000000000000.. _loaders: Rules Loaders ======================== RulesLoaders are subclasses of ``RulesLoader``, found in ``elastalert/loaders.py``. They are used to gather rules for a particular source. Your RulesLoader needs to implement three member functions, and will look something like this: .. code-block:: python class AwesomeNewRulesLoader(RulesLoader): def get_names(self, conf, use_rule=None): ... def get_hashes(self, conf, use_rule=None): ... def get_yaml(self, rule): ... You can import loaders by specifying the type as ``module.file.RulesLoaderName``, where module is the name of a python module, and file is the name of the python file containing a ``RulesLoader`` subclass named ``RulesLoaderName``. Example ------- As an example loader, let's retrieve rules from a database rather than from the local file system. First, create a modules folder for the loader in the ElastAlert directory. .. code-block:: console $ mkdir elastalert_modules $ cd elastalert_modules $ touch __init__.py Now, in a file named ``mongo_loader.py``, add .. 
code-block:: python from pymongo import MongoClient from elastalert.loaders import RulesLoader import yaml class MongoRulesLoader(RulesLoader): def __init__(self, conf): super(MongoRulesLoader, self).__init__(conf) self.client = MongoClient(conf['mongo_url']) self.db = self.client[conf['mongo_db']] self.cache = {} def get_names(self, conf, use_rule=None): if use_rule: return [use_rule] rules = [] self.cache = {} for rule in self.db.rules.find(): self.cache[rule['name']] = yaml.load(rule['yaml']) rules.append(rule['name']) return rules def get_hashes(self, conf, use_rule=None): if use_rule: return [use_rule] hashes = {} self.cache = {} for rule in self.db.rules.find(): self.cache[rule['name']] = rule['yaml'] hashes[rule['name']] = rule['hash'] return hashes def get_yaml(self, rule): if rule in self.cache: return self.cache[rule] self.cache[rule] = yaml.load(self.db.rules.find_one({'name': rule})['yaml']) return self.cache[rule] Finally, you need to specify in your ElastAlert configuration file that MongoRulesLoader should be used instead of the default FileRulesLoader, so in your ``elastalert.conf`` file:: rules_loader: "elastalert_modules.mongo_loader.MongoRulesLoader" elastalert-0.2.4/docs/source/recipes/adding_rules.rst000066400000000000000000000156201364615736500227550ustar00rootroot00000000000000.. _writingrules: Adding a New Rule Type ====================== This document describes how to create a new rule type. Built in rule types live in ``elastalert/ruletypes.py`` and are subclasses of ``RuleType``. At the minimum, your rule needs to implement ``add_data``. Your class may implement several functions from ``RuleType``: .. code-block:: python class AwesomeNewRule(RuleType): # ... def add_data(self, data): # ... def get_match_str(self, match): # ... def garbage_collect(self, timestamp): # ... You can import new rule types by specifying the type as ``module.file.RuleName``, where module is the name of a Python module, or folder containing ``__init__.py``, and file is the name of the Python file containing a ``RuleType`` subclass named ``RuleName``. Basics ------ The ``RuleType`` instance remains in memory while ElastAlert is running, receives data, keeps track of its state, and generates matches. Several important member properties are created in the ``__init__`` method of ``RuleType``: ``self.rules``: This dictionary is loaded from the rule configuration file. If there is a ``timeframe`` configuration option, this will be automatically converted to a ``datetime.timedelta`` object when the rules are loaded. ``self.matches``: This is where ElastAlert checks for matches from the rule. Whatever information is relevant to the match (generally coming from the fields in Elasticsearch) should be put into a dictionary object and added to ``self.matches``. ElastAlert will pop items out periodically and send alerts based on these objects. It is recommended that you use ``self.add_match(match)`` to add matches. In addition to appending to ``self.matches``, ``self.add_match`` will convert the datetime ``@timestamp`` back into an ISO8601 timestamp. ``self.required_options``: This is a set of options that must exist in the configuration file. ElastAlert will ensure that all of these fields exist before trying to instantiate a ``RuleType`` instance. add_data(self, data): --------------------- When ElastAlert queries Elasticsearch, it will pass all of the hits to the rule type by calling ``add_data``. 
``data`` is a list of dictionary objects which contain all of the fields in ``include``, ``query_key`` and ``compare_key`` if they exist, and ``@timestamp`` as a datetime object. They will always come in chronological order sorted by '@timestamp'. get_match_str(self, match): --------------------------- Alerts will call this function to get a human readable string about a match for an alert. Match will be the same object that was added to ``self.matches``, and ``rules`` the same as ``self.rules``. The ``RuleType`` base implementation will return an empty string. Note that by default, the alert text will already contain the key-value pairs from the match. This should return a string that gives some information about the match in the context of this specific RuleType. garbage_collect(self, timestamp): --------------------------------- This will be called after ElastAlert has run over a time period ending in ``timestamp`` and should be used to clear any state that may be obsolete as of ``timestamp``. ``timestamp`` is a datetime object. Tutorial -------- As an example, we are going to create a rule type for detecting suspicious logins. Let's imagine the data we are querying is login events that contains IP address, username and a timestamp. Our configuration will take a list of usernames and a time range and alert if a login occurs in the time range. First, let's create a modules folder in the base ElastAlert folder: .. code-block:: console $ mkdir elastalert_modules $ cd elastalert_modules $ touch __init__.py Now, in a file named ``my_rules.py``, add .. code-block:: python import dateutil.parser from elastalert.ruletypes import RuleType # elastalert.util includes useful utility functions # such as converting from timestamp to datetime obj from elastalert.util import ts_to_dt class AwesomeRule(RuleType): # By setting required_options to a set of strings # You can ensure that the rule config file specifies all # of the options. Otherwise, ElastAlert will throw an exception # when trying to load the rule. required_options = set(['time_start', 'time_end', 'usernames']) # add_data will be called each time Elasticsearch is queried. # data is a list of documents from Elasticsearch, sorted by timestamp, # including all the fields that the config specifies with "include" def add_data(self, data): for document in data: # To access config options, use self.rules if document['username'] in self.rules['usernames']: # Convert the timestamp to a time object login_time = document['@timestamp'].time() # Convert time_start and time_end to time objects time_start = dateutil.parser.parse(self.rules['time_start']).time() time_end = dateutil.parser.parse(self.rules['time_end']).time() # If the time falls between start and end if login_time > time_start and login_time < time_end: # To add a match, use self.add_match self.add_match(document) # The results of get_match_str will appear in the alert text def get_match_str(self, match): return "%s logged in between %s and %s" % (match['username'], self.rules['time_start'], self.rules['time_end']) # garbage_collect is called indicating that ElastAlert has already been run up to timestamp # It is useful for knowing that there were no query results from Elasticsearch because # add_data will not be called with an empty list def garbage_collect(self, timestamp): pass In the rule configuration file, ``example_rules/example_login_rule.yaml``, we are going to specify this rule by writing .. 
code-block:: yaml name: "Example login rule" es_host: elasticsearch.example.com es_port: 14900 type: "elastalert_modules.my_rules.AwesomeRule" # Alert if admin, userXYZ or foobaz log in between 8 PM and midnight time_start: "20:00" time_end: "24:00" usernames: - "admin" - "userXYZ" - "foobaz" # We require the username field from documents include: - "username" alert: - debug ElastAlert will attempt to import the rule with ``from elastalert_modules.my_rules import AwesomeRule``. This means that the folder must be in a location where it can be imported as a Python module. An alert from this rule will look something like:: Example login rule userXYZ logged in between 20:00 and 24:00 @timestamp: 2015-03-02T22:23:24Z username: userXYZ elastalert-0.2.4/docs/source/recipes/signing_requests.rst000066400000000000000000000032521364615736500237040ustar00rootroot00000000000000.. _signingrequests: Signing requests to Amazon Elasticsearch service ================================================ When using Amazon Elasticsearch service, you need to secure your Elasticsearch from the outside. Currently, there is no way to secure your Elasticsearch using network firewall rules, so the only way is to signing the requests using the access key and secret key for a role or user with permissions on the Elasticsearch service. You can sign requests to AWS using any of the standard AWS methods of providing credentials. - Environment Variables, ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` - AWS Config or Credential Files, ``~/.aws/config`` and ``~/.aws/credentials`` - AWS Instance Profiles, uses the EC2 Metadata service Using an Instance Profile ------------------------- Typically, you'll deploy ElastAlert on a running EC2 instance on AWS. You can assign a role to this instance that gives it permissions to read from and write to the Elasticsearch service. When using an Instance Profile, you will need to specify the ``aws_region`` in the configuration file or set the ``AWS_DEFAULT_REGION`` environment variable. Using AWS profiles ------------------ You can also create a user with permissions on the Elasticsearch service and tell ElastAlert to authenticate itself using that user. First, create an AWS profile in the machine where you'd like to run ElastAlert for the user with permissions. You can use the environment variables ``AWS_DEFAULT_PROFILE`` and ``AWS_DEFAULT_REGION`` or add two options to the configuration file: - ``aws_region``: The AWS region where you want to operate. - ``profile``: The name of the AWS profile to use to sign the requests. elastalert-0.2.4/docs/source/recipes/writing_filters.rst000066400000000000000000000104761364615736500235340ustar00rootroot00000000000000.. _writingfilters: Writing Filters For Rules ========================= This document describes how to create a filter section for your rule config file. The filters used in rules are part of the Elasticsearch query DSL, further documentation for which can be found at https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html This document contains a small subset of particularly useful filters. The filter section is passed to Elasticsearch exactly as follows:: filter: and: filters: - [filters from rule.yaml] Every result that matches these filters will be passed to the rule for processing. Common Filter Types: -------------------- query_string ************ The query_string type follows the Lucene query format and can be used for partial or full matches to multiple fields. 
See http://lucene.apache.org/core/2_9_4/queryparsersyntax.html for more information:: filter: - query: query_string: query: "username: bob" - query: query_string: query: "_type: login_logs" - query: query_string: query: "field: value OR otherfield: othervalue" - query: query_string: query: "this: that AND these: those" term **** The term type allows for exact field matches:: filter: - term: name_field: "bob" - term: _type: "login_logs" Note that a term query may not behave as expected if a field is analyzed. By default, many string fields will be tokenized by whitespace, and a term query for "foo bar" may not match a field that appears to have the value "foo bar", unless it is not analyzed. Conversely, a term query for "foo" will match analyzed strings "foo bar" and "foo baz". For full text matching on analyzed fields, use query_string. See https://www.elastic.co/guide/en/elasticsearch/guide/current/term-vs-full-text.html `terms `_ ***************************************************************************************************** Terms allows for easy combination of multiple term filters:: filter: - terms: field: ["value1", "value2"] # value1 OR value2 You can also match on multiple fields:: - terms: fieldX: ["value1", "value2"] fieldY: ["something", "something_else"] fieldZ: ["foo", "bar", "baz"] wildcard ******** For wildcard matches:: filter: - query: wildcard: field: "foo*bar" range ***** For ranges on fields:: filter: - range: status_code: from: 500 to: 599 Negation, and, or ***************** For Elasticsearch 2.X, any of the filters can be embedded in ``not``, ``and``, and ``or``:: filter: - or: - term: field: "value" - wildcard: field: "foo*bar" - and: - not: term: field: "value" - not: term: _type: "something" For Elasticsearch 5.x, this will not work and to implement boolean logic use query strings:: filter: - query: query_string: query: "somefield: somevalue OR foo: bar" Loading Filters Directly From Kibana 3 -------------------------------------- There are two ways to load filters directly from a Kibana 3 dashboard. You can set your filter to:: filter: download_dashboard: "My Dashboard Name" and when ElastAlert starts, it will download the dashboard schema from Elasticsearch and use the filters from that. However, if the dashboard name changes or if there is connectivity problems when ElastAlert starts, the rule will not load and ElastAlert will exit with an error like "Could not download filters for .." The second way is to generate a config file once using the Kibana dashboard. To do this, run ``elastalert-rule-from-kibana``. .. code-block:: console $ elastalert-rule-from-kibana Elasticsearch host: elasticsearch.example.com Elasticsearch port: 14900 Dashboard name: My Dashboard Partial Config file ----------- name: My Dashboard es_host: elasticsearch.example.com es_port: 14900 filter: - query: query_string: {query: '_exists_:log.message'} - query: query_string: {query: 'some_field:12345'} elastalert-0.2.4/docs/source/ruletypes.rst000066400000000000000000003560351364615736500207270ustar00rootroot00000000000000Rule Types and Configuration Options ************************************ Examples of several types of rule configuration can be found in the example_rules folder. .. _commonconfig: .. note:: All "time" formats are of the form ``unit: X`` where unit is one of weeks, days, hours, minutes or seconds. Such as ``minutes: 15`` or ``hours: 1``. 
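For instance, a rule that evaluates a four-hour window and suppresses repeat alerts for thirty minutes would express both values in this nested format (an illustrative fragment, not a complete rule)::

    timeframe:
      hours: 4
    realert:
      minutes: 30
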
Rule Configuration Cheat Sheet ============================== +--------------------------------------------------------------------------+ | FOR ALL RULES | +==============================================================+===========+ | ``es_host`` (string) | Required | +--------------------------------------------------------------+ | | ``es_port`` (number) | | +--------------------------------------------------------------+ | | ``index`` (string) | | +--------------------------------------------------------------+ | | ``type`` (string) | | +--------------------------------------------------------------+ | | ``alert`` (string or list) | | +--------------------------------------------------------------+-----------+ | ``name`` (string, defaults to the filename) | | +--------------------------------------------------------------+ | | ``use_strftime_index`` (boolean, default False) | Optional | +--------------------------------------------------------------+ | | ``use_ssl`` (boolean, default False) | | +--------------------------------------------------------------+ | | ``verify_certs`` (boolean, default True) | | +--------------------------------------------------------------+ | | ``es_username`` (string, no default) | | +--------------------------------------------------------------+ | | ``es_password`` (string, no default) | | +--------------------------------------------------------------+ | | ``es_url_prefix`` (string, no default) | | +--------------------------------------------------------------+ | | ``es_send_get_body_as`` (string, default "GET") | | +--------------------------------------------------------------+ | | ``aggregation`` (time, no default) | | +--------------------------------------------------------------+ | | ``description`` (string, default empty string) | | +--------------------------------------------------------------+ | | ``generate_kibana_link`` (boolean, default False) | | +--------------------------------------------------------------+ | | ``use_kibana_dashboard`` (string, no default) | | +--------------------------------------------------------------+ | | ``kibana_url`` (string, default from es_host) | | +--------------------------------------------------------------+ | | ``use_kibana4_dashboard`` (string, no default) | | +--------------------------------------------------------------+ | | ``kibana4_start_timedelta`` (time, default: 10 min) | | +--------------------------------------------------------------+ | | ``kibana4_end_timedelta`` (time, default: 10 min) | | +--------------------------------------------------------------+ | | ``generate_kibana_discover_url`` (boolean, default False) | | +--------------------------------------------------------------+ | | ``kibana_discover_app_url`` (string, no default) | | +--------------------------------------------------------------+ | | ``kibana_discover_version`` (string, no default) | | +--------------------------------------------------------------+ | | ``kibana_discover_index_pattern_id`` (string, no default) | | +--------------------------------------------------------------+ | | ``kibana_discover_columns`` (list of strs, default _source) | | +--------------------------------------------------------------+ | | ``kibana_discover_from_timedelta`` (time, default: 10 min) | | +--------------------------------------------------------------+ | | ``kibana_discover_to_timedelta`` (time, default: 10 min) | | +--------------------------------------------------------------+ | | ``use_local_time`` (boolean, default True) | | 
+--------------------------------------------------------------+ | | ``realert`` (time, default: 1 min) | | +--------------------------------------------------------------+ | | ``exponential_realert`` (time, no default) | | +--------------------------------------------------------------+ | | ``match_enhancements`` (list of strs, no default) | | +--------------------------------------------------------------+ | | ``top_count_number`` (int, default 5) | | +--------------------------------------------------------------+ | | ``top_count_keys`` (list of strs) | | +--------------------------------------------------------------+ | | ``raw_count_keys`` (boolean, default True) | | +--------------------------------------------------------------+ | | ``include`` (list of strs, default ["*"]) | | +--------------------------------------------------------------+ | | ``filter`` (ES filter DSL, no default) | | +--------------------------------------------------------------+ | | ``max_query_size`` (int, default global max_query_size) | | +--------------------------------------------------------------+ | | ``query_delay`` (time, default 0 min) | | +--------------------------------------------------------------+ | | ``owner`` (string, default empty string) | | +--------------------------------------------------------------+ | | ``priority`` (int, default 2) | | +--------------------------------------------------------------+ | | ``category`` (string, default empty string) | | +--------------------------------------------------------------+ | | ``scan_entire_timeframe`` (bool, default False) | | +--------------------------------------------------------------+ | | ``import`` (string) | | | | | | IGNORED IF ``use_count_query`` or ``use_terms_query`` is true| | +--------------------------------------------------------------+ + | ``buffer_time`` (time, default from config.yaml) | | +--------------------------------------------------------------+ | | ``timestamp_type`` (string, default iso) | | +--------------------------------------------------------------+ | | ``timestamp_format`` (string, default "%Y-%m-%dT%H:%M:%SZ") | | +--------------------------------------------------------------+ | | ``timestamp_format_expr`` (string, no default ) | | +--------------------------------------------------------------+ | | ``_source_enabled`` (boolean, default True) | | +--------------------------------------------------------------+ | | ``alert_text_args`` (array of strs) | | +--------------------------------------------------------------+ | | ``alert_text_kw`` (object) | | +--------------------------------------------------------------+ | | ``alert_missing_value`` (string, default "") | | +--------------------------------------------------------------+ | | ``is_enabled`` (boolean, default True) | | +--------------------------------------------------------------+-----------+ | ``search_extra_index`` (boolean, default False) | | +--------------------------------------------------------------+-----------+ | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | RULE TYPE | Any | Blacklist | Whitelist | Change | Frequency | Spike | Flatline |New_term|Cardinality| +====================================================+========+===========+===========+========+===========+=======+==========+========+===========+ | ``compare_key`` (list of strs, no default) | | Req | Req | Req | | | | | | 
+----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``blacklist`` (list of strs, no default) | | Req | | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``whitelist`` (list of strs, no default) | | | Req | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``ignore_null`` (boolean, no default) | | | Req | Req | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``query_key`` (string, no default) | Opt | | | Req | Opt | Opt | Opt | Req | Opt | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``aggregation_key`` (string, no default) | Opt | | | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``summary_table_fields`` (list, no default) | Opt | | | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``timeframe`` (time, no default) | | | | Opt | Req | Req | Req | | Req | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``num_events`` (int, no default) | | | | | Req | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``attach_related`` (boolean, no default) | | | | | Opt | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``use_count_query`` (boolean, no default) | | | | | Opt | Opt | Opt | | | | | | | | | | | | | | |``doc_type`` (string, no default) | | | | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``use_terms_query`` (boolean, no default) | | | | | Opt | Opt | | Opt | | | | | | | | | | | | | |``doc_type`` (string, no default) | | | | | | | | | | | | | | | | | | | | | |``query_key`` (string, no default) | | | | | | | | | | | | | | | | | | | | | |``terms_size`` (int, default 50) | | | | | | | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ | ``spike_height`` (int, no default) | | | | | | Req | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``spike_type`` ([up|down|both], no default) | | | | | | Req | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``alert_on_new_data`` (boolean, default False) | | | | | | Opt | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``threshold_ref`` (int, no default) 
| | | | | | Opt | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``threshold_cur`` (int, no default) | | | | | | Opt | | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``threshold`` (int, no default) | | | | | | | Req | | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``fields`` (string or list, no default) | | | | | | | | Req | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``terms_window_size`` (time, default 30 days) | | | | | | | | Opt | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``window_step_size`` (time, default 1 day) | | | | | | | | Opt | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``alert_on_missing_fields`` (boolean, default False)| | | | | | | | Opt | | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``cardinality_field`` (string, no default) | | | | | | | | | Req | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``max_cardinality`` (boolean, no default) | | | | | | | | | Opt | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ |``min_cardinality`` (boolean, no default) | | | | | | | | | Opt | +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+ Common Configuration Options ============================ Every file that ends in ``.yaml`` in the ``rules_folder`` will be run by default. The following configuration settings are common to all types of rules. Required Settings ~~~~~~~~~~~~~~~~~ es_host ^^^^^^^ ``es_host``: The hostname of the Elasticsearch cluster the rule will use to query. (Required, string, no default) The environment variable ``ES_HOST`` will override this field. es_port ^^^^^^^ ``es_port``: The port of the Elasticsearch cluster. (Required, number, no default) The environment variable ``ES_PORT`` will override this field. index ^^^^^ ``index``: The name of the index that will be searched. Wildcards can be used here, such as: ``index: my-index-*`` which will match ``my-index-2014-10-05``. You can also use a format string containing ``%Y`` for year, ``%m`` for month, and ``%d`` for day. To use this, you must also set ``use_strftime_index`` to true. (Required, string, no default) name ^^^^ ``name``: The name of the rule. This must be unique across all rules. The name will be used in alerts and used as a key when writing and reading search metadata back from Elasticsearch. (Required, string, no default) type ^^^^ ``type``: The ``RuleType`` to use. This may either be one of the built in rule types, see :ref:`Rule Types ` section below for more information, or loaded from a module. 
For loading from a module, the type should be specified as ``module.file.RuleName``. (Required, string, no default) alert ^^^^^ ``alert``: The ``Alerter`` type to use. This may be one or more of the built in alerts, see :ref:`Alert Types ` section below for more information, or loaded from a module. For loading from a module, the alert should be specified as ``module.file.AlertName``. (Required, string or list, no default) Optional Settings ~~~~~~~~~~~~~~~~~ import ^^^^^^ ``import``: If specified includes all the settings from this yaml file. This allows common config options to be shared. Note that imported files that aren't complete rules should not have a ``.yml`` or ``.yaml`` suffix so that ElastAlert doesn't treat them as rules. Filters in imported files are merged (ANDed) with any filters in the rule. You can only have one import per rule, though the imported file can import another file, recursively. The filename can be an absolute path or relative to the rules directory. (Optional, string, no default) use_ssl ^^^^^^^ ``use_ssl``: Whether or not to connect to ``es_host`` using TLS. (Optional, boolean, default False) The environment variable ``ES_USE_SSL`` will override this field. verify_certs ^^^^^^^^^^^^ ``verify_certs``: Whether or not to verify TLS certificates. (Optional, boolean, default True) client_cert ^^^^^^^^^^^ ``client_cert``: Path to a PEM certificate to use as the client certificate (Optional, string, no default) client_key ^^^^^^^^^^^ ``client_key``: Path to a private key file to use as the client key (Optional, string, no default) ca_certs ^^^^^^^^ ``ca_certs``: Path to a CA cert bundle to use to verify SSL connections (Optional, string, no default) es_username ^^^^^^^^^^^ ``es_username``: basic-auth username for connecting to ``es_host``. (Optional, string, no default) The environment variable ``ES_USERNAME`` will override this field. es_password ^^^^^^^^^^^ ``es_password``: basic-auth password for connecting to ``es_host``. (Optional, string, no default) The environment variable ``ES_PASSWORD`` will override this field. es_url_prefix ^^^^^^^^^^^^^ ``es_url_prefix``: URL prefix for the Elasticsearch endpoint. (Optional, string, no default) es_send_get_body_as ^^^^^^^^^^^^^^^^^^^ ``es_send_get_body_as``: Method for querying Elasticsearch. (Optional, string, default "GET") use_strftime_index ^^^^^^^^^^^^^^^^^^ ``use_strftime_index``: If this is true, ElastAlert will format the index using datetime.strftime for each query. See https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior for more details. If a query spans multiple days, the formatted indexes will be concatenated with commas. This is useful as narrowing the number of indexes searched, compared to using a wildcard, may be significantly faster. For example, if ``index`` is ``logstash-%Y.%m.%d``, the query url will be similar to ``elasticsearch.example.com/logstash-2015.02.03/...`` or ``elasticsearch.example.com/logstash-2015.02.03,logstash-2015.02.04/...``. search_extra_index ^^^^^^^^^^^^^^^^^^ ``search_extra_index``: If this is true, ElastAlert will add an extra index on the early side onto each search. For example, if it's querying completely within 2018-06-28, it will actually use 2018-06-27,2018-06-28. This can be useful if your timestamp_field is not what's being used to generate the index names. If that's the case, sometimes a query would not have been using the right index. 
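As a rough illustration of the index options above (the hostname, port and index pattern are placeholders), a rule searching daily Logstash indices might combine them like this::

    es_host: elasticsearch.example.com
    es_port: 14900
    index: logstash-%Y.%m.%d
    use_strftime_index: true
    search_extra_index: true
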
aggregation ^^^^^^^^^^^ ``aggregation``: This option allows you to aggregate multiple matches together into one alert. Every time a match is found, ElastAlert will wait for the ``aggregation`` period, and send all of the matches that have occurred in that time for a particular rule together. For example:: aggregation: hours: 2 means that if one match occurred at 12:00, another at 1:00, and a third at 2:30, one alert would be sent at 2:00, containing the first two matches, and another at 4:30, containing the third match plus any additional matches occurring before 4:30. This can be very useful if you expect a large number of matches and only want a periodic report. (Optional, time, default none) If you wish to aggregate all your alerts and send them on a recurring interval, you can do that using the ``schedule`` field. For example, if you wish to receive alerts every Monday and Friday:: aggregation: schedule: '2 4 * * mon,fri' This uses Cron syntax, which you can read more about `here `_. Make sure to `only` include either a schedule field or standard datetime fields (such as ``hours``, ``minutes``, ``days``), not both. By default, all events that occur during an aggregation window are grouped together. However, if your rule has the ``aggregation_key`` field set, then each event sharing a common key value will be grouped together. A separate aggregation window will be made for each newly encountered key value. For example, if you wish to receive alerts that are grouped by the user who triggered the event, you can set:: aggregation_key: 'my_data.username' Then, assuming an aggregation window of 10 minutes, if you receive the following data points:: {'my_data': {'username': 'alice', 'event_type': 'login'}, '@timestamp': '2016-09-20T00:00:00'} {'my_data': {'username': 'bob', 'event_type': 'something'}, '@timestamp': '2016-09-20T00:05:00'} {'my_data': {'username': 'alice', 'event_type': 'something else'}, '@timestamp': '2016-09-20T00:06:00'} This should result in 2 alerts: One containing alice's two events, sent at ``2016-09-20T00:10:00`` and one containing bob's one event sent at ``2016-09-20T00:16:00`` For aggregations, there can sometimes be a large number of documents present in the viewing medium (email, jira ticket, etc..). If you set the ``summary_table_fields`` field, Elastalert will provide a summary of the specified fields from all the results. For example, if you wish to summarize the usernames and event_types that appear in the documents so that you can see the most relevant fields at a quick glance, you can set:: summary_table_fields: - my_data.username - my_data.event_type Then, for the same sample data shown above listing alice and bob's events, Elastalert will provide the following summary table in the alert medium:: +------------------+--------------------+ | my_data.username | my_data.event_type | +------------------+--------------------+ | alice | login | | bob | something | | alice | something else | +------------------+--------------------+ .. note:: By default, aggregation time is relative to the current system time, not the time of the match. This means that running elastalert over past events will result in different alerts than if elastalert had been running while those events occured. This behavior can be changed by setting ``aggregate_by_match_time``. aggregate_by_match_time ^^^^^^^^^^^^^^^^^^^^^^^ Setting this to true will cause aggregations to be created relative to the timestamp of the first event, rather than the current time. 
This is useful for querying over historic data or if using a very large buffer_time and you want multiple aggregations to occur from a single query. realert ^^^^^^^ ``realert``: This option allows you to ignore repeating alerts for a period of time. If the rule uses a ``query_key``, this option will be applied on a per key basis. All matches for a given rule, or for matches with the same ``query_key``, will be ignored for the given time. All matches with a missing ``query_key`` will be grouped together using a value of ``_missing``. This is applied to the time the alert is sent, not to the time of the event. It defaults to one minute, which means that if ElastAlert is run over a large time period which triggers many matches, only the first alert will be sent by default. If you want every alert, set realert to 0 minutes. (Optional, time, default 1 minute) exponential_realert ^^^^^^^^^^^^^^^^^^^ ``exponential_realert``: This option causes the value of ``realert`` to exponentially increase while alerts continue to fire. If set, the value of ``exponential_realert`` is the maximum ``realert`` will increase to. If the time between alerts is less than twice ``realert``, ``realert`` will double. For example, if ``realert: minutes: 10`` and ``exponential_realert: hours: 1``, an alerts fires at 1:00 and another at 1:15, the next alert will not be until at least 1:35. If another alert fires between 1:35 and 2:15, ``realert`` will increase to the 1 hour maximum. If more than 2 hours elapse before the next alert, ``realert`` will go back down. Note that alerts that are ignored (e.g. one that occurred at 1:05) would not change ``realert``. (Optional, time, no default) buffer_time ^^^^^^^^^^^ ``buffer_time``: This options allows the rule to override the ``buffer_time`` global setting defined in config.yaml. This value is ignored if ``use_count_query`` or ``use_terms_query`` is true. (Optional, time) query_delay ^^^^^^^^^^^ ``query_delay``: This option will cause ElastAlert to subtract a time delta from every query, causing the rule to run with a delay. This is useful if the data is Elasticsearch doesn't get indexed immediately. (Optional, time) owner ^^^^^ ``owner``: This value will be used to identify the stakeholder of the alert. Optionally, this field can be included in any alert type. (Optional, string) priority ^^^^^^^^ ``priority``: This value will be used to identify the relative priority of the alert. Optionally, this field can be included in any alert type (e.g. for use in email subject/body text). (Optional, int, default 2) category ^^^^^^^^ ``category``: This value will be used to identify the category of the alert. Optionally, this field can be included in any alert type (e.g. for use in email subject/body text). (Optional, string, default empty string) max_query_size ^^^^^^^^^^^^^^ ``max_query_size``: The maximum number of documents that will be downloaded from Elasticsearch in a single query. If you expect a large number of results, consider using ``use_count_query`` for the rule. If this limit is reached, a warning will be logged but ElastAlert will continue without downloading more results. This setting will override a global ``max_query_size``. (Optional, int, default value of global ``max_query_size``) filter ^^^^^^ ``filter``: A list of Elasticsearch query DSL filters that is used to query Elasticsearch. ElastAlert will query Elasticsearch using the format ``{'filter': {'bool': {'must': [config.filter]}}}`` with an additional timestamp range filter. 
All of the results of querying with these filters are passed to the ``RuleType`` for analysis. For more information writing filters, see :ref:`Writing Filters `. (Required, Elasticsearch query DSL, no default) include ^^^^^^^ ``include``: A list of terms that should be included in query results and passed to rule types and alerts. When set, only those fields, along with '@timestamp', ``query_key``, ``compare_key``, and ``top_count_keys`` are included, if present. (Optional, list of strings, default all fields) top_count_keys ^^^^^^^^^^^^^^ ``top_count_keys``: A list of fields. ElastAlert will perform a terms query for the top X most common values for each of the fields, where X is 5 by default, or ``top_count_number`` if it exists. For example, if ``num_events`` is 100, and ``top_count_keys`` is ``- "username"``, the alert will say how many of the 100 events have each username, for the top 5 usernames. When this is computed, the time range used is from ``timeframe`` before the most recent event to 10 minutes past the most recent event. Because ElastAlert uses an aggregation query to compute this, it will attempt to use the field name plus ".raw" to count unanalyzed terms. To turn this off, set ``raw_count_keys`` to false. top_count_number ^^^^^^^^^^^^^^^^ ``top_count_number``: The number of terms to list if ``top_count_keys`` is set. (Optional, integer, default 5) raw_count_keys ^^^^^^^^^^^^^^ ``raw_count_keys``: If true, all fields in ``top_count_keys`` will have ``.raw`` appended to them. (Optional, boolean, default true) description ^^^^^^^^^^^ ``description``: text describing the purpose of rule. (Optional, string, default empty string) Can be referenced in custom alerters to provide context as to why a rule might trigger. generate_kibana_link ^^^^^^^^^^^^^^^^^^^^ ``generate_kibana_link``: This option is for Kibana 3 only. If true, ElastAlert will generate a temporary Kibana dashboard and include a link to it in alerts. The dashboard consists of an events over time graph and a table with ``include`` fields selected in the table. If the rule uses ``query_key``, the dashboard will also contain a filter for the ``query_key`` of the alert. The dashboard schema will be uploaded to the kibana-int index as a temporary dashboard. (Optional, boolean, default False) kibana_url ^^^^^^^^^^ ``kibana_url``: The url to access Kibana. This will be used if ``generate_kibana_link`` or ``use_kibana_dashboard`` is true. If not specified, a URL will be constructed using ``es_host`` and ``es_port``. (Optional, string, default ``http://:/_plugin/kibana/``) use_kibana_dashboard ^^^^^^^^^^^^^^^^^^^^ ``use_kibana_dashboard``: The name of a Kibana 3 dashboard to link to. Instead of generating a dashboard from a template, ElastAlert can use an existing dashboard. It will set the time range on the dashboard to around the match time, upload it as a temporary dashboard, add a filter to the ``query_key`` of the alert if applicable, and put the url to the dashboard in the alert. (Optional, string, no default) use_kibana4_dashboard ^^^^^^^^^^^^^^^^^^^^^ ``use_kibana4_dashboard``: A link to a Kibana 4 dashboard. For example, "https://kibana.example.com/#/dashboard/My-Dashboard". This will set the time setting on the dashboard from the match time minus the timeframe, to 10 minutes after the match time. Note that this does not support filtering by ``query_key`` like Kibana 3. This value can use `$VAR` and `${VAR}` references to expand environment variables. 
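To tie several of the query options described above together (``filter``, ``include`` and ``top_count_keys``), here is a hedged sketch of a rule fragment; the field names and query are hypothetical::

    # Only fetch the listed fields, and report the top 3 usernames
    # among matching documents in the alert text.
    filter:
    - query:
        query_string:
          query: "response: 500"
    include: ["username", "response"]
    top_count_keys: ["username"]
    top_count_number: 3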
kibana4_start_timedelta ^^^^^^^^^^^^^^^^^^^^^^^ ``kibana4_start_timedelta``: Defaults to 10 minutes. This option allows you to specify the start time for the generated kibana4 dashboard. This value is added in front of the event. For example, ``kibana4_start_timedelta: minutes: 2`` kibana4_end_timedelta ^^^^^^^^^^^^^^^^^^^^^ ``kibana4_end_timedelta``: Defaults to 10 minutes. This option allows you to specify the end time for the generated kibana4 dashboard. This value is added in back of the event. For example, ``kibana4_end_timedelta: minutes: 2`` generate_kibana_discover_url ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``generate_kibana_discover_url``: Enables the generation of the ``kibana_discover_url`` variable for the Kibana Discover application. This setting requires the following settings are also configured: - ``kibana_discover_app_url`` - ``kibana_discover_version`` - ``kibana_discover_index_pattern_id`` ``generate_kibana_discover_url: true`` kibana_discover_app_url ^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_app_url``: The url of the Kibana Discover application used to generate the ``kibana_discover_url`` variable. This value can use `$VAR` and `${VAR}` references to expand environment variables. ``kibana_discover_app_url: http://kibana:5601/#/discover`` kibana_discover_version ^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_version``: Specifies the version of the Kibana Discover application. The currently supported versions of Kibana Discover are: - `5.6` - `6.0`, `6.1`, `6.2`, `6.3`, `6.4`, `6.5`, `6.6`, `6.7`, `6.8` - `7.0`, `7.1`, `7.2`, `7.3` ``kibana_discover_version: '7.3'`` kibana_discover_index_pattern_id ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_index_pattern_id``: The id of the index pattern to link to in the Kibana Discover application. These ids are usually generated and can be found in url of the index pattern management page, or by exporting its saved object. Example export of an index pattern's saved object: .. code-block:: text [ { "_id": "4e97d188-8a45-4418-8a37-07ed69b4d34c", "_type": "index-pattern", "_source": { ... } } ] You can modify an index pattern's id by exporting the saved object, modifying the ``_id`` field, and re-importing. ``kibana_discover_index_pattern_id: 4e97d188-8a45-4418-8a37-07ed69b4d34c`` kibana_discover_columns ^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_columns``: The columns to display in the generated Kibana Discover application link. Defaults to the ``_source`` column. ``kibana_discover_columns: [ timestamp, message ]`` kibana_discover_from_timedelta ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_from_timedelta``: The offset to the `from` time of the Kibana Discover link's time range. The `from` time is calculated by subtracting this timedelta from the event time. Defaults to 10 minutes. ``kibana_discover_from_timedelta: minutes: 2`` kibana_discover_to_timedelta ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``kibana_discover_to_timedelta``: The offset to the `to` time of the Kibana Discover link's time range. The `to` time is calculated by adding this timedelta to the event time. Defaults to 10 minutes. ``kibana_discover_to_timedelta: minutes: 2`` use_local_time ^^^^^^^^^^^^^^ ``use_local_time``: Whether to convert timestamps to the local time zone in alerts. If false, timestamps will be converted to UTC, which is what ElastAlert uses internally. (Optional, boolean, default true) match_enhancements ^^^^^^^^^^^^^^^^^^ ``match_enhancements``: A list of enhancement modules to use with this rule. 
An enhancement module is a subclass of enhancements.BaseEnhancement that will be given the match dictionary and can modify it before it is passed to the alerter. The enhancements will be run after silence and realert is calculated and in the case of aggregated alerts, right before the alert is sent. This can be changed by setting ``run_enhancements_first``. The enhancements should be specified as ``module.file.EnhancementName``. See :ref:`Enhancements` for more information. (Optional, list of strings, no default) run_enhancements_first ^^^^^^^^^^^^^^^^^^^^^^ ``run_enhancements_first``: If set to true, enhancements will be run as soon as a match is found. This means that they can be changed or dropped before affecting realert or being added to an aggregation. Silence stashes will still be created before the enhancement runs, meaning even if a ``DropMatchException`` is raised, the rule will still be silenced. (Optional, boolean, default false) query_key ^^^^^^^^^ ``query_key``: Having a query key means that realert time will be counted separately for each unique value of ``query_key``. For rule types which count documents, such as spike, frequency and flatline, it also means that these counts will be independent for each unique value of ``query_key``. For example, if ``query_key`` is set to ``username`` and ``realert`` is set, and an alert triggers on a document with ``{'username': 'bob'}``, additional alerts for ``{'username': 'bob'}`` will be ignored while other usernames will trigger alerts. Documents which are missing the ``query_key`` will be grouped together. A list of fields may also be used, which will create a compound query key. This compound key is treated as if it were a single field whose value is the component values, or "None", joined by commas. A new field with the key "field1,field2,etc" will be created in each document and may conflict with existing fields of the same name. aggregation_key ^^^^^^^^^^^^^^^ ``aggregation_key``: Having an aggregation key in conjunction with an aggregation will make it so that each new value encountered for the aggregation_key field will result in a new, separate aggregation window. summary_table_fields ^^^^^^^^^^^^^^^^^^^^ ``summary_table_fields``: Specifying the summmary_table_fields in conjunction with an aggregation will make it so that each aggregated alert will contain a table summarizing the values for the specified fields in all the matches that were aggregated together. timestamp_type ^^^^^^^^^^^^^^ ``timestamp_type``: One of ``iso``, ``unix``, ``unix_ms``, ``custom``. This option will set the type of ``@timestamp`` (or ``timestamp_field``) used to query Elasticsearch. ``iso`` will use ISO8601 timestamps, which will work with most Elasticsearch date type field. ``unix`` will query using an integer unix (seconds since 1/1/1970) timestamp. ``unix_ms`` will use milliseconds unix timestamp. ``custom`` allows you to define your own ``timestamp_format``. The default is ``iso``. (Optional, string enum, default iso). timestamp_format ^^^^^^^^^^^^^^^^ ``timestamp_format``: In case Elasticsearch used custom date format for date type field, this option provides a way to define custom timestamp format to match the type used for Elastisearch date type field. This option is only valid if ``timestamp_type`` set to ``custom``. (Optional, string, default '%Y-%m-%dT%H:%M:%SZ'). 
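For example, if the date field in your documents looks like ``2019-07-01 13:45:00``, a rule might (hypothetically) combine these options as follows::

    # timestamp_field name and format are assumptions for illustration only.
    timestamp_field: timestamp
    timestamp_type: custom
    timestamp_format: '%Y-%m-%d %H:%M:%S'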
timestamp_format_expr ^^^^^^^^^^^^^^^^^^^^^ ``timestamp_format_expr``: In case Elasticsearch used custom date format for date type field, this option provides a way to adapt the value obtained converting a datetime through ``timestamp_format``, when the format cannot match perfectly what defined in Elastisearch. When set, this option is evaluated as a Python expression along with a *globals* dictionary containing the original datetime instance named ``dt`` and the timestamp to be refined, named ``ts``. The returned value becomes the timestamp obtained from the datetime. For example, when the date type field in Elasticsearch uses milliseconds (``yyyy-MM-dd'T'HH:mm:ss.SSS'Z'``) and ``timestamp_format`` option is ``'%Y-%m-%dT%H:%M:%S.%fZ'``, Elasticsearch would fail to parse query terms as they contain microsecond values - that is it gets 6 digits instead of 3 - since the ``%f`` placeholder stands for microseconds for Python *strftime* method calls. Setting ``timestamp_format_expr: 'ts[:23] + ts[26:]'`` will truncate the value to milliseconds granting Elasticsearch compatibility. This option is only valid if ``timestamp_type`` set to ``custom``. (Optional, string, no default). _source_enabled ^^^^^^^^^^^^^^^ ``_source_enabled``: If true, ElastAlert will use _source to retrieve fields from documents in Elasticsearch. If false, ElastAlert will use ``fields`` to retrieve stored fields. Both of these are represented internally as if they came from ``_source``. See https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-fields.html for more details. The fields used come from ``include``, see above for more details. (Optional, boolean, default True) scan_entire_timeframe ^^^^^^^^^^^^^^^^^^^^^ ``scan_entire_timeframe``: If true, when ElastAlert starts, it will always start querying at the current time minus the timeframe. ``timeframe`` must exist in the rule. This may be useful, for example, if you are using a flatline rule type with a large timeframe, and you want to be sure that if ElastAlert restarts, you can still get alerts. This may cause duplicate alerts for some rule types, for example, Frequency can alert multiple times in a single timeframe, and if ElastAlert were to restart with this setting, it may scan the same range again, triggering duplicate alerts. Some rules and alerts require additional options, which also go in the top level of the rule configuration file. .. _testing : Testing Your Rule ================= Once you've written a rule configuration, you will want to validate it. To do so, you can either run ElastAlert in debug mode, or use ``elastalert-test-rule``, which is a script that makes various aspects of testing easier. It can: - Check that the configuration file loaded successfully. - Check that the Elasticsearch filter parses. - Run against the last X day(s) and the show the number of hits that match your filter. - Show the available terms in one of the results. - Save documents returned to a JSON file. - Run ElastAlert using either a JSON file or actual results from Elasticsearch. - Print out debug alerts or trigger real alerts. - Check that, if they exist, the primary_key, compare_key and include terms are in the results. - Show what metadata documents would be written to ``elastalert_status``. Without any optional arguments, it will run ElastAlert over the last 24 hours and print out any alerts that would have occurred. Here is an example test run which triggered an alert: .. 
code-block:: console $ elastalert-test-rule my_rules/rule1.yaml Successfully Loaded Example rule1 Got 105 hits from the last 1 day Available terms in first hit: @timestamp field1 field2 ... Included term this_field_doesnt_exist may be missing or null INFO:root:Queried rule Example rule1 from 6-16 15:21 PDT to 6-17 15:21 PDT: 105 hits INFO:root:Alert for Example rule1 at 2015-06-16T23:53:12Z: INFO:root:Example rule1 At least 50 events occurred between 6-16 18:30 PDT and 6-16 20:30 PDT field1: value1: 25 value2: 25 @timestamp: 2015-06-16T20:30:04-07:00 field1: value1 field2: something Would have written the following documents to elastalert_status: silence - {'rule_name': 'Example rule1', '@timestamp': datetime.datetime( ... ), 'exponent': 0, 'until': datetime.datetime( ... )} elastalert_status - {'hits': 105, 'matches': 1, '@timestamp': datetime.datetime( ... ), 'rule_name': 'Example rule1', 'starttime': datetime.datetime( ... ), 'endtime': datetime.datetime( ... ), 'time_taken': 3.1415926} Note that everything between "Alert for Example rule1 at ..." and "Would have written the following ..." is the exact text body that an alert would have. See the section below on alert content for more details. Also note that datetime objects are converted to ISO8601 timestamps when uploaded to Elasticsearch. See :ref:`the section on metadata ` for more details. Other options include: ``--schema-only``: Only perform schema validation on the file. It will not load modules or query Elasticsearch. This may catch invalid YAML and missing or misconfigured fields. ``--count-only``: Only find the number of matching documents and list available fields. ElastAlert will not be run and documents will not be downloaded. ``--days N``: Instead of the default 1 day, query N days. For selecting more specific time ranges, you must run ElastAlert itself and use ``--start`` and ``--end``. ``--save-json FILE``: Save all documents downloaded to a file as JSON. This is useful if you wish to modify data while testing or do offline testing in conjunction with ``--data FILE``. A maximum of 10,000 documents will be downloaded. ``--data FILE``: Use a JSON file as a data source instead of Elasticsearch. The file should be a single list containing objects, rather than objects on separate lines. Note than this uses mock functions which mimic some Elasticsearch query methods and is not guaranteed to have the exact same results as with Elasticsearch. For example, analyzed string fields may behave differently. ``--alert``: Trigger real alerts instead of the debug (logging text) alert. ``--formatted-output``: Output results in formatted JSON. .. note:: Results from running this script may not always be the same as if an actual ElastAlert instance was running. Some rule types, such as spike and flatline require a minimum elapsed time before they begin alerting, based on their timeframe. In addition, use_count_query and use_terms_query rely on run_every to determine their resolution. This script uses a fixed 5 minute window, which is the same as the default. .. _ruletypes: Rule Types ========== The various ``RuleType`` classes, defined in ``elastalert/ruletypes.py``, form the main logic behind ElastAlert. An instance is held in memory for each rule, passed all of the data returned by querying Elasticsearch with a given filter, and generates matches based on that data. To select a rule type, set the ``type`` option to the name of the rule type in the rule configuration file: ``type: `` Any ~~~ ``any``: The any rule will match everything. 
Every hit that the query returns will generate an alert. Blacklist ~~~~~~~~~ ``blacklist``: The blacklist rule will check a certain field against a blacklist, and match if it is in the blacklist. This rule requires two additional options: ``compare_key``: The name of the field to use to compare to the blacklist. If the field is null, those events will be ignored. ``blacklist``: A list of blacklisted values, and/or a list of paths to flat files which contain the blacklisted values using ``- "!file /path/to/file"``; for example:: blacklist: - value1 - value2 - "!file /tmp/blacklist1.txt" - "!file /tmp/blacklist2.txt" It is possible to mix between blacklist value definitions, or use either one. The ``compare_key`` term must be equal to one of these values for it to match. Whitelist ~~~~~~~~~ ``whitelist``: Similar to ``blacklist``, this rule will compare a certain field to a whitelist, and match if the list does not contain the term. This rule requires three additional options: ``compare_key``: The name of the field to use to compare to the whitelist. ``ignore_null``: If true, events without a ``compare_key`` field will not match. ``whitelist``: A list of whitelisted values, and/or a list of paths to flat files which contain the whitelisted values using ``- "!file /path/to/file"``; for example:: whitelist: - value1 - value2 - "!file /tmp/whitelist1.txt" - "!file /tmp/whitelist2.txt" It is possible to mix between whitelisted value definitions, or use either one. The ``compare_key`` term must be in this list or else it will match. Change ~~~~~~ For an example configuration file using this rule type, look at ``example_rules/example_change.yaml``. ``change``: This rule will monitor a certain field and match if that field changes. The field must change with respect to the last event with the same ``query_key``. This rule requires three additional options: ``compare_key``: The names of the field to monitor for changes. Since this is a list of strings, we can have multiple keys. An alert will trigger if any of the fields change. ``ignore_null``: If true, events without a ``compare_key`` field will not count as changed. Currently this checks for all the fields in ``compare_key`` ``query_key``: This rule is applied on a per-``query_key`` basis. This field must be present in all of the events that are checked. There is also an optional field: ``timeframe``: The maximum time between changes. After this time period, ElastAlert will forget the old value of the ``compare_key`` field. Frequency ~~~~~~~~~ For an example configuration file using this rule type, look at ``example_rules/example_frequency.yaml``. ``frequency``: This rule matches when there are at least a certain number of events in a given time frame. This may be counted on a per-``query_key`` basis. This rule requires two additional options: ``num_events``: The number of events which will trigger an alert, inclusive. ``timeframe``: The time that ``num_events`` must occur within. Optional: ``use_count_query``: If true, ElastAlert will poll Elasticsearch using the count api, and not download all of the matching documents. This is useful is you care only about numbers and not the actual data. It should also be used if you expect a large number of query hits, in the order of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. 
``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, default 50, unique terms.

``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50.

``query_key``: Counts of documents will be stored independently for each value of ``query_key``. Only ``num_events`` documents, all with the same value of ``query_key``, will trigger an alert.

``attach_related``: Will attach all the related events to the event that triggered the frequency alert. For example, in an alert triggered with ``num_events: 3``, the third event will trigger the alert on itself and add the other two events in a key named ``related_events`` that can be accessed in the alerter.

Spike
~~~~~

``spike``: This rule matches when the volume of events during a given time period is ``spike_height`` times larger or smaller than during the previous time period. It uses two sliding windows to compare the current and reference frequency of events. We will call these two windows "reference" and "current".

This rule requires three additional options:

``spike_height``: The ratio of the number of events in the last ``timeframe`` to the previous ``timeframe`` that, when hit, will trigger an alert.

``spike_type``: Either 'up', 'down' or 'both'. 'Up' meaning the rule will only match when the number of events is ``spike_height`` times higher. 'Down' meaning the reference number is ``spike_height`` times higher than the current number. 'Both' will match either.

``timeframe``: The rule will average out the rate of events over this time period. For example, ``hours: 1`` means that the 'current' window will span from present to one hour ago, and the 'reference' window will span from one hour ago to two hours ago. The rule will not be active until the time elapsed from the first event is at least two timeframes. This is to prevent an alert being triggered before a baseline rate has been established. This can be overridden using ``alert_on_new_data``.

Optional:

``field_value``: When set, uses the value of the field in the document and not the number of matching documents. This is useful, for example, to monitor a temperature sensor and raise an alarm if the temperature rises too fast. Note that the means of the field on the reference and current windows are used to determine if the ``spike_height`` value is reached. Note also that the threshold parameters are ignored in this mode.

``threshold_ref``: The minimum number of events that must exist in the reference window for an alert to trigger. For example, if ``spike_height: 3`` and ``threshold_ref: 10``, then the 'reference' window must contain at least 10 events and the 'current' window at least three times that for an alert to be triggered.

``threshold_cur``: The minimum number of events that must exist in the current window for an alert to trigger. For example, if ``spike_height: 3`` and ``threshold_cur: 60``, then an alert will occur if the current window has more than 60 events and the reference window has less than a third as many.

To illustrate the use of ``threshold_ref``, ``threshold_cur``, ``alert_on_new_data``, ``timeframe`` and ``spike_height`` together, consider the following examples::

 " Alert if at least 15 events occur within two hours and less than a quarter of that number occurred within the previous two hours.
" timeframe: hours: 2 spike_height: 4 spike_type: up threshold_cur: 15 hour1: 5 events (ref: 0, cur: 5) - No alert because (a) threshold_cur not met, (b) ref window not filled hour2: 5 events (ref: 0, cur: 10) - No alert because (a) threshold_cur not met, (b) ref window not filled hour3: 10 events (ref: 5, cur: 15) - No alert because (a) spike_height not met, (b) ref window not filled hour4: 35 events (ref: 10, cur: 45) - Alert because (a) spike_height met, (b) threshold_cur met, (c) ref window filled hour1: 20 events (ref: 0, cur: 20) - No alert because ref window not filled hour2: 21 events (ref: 0, cur: 41) - No alert because ref window not filled hour3: 19 events (ref: 20, cur: 40) - No alert because (a) spike_height not met, (b) ref window not filled hour4: 23 events (ref: 41, cur: 42) - No alert because spike_height not met hour1: 10 events (ref: 0, cur: 10) - No alert because (a) threshold_cur not met, (b) ref window not filled hour2: 0 events (ref: 0, cur: 10) - No alert because (a) threshold_cur not met, (b) ref window not filled hour3: 0 events (ref: 10, cur: 0) - No alert because (a) threshold_cur not met, (b) ref window not filled, (c) spike_height not met hour4: 30 events (ref: 10, cur: 30) - No alert because spike_height not met hour5: 5 events (ref: 0, cur: 35) - Alert because (a) spike_height met, (b) threshold_cur met, (c) ref window filled " Alert if at least 5 events occur within two hours, and twice as many events occur within the next two hours. " timeframe: hours: 2 spike_height: 2 spike_type: up threshold_ref: 5 hour1: 20 events (ref: 0, cur: 20) - No alert because (a) threshold_ref not met, (b) ref window not filled hour2: 100 events (ref: 0, cur: 120) - No alert because (a) threshold_ref not met, (b) ref window not filled hour3: 100 events (ref: 20, cur: 200) - No alert because ref window not filled hour4: 100 events (ref: 120, cur: 200) - No alert because spike_height not met hour1: 0 events (ref: 0, cur: 0) - No alert because (a) threshold_ref not met, (b) ref window not filled hour2: 20 events (ref: 0, cur: 20) - No alert because (a) threshold_ref not met, (b) ref window not filled hour3: 100 events (ref: 0, cur: 120) - No alert because (a) threshold_ref not met, (b) ref window not filled hour4: 100 events (ref: 20, cur: 200) - Alert because (a) spike_height met, (b) threshold_ref met, (c) ref window filled hour1: 1 events (ref: 0, cur: 1) - No alert because (a) threshold_ref not met, (b) ref window not filled hour2: 2 events (ref: 0, cur: 3) - No alert because (a) threshold_ref not met, (b) ref window not filled hour3: 2 events (ref: 1, cur: 4) - No alert because (a) threshold_ref not met, (b) ref window not filled hour4: 1000 events (ref: 3, cur: 1002) - No alert because threshold_ref not met hour5: 2 events (ref: 4, cur: 1002) - No alert because threshold_ref not met hour6: 4 events: (ref: 1002, cur: 6) - No alert because spike_height not met hour1: 1000 events (ref: 0, cur: 1000) - No alert because (a) threshold_ref not met, (b) ref window not filled hour2: 0 events (ref: 0, cur: 1000) - No alert because (a) threshold_ref not met, (b) ref window not filled hour3: 0 events (ref: 1000, cur: 0) - No alert because (a) spike_height not met, (b) ref window not filled hour4: 0 events (ref: 1000, cur: 0) - No alert because spike_height not met hour5: 1000 events (ref: 0, cur: 1000) - No alert because threshold_ref not met hour6: 1050 events (ref: 0, cur: 2050)- No alert because threshold_ref not met hour7: 1075 events (ref: 1000, cur: 2125) Alert because (a) 
spike_height met, (b) threshold_ref met, (c) ref window filled " Alert if at least 100 events occur within two hours and less than a fifth of that number occurred in the previous two hours. " timeframe: hours: 2 spike_height: 5 spike_type: up threshold_cur: 100 hour1: 1000 events (ref: 0, cur: 1000) - No alert because ref window not filled hour1: 2 events (ref: 0, cur: 2) - No alert because (a) threshold_cur not met, (b) ref window not filled hour2: 1 events (ref: 0, cur: 3) - No alert because (a) threshold_cur not met, (b) ref window not filled hour3: 20 events (ref: 2, cur: 21) - No alert because (a) threshold_cur not met, (b) ref window not filled hour4: 81 events (ref: 3, cur: 101) - Alert because (a) spike_height met, (b) threshold_cur met, (c) ref window filled hour1: 10 events (ref: 0, cur: 10) - No alert because (a) threshold_cur not met, (b) ref window not filled hour2: 20 events (ref: 0, cur: 30) - No alert because (a) threshold_cur not met, (b) ref window not filled hour3: 40 events (ref: 10, cur: 60) - No alert because (a) threshold_cur not met, (b) ref window not filled hour4: 80 events (ref: 30, cur: 120) - No alert because spike_height not met hour5: 200 events (ref: 60, cur: 280) - No alert because spike_height not met ``alert_on_new_data``: This option is only used if ``query_key`` is set. When this is set to true, any new ``query_key`` encountered may trigger an immediate alert. When set to false, baseline must be established for each new ``query_key`` value, and then subsequent spikes may cause alerts. Baseline is established after ``timeframe`` has elapsed twice since first occurrence. ``use_count_query``: If true, ElastAlert will poll Elasticsearch using the count api, and not download all of the matching documents. This is useful is you care only about numbers and not the actual data. It should also be used if you expect a large number of query hits, in the order of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. ``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, default 50, unique terms. ``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50. ``query_key``: Counts of documents will be stored independently for each value of ``query_key``. Flatline ~~~~~~~~ ``flatline``: This rule matches when the total number of events is under a given ``threshold`` for a time period. This rule requires two additional options: ``threshold``: The minimum number of events for an alert not to be triggered. ``timeframe``: The time period that must contain less than ``threshold`` events. Optional: ``use_count_query``: If true, ElastAlert will poll Elasticsearch using the count api, and not download all of the matching documents. This is useful is you care only about numbers and not the actual data. It should also be used if you expect a large number of query hits, in the order of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. 
``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, default 50, unique terms. ``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50. ``query_key``: With flatline rule, ``query_key`` means that an alert will be triggered if any value of ``query_key`` has been seen at least once and then falls below the threshold. ``forget_keys``: Only valid when used with ``query_key``. If this is set to true, ElastAlert will "forget" about the ``query_key`` value that triggers an alert, therefore preventing any more alerts for it until it's seen again. New Term ~~~~~~~~ ``new_term``: This rule matches when a new value appears in a field that has never been seen before. When ElastAlert starts, it will use an aggregation query to gather all known terms for a list of fields. This rule requires one additional option: ``fields``: A list of fields to monitor for new terms. ``query_key`` will be used if ``fields`` is not set. Each entry in the list of fields can itself be a list. If a field entry is provided as a list, it will be interpreted as a set of fields that compose a composite key used for the ElasticSearch query. .. note:: The composite fields may only refer to primitive types, otherwise the initial ElasticSearch query will not properly return the aggregation results, thus causing alerts to fire every time the ElastAlert service initially launches with the rule. A warning will be logged to the console if this scenario is encountered. However, future alerts will actually work as expected after the initial flurry. Optional: ``terms_window_size``: The amount of time used for the initial query to find existing terms. No term that has occurred within this time frame will trigger an alert. The default is 30 days. ``window_step_size``: When querying for existing terms, split up the time range into steps of this size. For example, using the default 30 day window size, and the default 1 day step size, 30 invidivdual queries will be made. This helps to avoid timeouts for very expensive aggregation queries. The default is 1 day. ``alert_on_missing_field``: Whether or not to alert when a field is missing from a document. The default is false. ``use_terms_query``: If true, ElastAlert will use aggregation queries to get terms instead of regular search queries. This is faster than regular searching if there is a large number of documents. If this is used, you may only specify a single field, and must also set ``query_key`` to that field. Also, note that ``terms_size`` (the number of buckets returned per query) defaults to 50. This means that if a new term appears but there are at least 50 terms which appear more frequently, it will not be found. .. note:: When using use_terms_query, make sure that the field you are using is not analyzed. If it is, the results of each terms query may return tokens rather than full values. ElastAlert will by default turn on use_keyword_postfix, which attempts to use the non-analyzed version (.keyword or .raw) to gather initial terms. These will not match the partial values and result in false positives. ``use_keyword_postfix``: If true, ElastAlert will automatically try to add .keyword (ES5+) or .raw to the fields when making an initial query. These are non-analyzed fields added by Logstash. 
If the field used is analyzed, the initial query will return only the tokenized values, potentially causing false positives. Defaults to true. Cardinality ~~~~~~~~~~~ ``cardinality``: This rule matches when a the total number of unique values for a certain field within a time frame is higher or lower than a threshold. This rule requires: ``timeframe``: The time period in which the number of unique values will be counted. ``cardinality_field``: Which field to count the cardinality for. This rule requires one of the two following options: ``max_cardinality``: If the cardinality of the data is greater than this number, an alert will be triggered. Each new event that raises the cardinality will trigger an alert. ``min_cardinality``: If the cardinality of the data is lower than this number, an alert will be triggered. The ``timeframe`` must have elapsed since the first event before any alerts will be sent. When a match occurs, the ``timeframe`` will be reset and must elapse again before additional alerts. Optional: ``query_key``: Group cardinality counts by this field. For each unique value of the ``query_key`` field, cardinality will be counted separately. Metric Aggregation ~~~~~~~~~~~~~~~~~~ ``metric_aggregation``: This rule matches when the value of a metric within the calculation window is higher or lower than a threshold. By default this is ``buffer_time``. This rule requires: ``metric_agg_key``: This is the name of the field over which the metric value will be calculated. The underlying type of this field must be supported by the specified aggregation type. ``metric_agg_type``: The type of metric aggregation to perform on the ``metric_agg_key`` field. This must be one of 'min', 'max', 'avg', 'sum', 'cardinality', 'value_count'. ``doc_type``: Specify the ``_type`` of document to search for. This rule also requires at least one of the two following options: ``max_threshold``: If the calculated metric value is greater than this number, an alert will be triggered. This threshold is exclusive. ``min_threshold``: If the calculated metric value is less than this number, an alert will be triggered. This threshold is exclusive. Optional: ``query_key``: Group metric calculations by this field. For each unique value of the ``query_key`` field, the metric will be calculated and evaluated separately against the threshold(s). ``min_doc_count``: The minimum number of events in the current window needed for an alert to trigger. Used in conjunction with ``query_key``, this will only consider terms which in their last ``buffer_time`` had at least ``min_doc_count`` records. Default 1. ``use_run_every_query_size``: By default the metric value is calculated over a ``buffer_time`` sized window. If this parameter is true the rule will use ``run_every`` as the calculation window. ``allow_buffer_time_overlap``: This setting will only have an effect if ``use_run_every_query_size`` is false and ``buffer_time`` is greater than ``run_every``. If true will allow the start of the metric calculation window to overlap the end time of a previous run. By default the start and end times will not overlap, so if the time elapsed since the last run is less than the metric calculation window size, rule execution will be skipped (to avoid calculations on partial data). ``bucket_interval``: If present this will divide the metric calculation window into ``bucket_interval`` sized segments. The metric value will be calculated and evaluated against the threshold(s) for each segment. 
If ``bucket_interval`` is specified, then ``buffer_time`` must be a multiple of ``bucket_interval`` (or ``run_every`` if ``use_run_every_query_size`` is true).

``sync_bucket_interval``: This only has an effect if ``bucket_interval`` is present. If true, it will sync the start and end times of the metric calculation window to the keys (timestamps) of the underlying date_histogram buckets. Because of the way Elasticsearch calculates date_histogram bucket keys, these usually round evenly to the nearest minute, hour, day, etc. (depending on the bucket size). By default, the bucket keys are offset to align with the time ElastAlert runs (this both avoids calculations on partial data and ensures the very latest documents are included). See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-datehistogram-aggregation.html#_offset for a more comprehensive explanation.

Spike Aggregation
~~~~~~~~~~~~~~~~~

``spike_aggregation``: This rule matches when the value of a metric within the calculation window is ``spike_height`` times larger or smaller than during the previous time period. It uses two sliding windows to compare the current and reference metric values. We will call these two windows "reference" and "current".

This rule requires:

``metric_agg_key``: This is the name of the field over which the metric value will be calculated. The underlying type of this field must be supported by the specified aggregation type. If using a scripted field via ``metric_agg_script``, this is the name for your scripted field.

``metric_agg_type``: The type of metric aggregation to perform on the ``metric_agg_key`` field. This must be one of 'min', 'max', 'avg', 'sum', 'cardinality', 'value_count'.

``spike_height``: The ratio of the metric value in the last ``timeframe`` to the previous ``timeframe`` that, when hit, will trigger an alert.

``spike_type``: Either 'up', 'down' or 'both'. 'Up' meaning the rule will only match when the metric value is ``spike_height`` times higher. 'Down' meaning the reference metric value is ``spike_height`` times higher than the current metric value. 'Both' will match either.

``buffer_time``: The rule will average out the rate of events over this time period. For example, ``hours: 1`` means that the 'current' window will span from present to one hour ago, and the 'reference' window will span from one hour ago to two hours ago. The rule will not be active until the time elapsed from the first event is at least two timeframes. This is to prevent an alert being triggered before a baseline rate has been established. This can be overridden using ``alert_on_new_data``.

Optional:

``query_key``: Group metric calculations by this field. For each unique value of the ``query_key`` field, the metric will be calculated and evaluated separately against the 'reference'/'current' metric value and ``spike_height``.

``metric_agg_script``: A `Painless` formatted script describing how to calculate your metric on-the-fly::

    metric_agg_key: myScriptedMetric
    metric_agg_script:
        script: doc['field1'].value * doc['field2'].value

``threshold_ref``: The minimum value of the metric in the reference window for an alert to trigger. For example, if ``spike_height: 3`` and ``threshold_ref: 10``, then the 'reference' window must have a metric value of 10 and the 'current' window at least three times that for an alert to be triggered.

``threshold_cur``: The minimum value of the metric in the current window for an alert to trigger.
For example, if ``spike_height: 3`` and ``threshold_cur: 60``, then an alert will occur if the current window has a metric value greater than 60 and the reference window is less than a third of that value.

``min_doc_count``: The minimum number of events in the current window needed for an alert to trigger. Used in conjunction with ``query_key``, this will only consider terms which in their last ``buffer_time`` had at least ``min_doc_count`` records. Default is 1.

Percentage Match
~~~~~~~~~~~~~~~~

``percentage_match``: This rule matches when the percentage of documents in the match bucket within a calculation window is higher or lower than a threshold. By default the calculation window is ``buffer_time``.

This rule requires:

``match_bucket_filter``: ES filter DSL. This defines a filter for the match bucket, which should match a subset of the documents returned by the main query filter.

``doc_type``: Specify the ``_type`` of document to search for.

This rule also requires at least one of the two following options:

``min_percentage``: If the percentage of matching documents is less than this number, an alert will be triggered.

``max_percentage``: If the percentage of matching documents is greater than this number, an alert will be triggered.

Optional:

``query_key``: Group percentage by this field. For each unique value of the ``query_key`` field, the percentage will be calculated and evaluated separately against the threshold(s).

``use_run_every_query_size``: See ``use_run_every_query_size`` in the Metric Aggregation rule.

``allow_buffer_time_overlap``: See ``allow_buffer_time_overlap`` in the Metric Aggregation rule.

``bucket_interval``: See ``bucket_interval`` in the Metric Aggregation rule.

``sync_bucket_interval``: See ``sync_bucket_interval`` in the Metric Aggregation rule.

``percentage_format_string``: An optional format string to apply to the percentage value in the alert match text. Must be a valid Python format string. For example, "%.2f" will round it to 2 decimal places. See https://docs.python.org/3.4/library/string.html#format-specification-mini-language

``min_denominator``: Minimum number of documents on which the percentage calculation will apply. Default is 0.

.. _alerts:

Alerts
======

Each rule may have any number of alerts attached to it. Alerts are subclasses of ``Alerter`` and are passed a dictionary, or list of dictionaries, from ElastAlert containing relevant information. They are configured in the rule configuration file similarly to rule types.

To set the alerts for a rule, set the ``alert`` option to the name of the alert, or a list of the names of alerts:

``alert: email``

or

.. code-block:: yaml

    alert:
    - email
    - jira

Options for each alerter can either be defined at the top level of the YAML file, or nested within the alert name, allowing for different settings for multiple instances of the same alerter. For example, consider sending multiple emails, but with different 'To' and 'From' fields:

.. code-block:: yaml

    alert:
    - email
    from_addr: "no-reply@example.com"
    email: "customer@example.com"

versus

.. code-block:: yaml

    alert:
    - email:
        from_addr: "no-reply@example.com"
        email: "customer@example.com"
    - email:
        from_addr: "elastalert@example.com"
        email: "devs@example.com"

If multiple alerters of the same type are used, top-level settings will be used as the default and inline settings will override those for each alerter.
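To make that precedence concrete, here is a hedged sketch in which the top-level settings act as the defaults and only the second alerter overrides the recipient (all addresses are placeholders):

.. code-block:: yaml

    alert:
    - email
    - email:
        email: "oncall@example.com"
    email: "devs@example.com"
    from_addr: "elastalert@example.com"

Assuming the behaviour described above, both emails would be sent from ``elastalert@example.com``; the first would go to ``devs@example.com`` and the second to ``oncall@example.com``.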
Alert Subject ~~~~~~~~~~~~~ E-mail subjects, JIRA issue summaries, PagerDuty alerts, or any alerter that has a "subject" can be customized by adding an ``alert_subject`` that contains a custom summary. It can be further formatted using standard Python formatting syntax:: alert_subject: "Issue {0} occurred at {1}" The arguments for the formatter will be fed from the matched objects related to the alert. The field names whose values will be used as the arguments can be passed with ``alert_subject_args``:: alert_subject_args: - issue.name - "@timestamp" It is mandatory to enclose the ``@timestamp`` field in quotes since in YAML format a token cannot begin with the ``@`` character. Not using the quotation marks will trigger a YAML parse error. In case the rule matches multiple objects in the index, only the first match is used to populate the arguments for the formatter. If the field(s) mentioned in the arguments list are missing, the email alert will have the text ``alert_missing_value`` in place of its expected value. This will also occur if ``use_count_query`` is set to true. Alert Content ~~~~~~~~~~~~~ There are several ways to format the body text of the various types of events. In EBNF:: rule_name = name alert_text = alert_text ruletype_text = Depends on type top_counts_header = top_count_key, ":" top_counts_value = Value, ": ", Count top_counts = top_counts_header, LF, top_counts_value field_values = Field, ": ", Value Similarly to ``alert_subject``, ``alert_text`` can be further formatted using standard Python formatting syntax. The field names whose values will be used as the arguments can be passed with ``alert_text_args`` or ``alert_text_kw``. You may also refer to any top-level rule property in the ``alert_subject_args``, ``alert_text_args``, ``alert_missing_value``, and ``alert_text_kw fields``. However, if the matched document has a key with the same name, that will take preference over the rule property. By default:: body = rule_name [alert_text] ruletype_text {top_counts} {field_values} With ``alert_text_type: alert_text_only``:: body = rule_name alert_text With ``alert_text_type: exclude_fields``:: body = rule_name [alert_text] ruletype_text {top_counts} With ``alert_text_type: aggregation_summary_only``:: body = rule_name aggregation_summary ruletype_text is the string returned by RuleType.get_match_str. field_values will contain every key value pair included in the results from Elasticsearch. These fields include "@timestamp" (or the value of ``timestamp_field``), every key in ``include``, every key in ``top_count_keys``, ``query_key``, and ``compare_key``. If the alert spans multiple events, these values may come from an individual event, usually the one which triggers the alert. When using ``alert_text_args``, you can access nested fields and index into arrays. For example, if your match was ``{"data": {"ips": ["127.0.0.1", "12.34.56.78"]}}``, then by using ``"data.ips[1]"`` in ``alert_text_args``, it would replace value with ``"12.34.56.78"``. This can go arbitrarily deep into fields and will still work on keys that contain dots themselves. Command ~~~~~~~ The command alert allows you to execute an arbitrary command and pass arguments or stdin from the match. Arguments to the command can use Python format string syntax to access parts of the match. The alerter will open a subprocess and optionally pass the match, or matches in the case of an aggregated alert, as a JSON array, to the stdin of the process. 
This alert requires one option:

``command``: A list of arguments to execute or a string to execute. If in list format, the first argument is the name of the program to execute. If passed a string, the command is executed through the shell.

Strings can be formatted using the old-style format (``%``) or the new-style format (``.format()``). When the old-style format is used, fields are accessed using ``%(field_name)s``, or ``%(field.subfield)s``. When the new-style format is used, fields are accessed using ``{field_name}``. New-style formatting allows accessing nested fields (e.g., ``{field_1[subfield]}``).

In an aggregated alert, these fields come from the first match.

Optional:

``pipe_match_json``: If true, the match will be converted to JSON and passed to stdin of the command. Note that this will cause ElastAlert to block until the command exits or sends an EOF to stdout.

``pipe_alert_text``: If true, the standard alert body text will be passed to stdin of the command. Note that this will cause ElastAlert to block until the command exits or sends an EOF to stdout. It cannot be used at the same time as ``pipe_match_json``.

Example usage using old-style format::

    alert:
    - command
    command: ["/bin/send_alert", "--username", "%(username)s"]

.. warning::

    Executing commands with untrusted data can make your system vulnerable to shell injection! If you use formatted data in your command, it is highly recommended that you use an args list format instead of a shell string.

Example usage using new-style format::

    alert:
    - command
    command: ["/bin/send_alert", "--username", "{match[username]}"]

Email
~~~~~

This alert will send an email. It connects to an SMTP server located at ``smtp_host``, or localhost by default. If available, it will use STARTTLS.

This alert requires one additional option:

``email``: An address or list of addresses to send the alert to.

Optional:

``email_from_field``: Use a field from the document that triggered the alert as the recipient. If the field cannot be found, the ``email`` value will be used as a default. Note that this field will not be available in every rule type, for example, if you have ``use_count_query`` or if it's ``type: flatline``. You can optionally add a domain suffix to the field to generate the address using ``email_add_domain``. It can be a single recipient or list of recipients. For example, with the following settings::

    email_from_field: "data.user"
    email_add_domain: "@example.com"

and a match ``{"@timestamp": "2017", "data": {"foo": "bar", "user": "qlo"}}``, an email would be sent to ``qlo@example.com``.

``smtp_host``: The SMTP host to use, defaults to localhost.

``smtp_port``: The port to use. Default is 25.

``smtp_ssl``: Connect to the SMTP host using TLS, defaults to ``false``. If ``smtp_ssl`` is not used, ElastAlert will still attempt STARTTLS.

``smtp_auth_file``: The path to a file which contains SMTP authentication credentials. The path can be either absolute or relative to the given rule. It should be YAML formatted and contain two fields, ``user`` and ``password``. If this is not present, no authentication will be attempted.

``smtp_cert_file``: Connect to the SMTP host using the given path to a TLS certificate file; defaults to ``None``.

``smtp_key_file``: Connect to the SMTP host using the given path to a TLS key file; defaults to ``None``.

``email_reply_to``: This sets the Reply-To header in the email. By default, the from address is ElastAlert@ and the domain will be set by the smtp server.

``from_addr``: This sets the From header in the email.
By default, the from address is ElastAlert@ and the domain will be set by the smtp server. ``cc``: This adds the CC emails to the list of recipients. By default, this is left empty. ``bcc``: This adds the BCC emails to the list of recipients but does not show up in the email message. By default, this is left empty. ``email_format``: If set to ``html``, the email's MIME type will be set to HTML, and HTML content should correctly render. If you use this, you need to put your own HTML into ``alert_text`` and use ``alert_text_type: alert_text_only``. Jira ~~~~ The JIRA alerter will open a ticket on jira whenever an alert is triggered. You must have a service account for ElastAlert to connect with. The credentials of the service account are loaded from a separate file. The ticket number will be written to the alert pipeline, and if it is followed by an email alerter, a link will be included in the email. This alert requires four additional options: ``jira_server``: The hostname of the JIRA server. ``jira_project``: The project to open the ticket under. ``jira_issuetype``: The type of issue that the ticket will be filed as. Note that this is case sensitive. ``jira_account_file``: The path to the file which contains JIRA account credentials. For an example JIRA account file, see ``example_rules/jira_acct.yaml``. The account file is also yaml formatted and must contain two fields: ``user``: The username. ``password``: The password. Optional: ``jira_component``: The name of the component or components to set the ticket to. This can be a single string or a list of strings. This is provided for backwards compatibility and will eventually be deprecated. It is preferable to use the plural ``jira_components`` instead. ``jira_components``: The name of the component or components to set the ticket to. This can be a single string or a list of strings. ``jira_description``: Similar to ``alert_text``, this text is prepended to the JIRA description. ``jira_label``: The label or labels to add to the JIRA ticket. This can be a single string or a list of strings. This is provided for backwards compatibility and will eventually be deprecated. It is preferable to use the plural ``jira_labels`` instead. ``jira_labels``: The label or labels to add to the JIRA ticket. This can be a single string or a list of strings. ``jira_priority``: The index of the priority to set the issue to. In the JIRA dropdown for priorities, 0 would represent the first priority, 1 the 2nd, etc. ``jira_watchers``: A list of user names to add as watchers on a JIRA ticket. This can be a single string or a list of strings. ``jira_bump_tickets``: If true, ElastAlert search for existing tickets newer than ``jira_max_age`` and comment on the ticket with information about the alert instead of opening another ticket. ElastAlert finds the existing ticket by searching by summary. If the summary has changed or contains special characters, it may fail to find the ticket. If you are using a custom ``alert_subject``, the two summaries must be exact matches, except by setting ``jira_ignore_in_title``, you can ignore the value of a field when searching. For example, if the custom subject is "foo occured at bar", and "foo" is the value field X in the match, you can set ``jira_ignore_in_title`` to "X" and it will only bump tickets with "bar" in the subject. Defaults to false. ``jira_ignore_in_title``: ElastAlert will attempt to remove the value for this field from the JIRA subject when searching for tickets to bump. 
See ``jira_bump_tickets`` description above for an example. ``jira_max_age``: If ``jira_bump_tickets`` is true, the maximum age of a ticket, in days, such that ElastAlert will comment on the ticket instead of opening a new one. Default is 30 days. ``jira_bump_not_in_statuses``: If ``jira_bump_tickets`` is true, a list of statuses the ticket must **not** be in for ElastAlert to comment on the ticket instead of opening a new one. For example, to prevent comments being added to resolved or closed tickets, set this to 'Resolved' and 'Closed'. This option should not be set if the ``jira_bump_in_statuses`` option is set. Example usage:: jira_bump_not_in_statuses: - Resolved - Closed ``jira_bump_in_statuses``: If ``jira_bump_tickets`` is true, a list of statuses the ticket *must be in* for ElastAlert to comment on the ticket instead of opening a new one. For example, to only comment on 'Open' tickets -- and thus not 'In Progress', 'Analyzing', 'Resolved', etc. tickets -- set this to 'Open'. This option should not be set if the ``jira_bump_not_in_statuses`` option is set. Example usage:: jira_bump_in_statuses: - Open ``jira_bump_only``: Only update if a ticket is found to bump. This skips ticket creation for rules where you only want to affect existing tickets. Example usage:: jira_bump_only: true ``jira_transition_to``: If ``jira_bump_tickets`` is true, Transition this ticket to the given Status when bumping. Must match the text of your JIRA implementation's Status field. Example usage:: jira_transition_to: 'Fixed' ``jira_bump_after_inactivity``: If this is set, ElastAlert will only comment on tickets that have been inactive for at least this many days. It only applies if ``jira_bump_tickets`` is true. Default is 0 days. Arbitrary Jira fields: ElastAlert supports setting any arbitrary JIRA field that your jira issue supports. For example, if you had a custom field, called "Affected User", you can set it by providing that field name in ``snake_case`` prefixed with ``jira_``. These fields can contain primitive strings or arrays of strings. Note that when you create a custom field in your JIRA server, internally, the field is represented as ``customfield_1111``. In elastalert, you may refer to either the public facing name OR the internal representation. In addition, if you would like to use a field in the alert as the value for a custom JIRA field, use the field name plus a # symbol in front. For example, if you wanted to set a custom JIRA field called "user" to the value of the field "username" from the match, you would use the following. Example:: jira_user: "#username" Example usage:: jira_arbitrary_singular_field: My Name jira_arbitrary_multivalue_field: - Name 1 - Name 2 jira_customfield_12345: My Custom Value jira_customfield_9999: - My Custom Value 1 - My Custom Value 2 OpsGenie ~~~~~~~~ OpsGenie alerter will create an alert which can be used to notify Operations people of issues or log information. An OpsGenie ``API`` integration must be created in order to acquire the necessary ``opsgenie_key`` rule variable. Currently the OpsGenieAlerter only creates an alert, however it could be extended to update or close existing alerts. It is necessary for the user to create an OpsGenie Rest HTTPS API `integration page `_ in order to create alerts. The OpsGenie alert requires one option: ``opsgenie_key``: The randomly generated API Integration key created by OpsGenie. Optional: ``opsgenie_account``: The OpsGenie account to integrate with. 
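As a sketch, a minimal OpsGenie alerter configuration might look like the example below; the integration key and account name are placeholders::

    alert:
      - opsgenie
    opsgenie_key: "abcd1234-0000-0000-0000-placeholder"
    opsgenie_account: "my-opsgenie-account"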
``opsgenie_recipients``: A list of OpsGenie recipients who will be notified by the alert.

``opsgenie_recipients_args``: Map of arguments used to format opsgenie_recipients.

``opsgenie_default_recipients``: List of default recipients to notify when the formatting of opsgenie_recipients is unsuccessful.

``opsgenie_teams``: A list of OpsGenie teams to notify (useful for schedules with escalation).

``opsgenie_teams_args``: Map of arguments used to format opsgenie_teams (useful for assigning the alerts to teams based on some data).

``opsgenie_default_teams``: List of default teams to notify when the formatting of opsgenie_teams is unsuccessful.

``opsgenie_tags``: A list of tags for this alert.

``opsgenie_message``: Set the OpsGenie message to something other than the rule name. The message can be formatted with fields from the first match, e.g. "Error occurred for {app_name} at {timestamp}.".

``opsgenie_alias``: Set the OpsGenie alias. The alias can be formatted with fields from the first match, e.g. "{app_name} error".

``opsgenie_subject``: A string used to create the title of the OpsGenie alert. Can use Python string formatting.

``opsgenie_subject_args``: A list of fields to use to format ``opsgenie_subject`` if it contains formatters.

``opsgenie_priority``: Set the OpsGenie priority level. Possible values are P1, P2, P3, P4, P5.

``opsgenie_details``: Map of custom key/value pairs to include in the alert's details. The value can be sourced from fields in the first match, environment variables, or a constant value.

Example usage::

    opsgenie_details:
      Author: 'Bob Smith'          # constant value
      Environment: '$VAR'          # environment variable
      Message: { field: message }  # field in the first match

SNS
~~~

The SNS alerter will send an SNS notification. The body of the notification is formatted the same as with other alerters. The SNS alerter uses boto3 and can use credentials in the rule yaml, in standard AWS credentials and config files, or via environment variables. See http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html for details.

SNS requires one option:

``sns_topic_arn``: The SNS topic's ARN. For example, ``arn:aws:sns:us-east-1:123456789:somesnstopic``

Optional:

``aws_access_key``: An access key to connect to SNS with.

``aws_secret_key``: The secret key associated with the access key.

``aws_region``: The AWS region in which the SNS resource is located. Default is us-east-1.

``profile``: The AWS profile to use. If none specified, the default will be used.

HipChat
~~~~~~~

HipChat alerter will send a notification to a predefined HipChat room. The body of the notification is formatted the same as with other alerters.

The alerter requires the following two options:

``hipchat_auth_token``: The randomly generated notification token created by HipChat. Go to https://XXXXX.hipchat.com/account/api and use the 'Create new token' section, choosing 'Send notification' in the Scopes list.

``hipchat_room_id``: The id associated with the HipChat room you want to send the alert to. Go to https://XXXXX.hipchat.com/rooms and choose the room you want to post to. The room ID will be the numeric part of the URL.

``hipchat_msg_color``: The color of the message background that is sent to HipChat. May be set to green, yellow or red. Default is red.

``hipchat_domain``: The custom domain in case you have your own HipChat server deployment. Default is api.hipchat.com.

``hipchat_ignore_ssl_errors``: Ignore TLS errors (self-signed certificates, etc.). Default is false.
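For example, a minimal HipChat alerter sketch might look like the following; the token and room id are placeholders::

    alert:
      - hipchat
    hipchat_auth_token: "abc123placeholdertoken"
    hipchat_room_id: 1234567
    hipchat_msg_color: "yellow"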
``hipchat_proxy``: By default ElastAlert will not use a network proxy to send notifications to HipChat. Set this option using ``hostname:port`` if you need to use a proxy.

``hipchat_notify``: When set to true, triggers a HipChat bell as if the message were sent by a user. Default is true.

``hipchat_from``: When humans post to HipChat, a timestamp appears next to their name. For bots, the name shown is the name of the token. The 'from' value, shown instead of a timestamp, defaults to empty unless set, which you can do here. This is optional.

``hipchat_message_format``: Determines how the message is treated by HipChat and rendered inside HipChat applications.

  html - Message is rendered as HTML and receives no special treatment. Must be valid HTML and entities must be escaped (e.g. ``&amp;`` instead of ``&``). May contain basic tags: a, b, i, strong, em, br, img, pre, code, lists, tables.

  text - Message is treated just like a message sent by a user. Can include @mentions, emoticons, pastes, and auto-detected URLs (Twitter, YouTube, images, etc).

  Valid values: html, text. Defaults to 'html'.

``hipchat_mentions``: When using an ``html`` message format, it is not possible to mention specific users using the ``@user`` syntax. In that case, you can set ``hipchat_mentions`` to a list of users which will first be mentioned using a single text message, then the normal ElastAlert message will be sent to HipChat. If set, it will mention the users, no matter if the original message format is set to HTML or text. Valid values: list of strings. Defaults to ``[]``.

Stride
~~~~~~

Stride alerter will send a notification to a predefined Stride room. The body of the notification is formatted the same as with other alerters. Simple HTML, such as bold and link tags, will be parsed into a format that Stride can consume.

The alerter requires the following options:

``stride_access_token``: The randomly generated notification token created by Stride.

``stride_cloud_id``: The site_id associated with the Stride site you want to send the alert to.

``stride_conversation_id``: The conversation_id associated with the Stride conversation you want to send the alert to.

``stride_ignore_ssl_errors``: Ignore TLS errors (self-signed certificates, etc.). Default is false.

``stride_proxy``: By default ElastAlert will not use a network proxy to send notifications to Stride. Set this option using ``hostname:port`` if you need to use a proxy.

MS Teams
~~~~~~~~

MS Teams alerter will send a notification to a predefined Microsoft Teams channel.

The alerter requires the following options:

``ms_teams_webhook_url``: The webhook URL that includes your auth data and the ID of the channel you want to post to. Go to the Connectors menu in your channel and configure an Incoming Webhook, then copy the resulting URL. You can use a list of URLs to send to multiple channels.

``ms_teams_alert_summary``: Summary should be configured according to `MS documentation `_, although it currently does not seem to be displayed by Teams.

Optional:

``ms_teams_theme_color``: By default the alert will be posted without any color line. To add color, set this attribute to an HTML color value, e.g. ``#ff0000`` for red.

``ms_teams_proxy``: By default ElastAlert will not use a network proxy to send notifications to MS Teams. Set this option using ``hostname:port`` if you need to use a proxy.

``ms_teams_alert_fixed_width``: By default this is ``False`` and the notification will be sent to MS Teams as-is. Teams supports a partial Markdown implementation, which means asterisks, underscores and other characters may be interpreted as Markdown. Currently, Teams does not fully implement code blocks. Setting this attribute to ``True`` will enable line-by-line code blocks. It is recommended to enable this to get clearer notifications in Teams.
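Putting these options together, an MS Teams alerter sketch might look like this; the webhook URL and summary text are placeholders::

    alert:
      - ms_teams
    ms_teams_webhook_url: "https://outlook.office.com/webhook/xxxx-placeholder"
    ms_teams_alert_summary: "ElastAlert notification"
    ms_teams_theme_color: "#ff0000"
    ms_teams_alert_fixed_width: True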
Slack
~~~~~

Slack alerter will send a notification to a predefined Slack channel. The body of the notification is formatted the same as with other alerters.

The alerter requires the following option:

``slack_webhook_url``: The webhook URL that includes your auth data and the ID of the channel (room) you want to post to. Go to the Incoming Webhooks section in your Slack account https://XXXXX.slack.com/services/new/incoming-webhook , choose the channel, click 'Add Incoming Webhooks Integration' and copy the resulting URL. You can use a list of URLs to send to multiple channels.

Optional:

``slack_username_override``: By default Slack will use your username when posting to the channel. Use this option to change it (free text).

``slack_channel_override``: Incoming webhooks have a default channel, but it can be overridden. A public channel can be specified with "#other-channel", and a direct message with "@username".

``slack_emoji_override``: By default ElastAlert will use the :ghost: emoji when posting to the channel. You can use a different emoji per ElastAlert rule. Any Apple emoji can be used, see http://emojipedia.org/apple/ . If the slack_icon_url_override parameter is provided, the emoji is ignored.

``slack_icon_url_override``: By default ElastAlert will use the :ghost: emoji when posting to the channel. You can provide an icon_url to use a custom image. Provide the absolute address of the picture, for example: http://some.address.com/image.jpg .

``slack_msg_color``: By default the alert will be posted with the 'danger' color. You can also use 'good' or 'warning' colors.

``slack_proxy``: By default ElastAlert will not use a network proxy to send notifications to Slack. Set this option using ``hostname:port`` if you need to use a proxy.

``slack_alert_fields``: You can add additional fields to your Slack alerts using this field. Specify the title using `title` and a value for the field using `value`. Additionally you can specify whether or not this field should be a `short` field using `short: true`.

``slack_title``: Sets a title for the message; this shows up as blue text at the start of the message.

``slack_title_link``: You can add a link in your Slack notification by setting this to a valid URL. Requires slack_title to be set.

``slack_timeout``: You can specify a timeout value, in seconds, for communicating with Slack. The default is 10. If a timeout occurs, the alert will be retried next time elastalert cycles.

``slack_attach_kibana_discover_url``: Enables the attachment of the ``kibana_discover_url`` to the slack notification. The config ``generate_kibana_discover_url`` must also be ``True`` in order to generate the url. Defaults to ``False``.

``slack_kibana_discover_color``: The color of the Kibana Discover url attachment. Defaults to ``#ec4b98``.

``slack_kibana_discover_title``: The title of the Kibana Discover url attachment. Defaults to ``Discover in Kibana``.

Mattermost
~~~~~~~~~~

Mattermost alerter will send a notification to a predefined Mattermost channel. The body of the notification is formatted the same as with other alerters.

The alerter requires the following option:

``mattermost_webhook_url``: The webhook URL.
Follow the instructions on https://docs.mattermost.com/developer/webhooks-incoming.html to create an incoming webhook on your Mattermost installation. Optional: ``mattermost_proxy``: By default ElastAlert will not use a network proxy to send notifications to Mattermost. Set this option using ``hostname:port`` if you need to use a proxy. ``mattermost_ignore_ssl_errors``: By default ElastAlert will verify SSL certificate. Set this option to ``False`` if you want to ignore SSL errors. ``mattermost_username_override``: By default Mattermost will use your username when posting to the channel. Use this option to change it (free text). ``mattermost_channel_override``: Incoming webhooks have a default channel, but it can be overridden. A public channel can be specified "#other-channel", and a Direct Message with "@username". ``mattermost_icon_url_override``: By default ElastAlert will use the default webhook icon when posting to the channel. You can provide icon_url to use custom image. Provide absolute address of the picture (for example: http://some.address.com/image.jpg) or Base64 data url. ``mattermost_msg_pretext``: You can set the message attachment pretext using this option. ``mattermost_msg_color``: By default the alert will be posted with the 'danger' color. You can also use 'good', 'warning', or hex color code. ``mattermost_msg_fields``: You can add fields to your Mattermost alerts using this option. You can specify the title using `title` and the text value using `value`. Additionally you can specify whether this field should be a `short` field using `short: true`. If you set `args` and `value` is a formattable string, ElastAlert will format the incident key based on the provided array of fields from the rule or match. See https://docs.mattermost.com/developer/message-attachments.html#fields for more information. Telegram ~~~~~~~~ Telegram alerter will send a notification to a predefined Telegram username or channel. The body of the notification is formatted the same as with other alerters. The alerter requires the following two options: ``telegram_bot_token``: The token is a string along the lines of ``110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsaw`` that will be required to authorize the bot and send requests to the Bot API. You can learn about obtaining tokens and generating new ones in this document https://core.telegram.org/bots#botfather ``telegram_room_id``: Unique identifier for the target chat or username of the target channel using telegram chat_id (in the format "-xxxxxxxx") Optional: ``telegram_api_url``: Custom domain to call Telegram Bot API. Default to api.telegram.org ``telegram_proxy``: By default ElastAlert will not use a network proxy to send notifications to Telegram. Set this option using ``hostname:port`` if you need to use a proxy. GoogleChat ~~~~~~~~~~ GoogleChat alerter will send a notification to a predefined GoogleChat channel. The body of the notification is formatted the same as with other alerters. The alerter requires the following options: ``googlechat_webhook_url``: The webhook URL that includes the channel (room) you want to post to. Go to the Google Chat website https://chat.google.com and choose the channel in which you wish to receive the notifications. Select 'Configure Webhooks' to create a new webhook or to copy the URL from an existing one. You can use a list of URLs to send to multiple channels. Optional: ``googlechat_format``: Formatting for the notification. Can be either 'card' or 'basic' (default). 
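For example, a card-style GoogleChat sketch using the card options described below might look like this; the webhook and Kibana URLs are placeholders::

    alert:
      - googlechat
    googlechat_webhook_url: "https://chat.googleapis.com/v1/spaces/XXXX/messages?key=YYYY&token=ZZZZ"
    googlechat_format: "card"
    googlechat_header_title: "ElastAlert"
    googlechat_footer_kibanalink: "https://kibana.example.com"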
``googlechat_header_title``: Sets the text for the card header title. (Only used if format=card)

``googlechat_header_subtitle``: Sets the text for the card header subtitle. (Only used if format=card)

``googlechat_header_image``: URL for the card header icon. (Only used if format=card)

``googlechat_footer_kibanalink``: URL to Kibana to include in the card footer. (Only used if format=card)

PagerDuty
~~~~~~~~~

PagerDuty alerter will trigger an incident to a predefined PagerDuty service. The body of the notification is formatted the same as with other alerters.

The alerter requires the following options:

``pagerduty_service_key``: Integration Key generated after creating a service with the 'Use our API directly' option at Integration Settings.

``pagerduty_client_name``: The name of the monitoring client that is triggering this event.

``pagerduty_event_type``: Any of the following: `trigger`, `resolve`, or `acknowledge`. (Optional, defaults to `trigger`)

Optional:

``alert_subject``: If set, this will be used as the Incident description within PagerDuty. If not set, ElastAlert will default to using the rule name of the alert for the incident.

``alert_subject_args``: If set, and ``alert_subject`` is a formattable string, ElastAlert will format the subject based on the provided array of fields from the rule or match.

``pagerduty_incident_key``: If not set, PagerDuty will trigger a new incident for each alert sent. If set to a unique string per rule, PagerDuty will use it to identify the incident that this event should be applied to. If there's no open (i.e. unresolved) incident with this key, a new one will be created. If there's already an open incident with a matching key, this event will be appended to that incident's log.

``pagerduty_incident_key_args``: If set, and ``pagerduty_incident_key`` is a formattable string, Elastalert will format the incident key based on the provided array of fields from the rule or match.

``pagerduty_proxy``: By default ElastAlert will not use a network proxy to send notifications to PagerDuty. Set this option using ``hostname:port`` if you need to use a proxy.

V2 API Options (Optional):

These options are specific to the PagerDuty V2 API. See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2

``pagerduty_api_version``: Defaults to `v1`. Set to `v2` to enable the PagerDuty V2 Event API.

``pagerduty_v2_payload_class``: Sets the class of the payload. (the event type in PagerDuty)

``pagerduty_v2_payload_class_args``: If set, and ``pagerduty_v2_payload_class`` is a formattable string, Elastalert will format the class based on the provided array of fields from the rule or match.

``pagerduty_v2_payload_component``: Sets the component of the payload. (what program/interface/etc the event came from)

``pagerduty_v2_payload_component_args``: If set, and ``pagerduty_v2_payload_component`` is a formattable string, Elastalert will format the component based on the provided array of fields from the rule or match.

``pagerduty_v2_payload_group``: Sets the logical grouping (e.g. app-stack).

``pagerduty_v2_payload_group_args``: If set, and ``pagerduty_v2_payload_group`` is a formattable string, Elastalert will format the group based on the provided array of fields from the rule or match.

``pagerduty_v2_payload_severity``: Sets the severity of the page. (defaults to `critical`, valid options: `critical`, `error`, `warning`, `info`)

``pagerduty_v2_payload_source``: Sets the source of the event, preferably the hostname or fqdn.

``pagerduty_v2_payload_source_args``: If set, and ``pagerduty_v2_payload_source`` is a formattable string, Elastalert will format the source based on the provided array of fields from the rule or match.
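As a sketch, a rule using the V2 Events API might combine these options as follows; the service key and hostname are placeholders::

    alert:
      - pagerduty
    pagerduty_service_key: "0123456789abcdefplaceholder"
    pagerduty_client_name: "ElastAlert"
    pagerduty_api_version: "v2"
    pagerduty_v2_payload_severity: "warning"
    pagerduty_v2_payload_source: "web-01.example.com"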
PagerTree
~~~~~~~~~

PagerTree alerter will trigger an incident to a predefined PagerTree integration url.

The alerter requires the following options:

``pagertree_integration_url``: URL generated by PagerTree for the integration.

Exotel
~~~~~~

Developers in India can use the Exotel alerter; it will send an alert to a mobile phone as an SMS from your ExoPhone. The alert name along with the message body will be sent as an SMS.

The alerter requires the following option:

``exotel_accout_sid``: This is the SID of your Exotel account.

``exotel_auth_token``: The auth token associated with your Exotel account.

If you don't know how to find your account SID and auth token, refer to http://support.exotel.in/support/solutions/articles/3000023019-how-to-find-my-exotel-token-and-exotel-sid-

``exotel_to_number``: The phone number to which you would like to send the notification.

``exotel_from_number``: Your ExoPhone number from which the message will be sent.

The alerter has one optional argument:

``exotel_message_body``: The message you want to send in the SMS; if you don't specify this argument, only the rule name is sent.

Twilio
~~~~~~

Twilio alerter will trigger an incident to a mobile phone as an SMS from your Twilio phone number. The alert name will arrive as an SMS once this option is chosen.

The alerter requires the following option:

``twilio_account_sid``: This is the SID of your Twilio account.

``twilio_auth_token``: The auth token associated with your Twilio account.

``twilio_to_number``: The phone number to which you would like to send the notification.

``twilio_from_number``: Your Twilio phone number from which the message will be sent.

VictorOps
~~~~~~~~~

VictorOps alerter will trigger an incident to a predefined VictorOps routing key. The body of the notification is formatted the same as with other alerters.

The alerter requires the following options:

``victorops_api_key``: API key generated under the 'REST Endpoint' in the Integrations settings.

``victorops_routing_key``: VictorOps routing key to route the alert to.

``victorops_message_type``: VictorOps field to specify severity level. Must be one of the following: INFO, WARNING, ACKNOWLEDGEMENT, CRITICAL, RECOVERY.

Optional:

``victorops_entity_id``: The identity of the incident used by VictorOps to correlate incidents throughout the alert lifecycle. If not defined, VictorOps will assign a random string to each alert.

``victorops_entity_display_name``: Human-readable name of the alerting entity to summarize incidents without affecting the life-cycle workflow.

``victorops_proxy``: By default ElastAlert will not use a network proxy to send notifications to VictorOps. Set this option using ``hostname:port`` if you need to use a proxy.

Gitter
~~~~~~

Gitter alerter will send a notification to a predefined Gitter channel. The body of the notification is formatted the same as with other alerters.

The alerter requires the following option:

``gitter_webhook_url``: The webhook URL that includes your auth data and the ID of the channel (room) you want to post to. Go to the Integration Settings of the channel https://gitter.im/ORGA/CHANNEL#integrations , click 'CUSTOM' and copy the resulting URL.

Optional:

``gitter_msg_level``: By default the alert will be posted with the 'error' level. You can use 'info' if you want the messages to be black instead of red.
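For example, a minimal Gitter alerter configuration might look like the following sketch; the webhook URL is a placeholder::

    alert:
      - gitter
    gitter_webhook_url: "https://webhooks.gitter.im/e/0123456789abcdef"
    gitter_msg_level: "info"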
``gitter_proxy``: By default ElastAlert will not use a network proxy to send notifications to Gitter. Set this option using ``hostname:port`` if you need to use a proxy.

ServiceNow
~~~~~~~~~~

The ServiceNow alerter will create a new Incident in ServiceNow. The body of the notification is formatted the same as with other alerters.

The alerter requires the following options:

``servicenow_rest_url``: The ServiceNow REST API url; this will look like https://instancename.service-now.com/api/now/v1/table/incident

``username``: The ServiceNow username to access the api.

``password``: The ServiceNow password to access the api.

``short_description``: The short description to set on the incident.

``comments``: Comments to be attached to the incident; this is the equivalent of work notes.

``assignment_group``: The group to assign the incident to.

``category``: The category to attach the incident to, use an existing category.

``subcategory``: The subcategory to attach the incident to, use an existing subcategory.

``cmdb_ci``: The configuration item to attach the incident to.

``caller_id``: The caller id (email address) of the user that created the incident (elastalert@somewhere.com).

Optional:

``servicenow_proxy``: By default ElastAlert will not use a network proxy to send notifications to ServiceNow. Set this option using ``hostname:port`` if you need to use a proxy.

Debug
~~~~~

The debug alerter will log the alert information using the Python logger at the info level. It is logged into a Python Logger object with the name ``elastalert`` that can be easily accessed using the ``getLogger`` command.

Stomp
~~~~~

This alert type will use the STOMP protocol in order to push a message to a broker like ActiveMQ or RabbitMQ. The message body is a JSON string containing the alert details. The default values will work with a pristine ActiveMQ installation.

Optional:

``stomp_hostname``: The STOMP host to use, defaults to localhost.

``stomp_hostport``: The STOMP port to use, defaults to 61613.

``stomp_login``: The STOMP login to use, defaults to admin.

``stomp_password``: The STOMP password to use, defaults to admin.

``stomp_destination``: The STOMP destination to use, defaults to /queue/ALERT. The stomp_destination field depends on the broker; the /queue/ALERT example is the nomenclature used by ActiveMQ. Each broker has its own logic.

Alerta
~~~~~~

Alerta alerter will post an alert in the Alerta server instance through the alert API endpoint. See http://alerta.readthedocs.io/en/latest/api/alert.html for more details on the Alerta JSON format.

For Alerta 5.0

Required:

``alerta_api_url``: API server URL.

Optional:

``alerta_api_key``: This is the API key for the Alerta server, sent in an ``Authorization`` HTTP header. If not defined, no Authorization header is sent.

``alerta_use_qk_as_resource``: If true and query_key is present, this will override the ``alerta_resource`` field with the ``query_key`` value (can be useful if ``query_key`` is a hostname).

``alerta_use_match_timestamp``: If true, it will use the timestamp of the first match as the ``createTime`` of the alert; otherwise, the current server time is used.

``alert_missing_value``: Text to replace any match field not found when formatting strings. Defaults to ``<MISSING VALUE>``.

The following options dictate the values of the API JSON payload:

``alerta_severity``: Defaults to "warning".

``alerta_timeout``: Defaults to 86400 (1 day).

``alerta_type``: Defaults to "elastalert".
The following options use Python-like string syntax ``{}`` or ``%()s`` to access parts of the match, similar to the CommandAlerter. Ie: "Alert for {clientip}". If the referenced key is not found in the match, it is replaced by the text indicated by the option ``alert_missing_value``. ``alerta_resource``: Defaults to "elastalert". ``alerta_service``: Defaults to "elastalert". ``alerta_origin``: Defaults to "elastalert". ``alerta_environment``: Defaults to "Production". ``alerta_group``: Defaults to "". ``alerta_correlate``: Defaults to an empty list. ``alerta_tags``: Defaults to an empty list. ``alerta_event``: Defaults to the rule's name. ``alerta_text``: Defaults to the rule's text according to its type. ``alerta_value``: Defaults to "". The ``attributes`` dictionary is built by joining the lists from ``alerta_attributes_keys`` and ``alerta_attributes_values``, considered in order. Example usage using old-style format:: alert: - alerta alerta_api_url: "http://youralertahost/api/alert" alerta_attributes_keys: ["hostname", "TimestampEvent", "senderIP" ] alerta_attributes_values: ["%(key)s", "%(logdate)s", "%(sender_ip)s" ] alerta_correlate: ["ProbeUP","ProbeDOWN"] alerta_event: "ProbeUP" alerta_text: "Probe %(hostname)s is UP at %(logdate)s GMT" alerta_value: "UP" Example usage using new-style format:: alert: - alerta alerta_attributes_values: ["{key}", "{logdate}", "{sender_ip}" ] alerta_text: "Probe {hostname} is UP at {logdate} GMT" HTTP POST ~~~~~~~~~ This alert type will send results to a JSON endpoint using HTTP POST. The key names are configurable so this is compatible with almost any endpoint. By default, the JSON will contain all the items from the match, unless you specify http_post_payload, in which case it will only contain those items. Required: ``http_post_url``: The URL to POST. Optional: ``http_post_payload``: List of keys:values to use as the content of the POST. Example - ip:clientip will map the value from the clientip index of Elasticsearch to JSON key named ip. If not defined, all the Elasticsearch keys will be sent. ``http_post_static_payload``: Key:value pairs of static parameters to be sent, along with the Elasticsearch results. Put your authentication or other information here. ``http_post_headers``: Key:value pairs of headers to be sent as part of the request. ``http_post_proxy``: URL of proxy, if required. ``http_post_all_values``: Boolean of whether or not to include every key value pair from the match in addition to those in http_post_payload and http_post_static_payload. Defaults to True if http_post_payload is not specified, otherwise False. ``http_post_timeout``: The timeout value, in seconds, for making the post. The default is 10. If a timeout occurs, the alert will be retried next time elastalert cycles. Example usage:: alert: post http_post_url: "http://example.com/api" http_post_payload: ip: clientip http_post_static_payload: apikey: abc123 http_post_headers: authorization: Basic 123dr3234 Alerter ~~~~~~~ For all Alerter subclasses, you may reference values from a top-level rule property in your Alerter fields by referring to the property name surrounded by dollar signs. This can be useful when you have rule-level properties that you would like to reference many times in your alert. For example: Example usage:: jira_priority: $priority$ jira_alert_owner: $owner$ Line Notify ~~~~~~~~~~~ Line Notify will send notification to a Line application. The body of the notification is formatted the same as with other alerters. 
Required:

``linenotify_access_token``: The access token that you got from https://notify-bot.line.me/my/

theHive
~~~~~~~

theHive alert type will send a JSON request to theHive (Security Incident Response Platform) with the TheHive4py API. The sent request will be stored as a Hive Alert with a description and observables.

Required:

``hive_connection``: The connection details as key:values. Required keys are ``hive_host``, ``hive_port`` and ``hive_apikey``.

``hive_alert_config``: Configuration options for the alert.

Optional:

``hive_proxies``: Proxy configuration.

``hive_observable_data_mapping``: If needed, matched data fields can be mapped to TheHive observable types using python string formatting.

Example usage::

    alert: hivealerter

    hive_connection:
      hive_host: http://localhost
      hive_port: <hive_port>
      hive_apikey: <hive_apikey>
      hive_proxies:
        http: ''
        https: ''

    hive_alert_config:
      title: 'Title'  ## This will default to {rule[index]_rule[name]} if not provided
      type: 'external'
      source: 'elastalert'
      description: '{match[field1]} {rule[name]} Sample description'
      severity: 2
      tags: ['tag1', 'tag2 {rule[name]}']
      tlp: 3
      status: 'New'
      follow: True

    hive_observable_data_mapping:
      - domain: "{match[field1]}_{rule[name]}"
      - domain: "{match[field]}"
      - ip: "{match[ip_field]}"

Zabbix
~~~~~~

Zabbix will send a notification to a Zabbix server. The specified item on the specified host receives a value of 1 for each hit. For example, if the Elasticsearch query produces 3 hits in the last execution of ElastAlert, three '1' (integer) values will be sent from ElastAlert to the Zabbix server. If the query has 0 hits, no value will be sent.

Required:

``zbx_sender_host``: The address where the Zabbix server is running.

``zbx_sender_port``: The port where the Zabbix server is listening.

``zbx_host``: This field sets the host in Zabbix that receives the value sent by ElastAlert.

``zbx_item``: This field sets the item in the host that receives the value sent by ElastAlert.

elastalert-0.2.4/docs/source/running_elastalert.rst000066400000000000000000000250211364615736500225570ustar00rootroot00000000000000.. _tutorial:

Running ElastAlert for the First Time
=====================================

Requirements
------------

- Elasticsearch
- ISO8601 or Unix timestamped data
- Python 3.6
- pip, see requirements.txt
- Packages on Ubuntu 14.x: python-pip python-dev libffi-dev libssl-dev

Downloading and Configuring
---------------------------

You can either install the latest released version of ElastAlert using pip::

    $ pip install elastalert

or you can clone the ElastAlert repository for the most recent changes::

    $ git clone https://github.com/Yelp/elastalert.git

Install the module::

    $ pip install "setuptools>=11.3"
    $ python setup.py install

Depending on the version of Elasticsearch, you may need to manually install the correct version of elasticsearch-py.

Elasticsearch 5.0+::

    $ pip install "elasticsearch>=5.0.0"

Elasticsearch 2.X::

    $ pip install "elasticsearch<3.0.0"

Next, open up config.yaml.example. In it, you will find several configuration options. ElastAlert may be run without changing any of these settings.

``rules_folder`` is where ElastAlert will load rule configuration files from. It will attempt to load every .yaml file in the folder. Without any valid rules, ElastAlert will not start. ElastAlert will also load new rules, stop running missing rules, and restart modified rules as the files in this folder change. For this tutorial, we will use the example_rules folder.

``run_every`` is how often ElastAlert will query Elasticsearch.
``buffer_time`` is the size of the query window, stretching backwards from the time each query is run. This value is ignored for rules where ``use_count_query`` or ``use_terms_query`` is set to true. ``es_host`` is the address of an Elasticsearch cluster where ElastAlert will store data about its state, queries run, alerts, and errors. Each rule may also use a different Elasticsearch host to query against. ``es_port`` is the port corresponding to ``es_host``. ``use_ssl``: Optional; whether or not to connect to ``es_host`` using TLS; set to ``True`` or ``False``. ``verify_certs``: Optional; whether or not to verify TLS certificates; set to ``True`` or ``False``. The default is ``True`` ``client_cert``: Optional; path to a PEM certificate to use as the client certificate ``client_key``: Optional; path to a private key file to use as the client key ``ca_certs``: Optional; path to a CA cert bundle to use to verify SSL connections ``es_username``: Optional; basic-auth username for connecting to ``es_host``. ``es_password``: Optional; basic-auth password for connecting to ``es_host``. ``es_url_prefix``: Optional; URL prefix for the Elasticsearch endpoint. ``es_send_get_body_as``: Optional; Method for querying Elasticsearch - ``GET``, ``POST`` or ``source``. The default is ``GET`` ``writeback_index`` is the name of the index in which ElastAlert will store data. We will create this index later. ``alert_time_limit`` is the retry window for failed alerts. Save the file as ``config.yaml`` Setting Up Elasticsearch ------------------------ ElastAlert saves information and metadata about its queries and its alerts back to Elasticsearch. This is useful for auditing, debugging, and it allows ElastAlert to restart and resume exactly where it left off. This is not required for ElastAlert to run, but highly recommended. First, we need to create an index for ElastAlert to write to by running ``elastalert-create-index`` and following the instructions:: $ elastalert-create-index New index name (Default elastalert_status) Name of existing index to copy (Default None) New index elastalert_status created Done! For information about what data will go here, see :ref:`ElastAlert Metadata Index `. Creating a Rule --------------- Each rule defines a query to perform, parameters on what triggers a match, and a list of alerts to fire for each match. We are going to use ``example_rules/example_frequency.yaml`` as a template:: # From example_rules/example_frequency.yaml es_host: elasticsearch.example.com es_port: 14900 name: Example rule type: frequency index: logstash-* num_events: 50 timeframe: hours: 4 filter: - term: some_field: "some_value" alert: - "email" email: - "elastalert@example.com" ``es_host`` and ``es_port`` should point to the Elasticsearch cluster we want to query. ``name`` is the unique name for this rule. ElastAlert will not start if two rules share the same name. ``type``: Each rule has a different type which may take different parameters. The ``frequency`` type means "Alert when more than ``num_events`` occur within ``timeframe``." For information other types, see :ref:`Rule types `. ``index``: The name of the index(es) to query. If you are using Logstash, by default the indexes will match ``"logstash-*"``. ``num_events``: This parameter is specific to ``frequency`` type and is the threshold for when an alert is triggered. ``timeframe`` is the time period in which ``num_events`` must occur. ``filter`` is a list of Elasticsearch filters that are used to filter results. 
Here we have a single term filter for documents with ``some_field`` matching ``some_value``. See :ref:`Writing Filters For Rules ` for more information. If no filters are desired, it should be specified as an empty list: ``filter: []`` ``alert`` is a list of alerts to run on each match. For more information on alert types, see :ref:`Alerts `. The email alert requires an SMTP server for sending mail. By default, it will attempt to use localhost. This can be changed with the ``smtp_host`` option. ``email`` is a list of addresses to which alerts will be sent. There are many other optional configuration options, see :ref:`Common configuration options `. All documents must have a timestamp field. ElastAlert will try to use ``@timestamp`` by default, but this can be changed with the ``timestamp_field`` option. By default, ElastAlert uses ISO8601 timestamps, though unix timestamps are supported by setting ``timestamp_type``. As is, this rule means "Send an email to elastalert@example.com when there are more than 50 documents with ``some_field == some_value`` within a 4 hour period." Testing Your Rule ----------------- Running the ``elastalert-test-rule`` tool will test that your config file successfully loads and run it in debug mode over the last 24 hours:: $ elastalert-test-rule example_rules/example_frequency.yaml If you want to specify a configuration file to use, you can run it with the config flag:: $ elastalert-test-rule --config example_rules/example_frequency.yaml The configuration preferences will be loaded as follows: 1. Configurations specified in the yaml file. 2. Configurations specified in the config file, if specified. 3. Default configurations, for the tool to run. See :ref:`the testing section for more details ` Running ElastAlert ------------------ There are two ways of invoking ElastAlert. As a daemon, through Supervisor (http://supervisord.org/), or directly with Python. For easier debugging purposes in this tutorial, we will invoke it directly:: $ python -m elastalert.elastalert --verbose --rule example_frequency.yaml # or use the entry point: elastalert --verbose --rule ... No handlers could be found for logger "Elasticsearch" INFO:root:Queried rule Example rule from 1-15 14:22 PST to 1-15 15:07 PST: 5 hits INFO:Elasticsearch:POST http://elasticsearch.example.com:14900/elastalert_status/elastalert_status?op_type=create [status:201 request:0.025s] INFO:root:Ran Example rule from 1-15 14:22 PST to 1-15 15:07 PST: 5 query hits (0 already seen), 0 matches, 0 alerts sent INFO:root:Sleeping for 297 seconds ElastAlert uses the python logging system and ``--verbose`` sets it to display INFO level messages. ``--rule example_frequency.yaml`` specifies the rule to run, otherwise ElastAlert will attempt to load the other rules in the example_rules folder. Let's break down the response to see what's happening. ``Queried rule Example rule from 1-15 14:22 PST to 1-15 15:07 PST: 5 hits`` ElastAlert periodically queries the most recent ``buffer_time`` (default 45 minutes) for data matching the filters. Here we see that it matched 5 hits. ``POST http://elasticsearch.example.com:14900/elastalert_status/elastalert_status?op_type=create [status:201 request:0.025s]`` This line showing that ElastAlert uploaded a document to the elastalert_status index with information about the query it just made. ``Ran Example rule from 1-15 14:22 PST to 1-15 15:07 PST: 5 query hits (0 already seen), 0 matches, 0 alerts sent`` The line means ElastAlert has finished processing the rule. 
For large time periods, sometimes multiple queries may be run, but their data will be processed together. ``query hits`` is the number of documents that are downloaded from Elasticsearch, ``already seen`` refers to documents that were already counted in a previous overlapping query and will be ignored, ``matches`` is the number of matches the rule type outputted, and ``alerts sent`` is the number of alerts actually sent. This may differ from ``matches`` because of options like ``realert`` and ``aggregation`` or because of an error. ``Sleeping for 297 seconds`` The default ``run_every`` is 5 minutes, meaning ElastAlert will sleep until 5 minutes have elapsed from the last cycle before running queries for each rule again with time ranges shifted forward 5 minutes. Say, over the next 297 seconds, 46 more matching documents were added to Elasticsearch:: INFO:root:Queried rule Example rule from 1-15 14:27 PST to 1-15 15:12 PST: 51 hits ... INFO:root:Sent email to ['elastalert@example.com'] ... INFO:root:Ran Example rule from 1-15 14:27 PST to 1-15 15:12 PST: 51 query hits, 1 matches, 1 alerts sent The body of the email will contain something like:: Example rule At least 50 events occurred between 1-15 11:12 PST and 1-15 15:12 PST @timestamp: 2015-01-15T15:12:00-08:00 If an error occurred, such as an unreachable SMTP server, you may see: ``ERROR:root:Error while running alert email: Error connecting to SMTP host: [Errno 61] Connection refused`` Note that if you stop ElastAlert and then run it again later, it will look up ``elastalert_status`` and begin querying at the end time of the last query. This is to prevent duplication or skipping of alerts if ElastAlert is restarted. By using the ``--debug`` flag instead of ``--verbose``, the body of email will instead be logged and the email will not be sent. In addition, the queries will not be saved to ``elastalert_status``. elastalert-0.2.4/elastalert/000077500000000000000000000000001364615736500160355ustar00rootroot00000000000000elastalert-0.2.4/elastalert/__init__.py000066400000000000000000000270171364615736500201550ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import time from elasticsearch import Elasticsearch from elasticsearch import RequestsHttpConnection from elasticsearch.client import _make_path from elasticsearch.client import query_params from elasticsearch.exceptions import TransportError class ElasticSearchClient(Elasticsearch): """ Extension of low level :class:`Elasticsearch` client with additional version resolving features """ def __init__(self, conf): """ :arg conf: es_conn_config dictionary. Ref. :func:`~util.build_es_conn_config` """ super(ElasticSearchClient, self).__init__(host=conf['es_host'], port=conf['es_port'], url_prefix=conf['es_url_prefix'], use_ssl=conf['use_ssl'], verify_certs=conf['verify_certs'], ca_certs=conf['ca_certs'], connection_class=RequestsHttpConnection, http_auth=conf['http_auth'], timeout=conf['es_conn_timeout'], send_get_body_as=conf['send_get_body_as'], client_cert=conf['client_cert'], client_key=conf['client_key']) self._conf = copy.copy(conf) self._es_version = None @property def conf(self): """ Returns the provided es_conn_config used when initializing the class instance. """ return self._conf @property def es_version(self): """ Returns the reported version from the Elasticsearch server. 
""" if self._es_version is None: for retry in range(3): try: self._es_version = self.info()['version']['number'] break except TransportError: if retry == 2: raise time.sleep(3) return self._es_version def is_atleastfive(self): """ Returns True when the Elasticsearch server version >= 5 """ return int(self.es_version.split(".")[0]) >= 5 def is_atleastsix(self): """ Returns True when the Elasticsearch server version >= 6 """ return int(self.es_version.split(".")[0]) >= 6 def is_atleastsixtwo(self): """ Returns True when the Elasticsearch server version >= 6.2 """ major, minor = list(map(int, self.es_version.split(".")[:2])) return major > 6 or (major == 6 and minor >= 2) def is_atleastsixsix(self): """ Returns True when the Elasticsearch server version >= 6.6 """ major, minor = list(map(int, self.es_version.split(".")[:2])) return major > 6 or (major == 6 and minor >= 6) def is_atleastseven(self): """ Returns True when the Elasticsearch server version >= 7 """ return int(self.es_version.split(".")[0]) >= 7 def resolve_writeback_index(self, writeback_index, doc_type): """ In ES6, you cannot have multiple _types per index, therefore we use self.writeback_index as the prefix for the actual index name, based on doc_type. """ if not self.is_atleastsix(): return writeback_index elif doc_type == 'silence': return writeback_index + '_silence' elif doc_type == 'past_elastalert': return writeback_index + '_past' elif doc_type == 'elastalert_status': return writeback_index + '_status' elif doc_type == 'elastalert_error': return writeback_index + '_error' return writeback_index @query_params( "_source", "_source_exclude", "_source_excludes", "_source_include", "_source_includes", "allow_no_indices", "allow_partial_search_results", "analyze_wildcard", "analyzer", "batched_reduce_size", "default_operator", "df", "docvalue_fields", "expand_wildcards", "explain", "from_", "ignore_unavailable", "lenient", "max_concurrent_shard_requests", "pre_filter_shard_size", "preference", "q", "rest_total_hits_as_int", "request_cache", "routing", "scroll", "search_type", "seq_no_primary_term", "size", "sort", "stats", "stored_fields", "suggest_field", "suggest_mode", "suggest_size", "suggest_text", "terminate_after", "timeout", "track_scores", "track_total_hits", "typed_keys", "version", ) def deprecated_search(self, index=None, doc_type=None, body=None, params=None): """ Execute a search query and get back search hits that match the query. ``_ :arg index: A list of index names to search, or a string containing a comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices :arg doc_type: A comma-separated list of document types to search; leave empty to perform the operation on all types :arg body: The search definition using the Query DSL :arg _source: True or false to return the _source field or not, or a list of fields to return :arg _source_exclude: A list of fields to exclude from the returned _source field :arg _source_include: A list of fields to extract and return from the _source field :arg allow_no_indices: Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified) :arg allow_partial_search_results: Set to false to return an overall failure if the request would produce partial results. 
Defaults to True, which will allow partial results in the case of timeouts or partial failures :arg analyze_wildcard: Specify whether wildcard and prefix queries should be analyzed (default: false) :arg analyzer: The analyzer to use for the query string :arg batched_reduce_size: The number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection mechanism to reduce the memory overhead per search request if the potential number of shards in the request can be large., default 512 :arg default_operator: The default operator for query string query (AND or OR), default 'OR', valid choices are: 'AND', 'OR' :arg df: The field to use as default where no field prefix is given in the query string :arg docvalue_fields: A comma-separated list of fields to return as the docvalue representation of a field for each hit :arg expand_wildcards: Whether to expand wildcard expression to concrete indices that are open, closed or both., default 'open', valid choices are: 'open', 'closed', 'none', 'all' :arg explain: Specify whether to return detailed information about score computation as part of a hit :arg from\\_: Starting offset (default: 0) :arg ignore_unavailable: Whether specified concrete indices should be ignored when unavailable (missing or closed) :arg lenient: Specify whether format-based query failures (such as providing text to a numeric field) should be ignored :arg max_concurrent_shard_requests: The number of concurrent shard requests this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests, default 'The default grows with the number of nodes in the cluster but is at most 256.' :arg pre_filter_shard_size: A threshold that enforces a pre-filter roundtrip to prefilter search shards based on query rewriting if the number of shards the search request expands to exceeds the threshold. This filter roundtrip can limit the number of shards significantly if for instance a shard can not match any documents based on it's rewrite method ie. if date filters are mandatory to match but the shard bounds and the query are disjoint., default 128 :arg preference: Specify the node or shard the operation should be performed on (default: random) :arg q: Query in the Lucene query string syntax :arg rest_total_hits_as_int: This parameter is used to restore the total hits as a number in the response. 
This param is added version 6.x to handle mixed cluster queries where nodes are in multiple versions (7.0 and 6.latest) :arg request_cache: Specify if request cache should be used for this request or not, defaults to index level setting :arg routing: A comma-separated list of specific routing values :arg scroll: Specify how long a consistent view of the index should be maintained for scrolled search :arg search_type: Search operation type, valid choices are: 'query_then_fetch', 'dfs_query_then_fetch' :arg size: Number of hits to return (default: 10) :arg sort: A comma-separated list of : pairs :arg stats: Specific 'tag' of the request for logging and statistical purposes :arg stored_fields: A comma-separated list of stored fields to return as part of a hit :arg suggest_field: Specify which field to use for suggestions :arg suggest_mode: Specify suggest mode, default 'missing', valid choices are: 'missing', 'popular', 'always' :arg suggest_size: How many suggestions to return in response :arg suggest_text: The source text for which the suggestions should be returned :arg terminate_after: The maximum number of documents to collect for each shard, upon reaching which the query execution will terminate early. :arg timeout: Explicit operation timeout :arg track_scores: Whether to calculate and return scores even if they are not used for sorting :arg track_total_hits: Indicate if the number of documents that match the query should be tracked :arg typed_keys: Specify whether aggregation and suggester names should be prefixed by their respective types in the response :arg version: Specify whether to return document version as part of a hit """ # from is a reserved word so it cannot be used, use from_ instead if "from_" in params: params["from"] = params.pop("from_") if not index: index = "_all" res = self.transport.perform_request( "GET", _make_path(index, doc_type, "_search"), params=params, body=body ) if type(res) == list or type(res) == tuple: return res[1] return res elastalert-0.2.4/elastalert/alerts.py000066400000000000000000002753051364615736500177150ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import json import logging import os import re import subprocess import sys import time import uuid import warnings from email.mime.text import MIMEText from email.utils import formatdate from html.parser import HTMLParser from smtplib import SMTP from smtplib import SMTP_SSL from smtplib import SMTPAuthenticationError from smtplib import SMTPException from socket import error import boto3 import requests import stomp from exotel import Exotel from jira.client import JIRA from jira.exceptions import JIRAError from requests.auth import HTTPProxyAuth from requests.exceptions import RequestException from staticconf.loader import yaml_loader from texttable import Texttable from twilio.base.exceptions import TwilioRestException from twilio.rest import Client as TwilioClient from .util import EAException from .util import elastalert_logger from .util import lookup_es_key from .util import pretty_ts from .util import resolve_string from .util import ts_now from .util import ts_to_dt class DateTimeEncoder(json.JSONEncoder): def default(self, obj): if hasattr(obj, 'isoformat'): return obj.isoformat() else: return json.JSONEncoder.default(self, obj) class BasicMatchString(object): """ Creates a string containing fields in match for the given rule. 
""" def __init__(self, rule, match): self.rule = rule self.match = match def _ensure_new_line(self): while self.text[-2:] != '\n\n': self.text += '\n' def _add_custom_alert_text(self): missing = self.rule.get('alert_missing_value', '') alert_text = str(self.rule.get('alert_text', '')) if 'alert_text_args' in self.rule: alert_text_args = self.rule.get('alert_text_args') alert_text_values = [lookup_es_key(self.match, arg) for arg in alert_text_args] # Support referencing other top-level rule properties # This technically may not work if there is a top-level rule property with the same name # as an es result key, since it would have been matched in the lookup_es_key call above for i, text_value in enumerate(alert_text_values): if text_value is None: alert_value = self.rule.get(alert_text_args[i]) if alert_value: alert_text_values[i] = alert_value alert_text_values = [missing if val is None else val for val in alert_text_values] alert_text = alert_text.format(*alert_text_values) elif 'alert_text_kw' in self.rule: kw = {} for name, kw_name in list(self.rule.get('alert_text_kw').items()): val = lookup_es_key(self.match, name) # Support referencing other top-level rule properties # This technically may not work if there is a top-level rule property with the same name # as an es result key, since it would have been matched in the lookup_es_key call above if val is None: val = self.rule.get(name) kw[kw_name] = missing if val is None else val alert_text = alert_text.format(**kw) self.text += alert_text def _add_rule_text(self): self.text += self.rule['type'].get_match_str(self.match) def _add_top_counts(self): for key, counts in list(self.match.items()): if key.startswith('top_events_'): self.text += '%s:\n' % (key[11:]) top_events = list(counts.items()) if not top_events: self.text += 'No events found.\n' else: top_events.sort(key=lambda x: x[1], reverse=True) for term, count in top_events: self.text += '%s: %s\n' % (term, count) self.text += '\n' def _add_match_items(self): match_items = list(self.match.items()) match_items.sort(key=lambda x: x[0]) for key, value in match_items: if key.startswith('top_events_'): continue value_str = str(value) value_str.replace('\\n', '\n') if type(value) in [list, dict]: try: value_str = self._pretty_print_as_json(value) except TypeError: # Non serializable object, fallback to str pass self.text += '%s: %s\n' % (key, value_str) def _pretty_print_as_json(self, blob): try: return json.dumps(blob, cls=DateTimeEncoder, sort_keys=True, indent=4, ensure_ascii=False) except UnicodeDecodeError: # This blob contains non-unicode, so lets pretend it's Latin-1 to show something return json.dumps(blob, cls=DateTimeEncoder, sort_keys=True, indent=4, encoding='Latin-1', ensure_ascii=False) def __str__(self): self.text = '' if 'alert_text' not in self.rule: self.text += self.rule['name'] + '\n\n' self._add_custom_alert_text() self._ensure_new_line() if self.rule.get('alert_text_type') != 'alert_text_only': self._add_rule_text() self._ensure_new_line() if self.rule.get('top_count_keys'): self._add_top_counts() if self.rule.get('alert_text_type') != 'exclude_fields': self._add_match_items() return self.text class JiraFormattedMatchString(BasicMatchString): def _add_match_items(self): match_items = dict([(x, y) for x, y in list(self.match.items()) if not x.startswith('top_events_')]) json_blob = self._pretty_print_as_json(match_items) preformatted_text = '{{code}}{0}{{code}}'.format(json_blob) self.text += preformatted_text class Alerter(object): """ Base class for types of alerts. 
:param rule: The rule configuration. """ required_options = frozenset([]) def __init__(self, rule): self.rule = rule # pipeline object is created by ElastAlerter.send_alert() # and attached to each alerters used by a rule before calling alert() self.pipeline = None self.resolve_rule_references(self.rule) def resolve_rule_references(self, root): # Support referencing other top-level rule properties to avoid redundant copy/paste if type(root) == list: # Make a copy since we may be modifying the contents of the structure we're walking for i, item in enumerate(copy.copy(root)): if type(item) == dict or type(item) == list: self.resolve_rule_references(root[i]) else: root[i] = self.resolve_rule_reference(item) elif type(root) == dict: # Make a copy since we may be modifying the contents of the structure we're walking for key, value in root.copy().items(): if type(value) == dict or type(value) == list: self.resolve_rule_references(root[key]) else: root[key] = self.resolve_rule_reference(value) def resolve_rule_reference(self, value): strValue = str(value) if strValue.startswith('$') and strValue.endswith('$') and strValue[1:-1] in self.rule: if type(value) == int: return int(self.rule[strValue[1:-1]]) else: return self.rule[strValue[1:-1]] else: return value def alert(self, match): """ Send an alert. Match is a dictionary of information about the alert. :param match: A dictionary of relevant information to the alert. """ raise NotImplementedError() def get_info(self): """ Returns a dictionary of data related to this alert. At minimum, this should contain a field type corresponding to the type of Alerter. """ return {'type': 'Unknown'} def create_title(self, matches): """ Creates custom alert title to be used, e.g. as an e-mail subject or JIRA issue summary. :param matches: A list of dictionaries of relevant information to the alert. 
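For illustration (hypothetical option values): with ``alert_subject: "Errors on {0}"``
and ``alert_subject_args: [host]``, the placeholder is filled from ``matches[0]`` via
``lookup_es_key``; without ``alert_subject`` the rule name is used as the title.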
""" if 'alert_subject' in self.rule: return self.create_custom_title(matches) return self.create_default_title(matches) def create_custom_title(self, matches): alert_subject = str(self.rule['alert_subject']) alert_subject_max_len = int(self.rule.get('alert_subject_max_len', 2048)) if 'alert_subject_args' in self.rule: alert_subject_args = self.rule['alert_subject_args'] alert_subject_values = [lookup_es_key(matches[0], arg) for arg in alert_subject_args] # Support referencing other top-level rule properties # This technically may not work if there is a top-level rule property with the same name # as an es result key, since it would have been matched in the lookup_es_key call above for i, subject_value in enumerate(alert_subject_values): if subject_value is None: alert_value = self.rule.get(alert_subject_args[i]) if alert_value: alert_subject_values[i] = alert_value missing = self.rule.get('alert_missing_value', '') alert_subject_values = [missing if val is None else val for val in alert_subject_values] alert_subject = alert_subject.format(*alert_subject_values) if len(alert_subject) > alert_subject_max_len: alert_subject = alert_subject[:alert_subject_max_len] return alert_subject def create_alert_body(self, matches): body = self.get_aggregation_summary_text(matches) if self.rule.get('alert_text_type') != 'aggregation_summary_only': for match in matches: body += str(BasicMatchString(self.rule, match)) # Separate text of aggregated alerts with dashes if len(matches) > 1: body += '\n----------------------------------------\n' return body def get_aggregation_summary_text__maximum_width(self): """Get maximum width allowed for summary text.""" return 80 def get_aggregation_summary_text(self, matches): text = '' if 'aggregation' in self.rule and 'summary_table_fields' in self.rule: text = self.rule.get('summary_prefix', '') summary_table_fields = self.rule['summary_table_fields'] if not isinstance(summary_table_fields, list): summary_table_fields = [summary_table_fields] # Include a count aggregation so that we can see at a glance how many of each aggregation_key were encountered summary_table_fields_with_count = summary_table_fields + ['count'] text += "Aggregation resulted in the following data for summary_table_fields ==> {0}:\n\n".format( summary_table_fields_with_count ) text_table = Texttable(max_width=self.get_aggregation_summary_text__maximum_width()) text_table.header(summary_table_fields_with_count) # Format all fields as 'text' to avoid long numbers being shown as scientific notation text_table.set_cols_dtype(['t' for i in summary_table_fields_with_count]) match_aggregation = {} # Maintain an aggregate count for each unique key encountered in the aggregation period for match in matches: key_tuple = tuple([str(lookup_es_key(match, key)) for key in summary_table_fields]) if key_tuple not in match_aggregation: match_aggregation[key_tuple] = 1 else: match_aggregation[key_tuple] = match_aggregation[key_tuple] + 1 for keys, count in match_aggregation.items(): text_table.add_row([key for key in keys] + [count]) text += text_table.draw() + '\n\n' text += self.rule.get('summary_prefix', '') return str(text) def create_default_title(self, matches): return self.rule['name'] def get_account(self, account_file): """ Gets the username and password from an account file. :param account_file: Path to the file which contains user and password information. It can be either an absolute file path or one that is relative to the given rule. 
""" if os.path.isabs(account_file): account_file_path = account_file else: account_file_path = os.path.join(os.path.dirname(self.rule['rule_file']), account_file) account_conf = yaml_loader(account_file_path) if 'user' not in account_conf or 'password' not in account_conf: raise EAException('Account file must have user and password fields') self.user = account_conf['user'] self.password = account_conf['password'] class StompAlerter(Alerter): """ The stomp alerter publishes alerts via stomp to a broker. """ required_options = frozenset( ['stomp_hostname', 'stomp_hostport', 'stomp_login', 'stomp_password']) def alert(self, matches): alerts = [] qk = self.rule.get('query_key', None) fullmessage = {} for match in matches: if qk is not None: resmatch = lookup_es_key(match, qk) else: resmatch = None if resmatch is not None: elastalert_logger.info( 'Alert for %s, %s at %s:' % (self.rule['name'], resmatch, lookup_es_key(match, self.rule['timestamp_field']))) alerts.append( 'Alert for %s, %s at %s:' % (self.rule['name'], resmatch, lookup_es_key( match, self.rule['timestamp_field'])) ) fullmessage['match'] = resmatch else: elastalert_logger.info('Rule %s generated an alert at %s:' % ( self.rule['name'], lookup_es_key(match, self.rule['timestamp_field']))) alerts.append( 'Rule %s generated an alert at %s:' % (self.rule['name'], lookup_es_key( match, self.rule['timestamp_field'])) ) fullmessage['match'] = lookup_es_key( match, self.rule['timestamp_field']) elastalert_logger.info(str(BasicMatchString(self.rule, match))) fullmessage['alerts'] = alerts fullmessage['rule'] = self.rule['name'] fullmessage['rule_file'] = self.rule['rule_file'] fullmessage['matching'] = str(BasicMatchString(self.rule, match)) fullmessage['alertDate'] = datetime.datetime.now( ).strftime("%Y-%m-%d %H:%M:%S") fullmessage['body'] = self.create_alert_body(matches) fullmessage['matches'] = matches self.stomp_hostname = self.rule.get('stomp_hostname', 'localhost') self.stomp_hostport = self.rule.get('stomp_hostport', '61613') self.stomp_login = self.rule.get('stomp_login', 'admin') self.stomp_password = self.rule.get('stomp_password', 'admin') self.stomp_destination = self.rule.get( 'stomp_destination', '/queue/ALERT') self.stomp_ssl = self.rule.get('stomp_ssl', False) conn = stomp.Connection([(self.stomp_hostname, self.stomp_hostport)], use_ssl=self.stomp_ssl) conn.start() conn.connect(self.stomp_login, self.stomp_password) # Ensures that the CONNECTED frame is received otherwise, the disconnect call will fail. time.sleep(1) conn.send(self.stomp_destination, json.dumps(fullmessage)) conn.disconnect() def get_info(self): return {'type': 'stomp'} class DebugAlerter(Alerter): """ The debug alerter uses a Python logger (by default, alerting to terminal). 
""" def alert(self, matches): qk = self.rule.get('query_key', None) for match in matches: if qk in match: elastalert_logger.info( 'Alert for %s, %s at %s:' % (self.rule['name'], match[qk], lookup_es_key(match, self.rule['timestamp_field']))) else: elastalert_logger.info('Alert for %s at %s:' % (self.rule['name'], lookup_es_key(match, self.rule['timestamp_field']))) elastalert_logger.info(str(BasicMatchString(self.rule, match))) def get_info(self): return {'type': 'debug'} class EmailAlerter(Alerter): """ Sends an email alert """ required_options = frozenset(['email']) def __init__(self, *args): super(EmailAlerter, self).__init__(*args) self.smtp_host = self.rule.get('smtp_host', 'localhost') self.smtp_ssl = self.rule.get('smtp_ssl', False) self.from_addr = self.rule.get('from_addr', 'ElastAlert') self.smtp_port = self.rule.get('smtp_port') if self.rule.get('smtp_auth_file'): self.get_account(self.rule['smtp_auth_file']) self.smtp_key_file = self.rule.get('smtp_key_file') self.smtp_cert_file = self.rule.get('smtp_cert_file') # Convert email to a list if it isn't already if isinstance(self.rule['email'], str): self.rule['email'] = [self.rule['email']] # If there is a cc then also convert it a list if it isn't cc = self.rule.get('cc') if cc and isinstance(cc, str): self.rule['cc'] = [self.rule['cc']] # If there is a bcc then also convert it to a list if it isn't bcc = self.rule.get('bcc') if bcc and isinstance(bcc, str): self.rule['bcc'] = [self.rule['bcc']] add_suffix = self.rule.get('email_add_domain') if add_suffix and not add_suffix.startswith('@'): self.rule['email_add_domain'] = '@' + add_suffix def alert(self, matches): body = self.create_alert_body(matches) # Add JIRA ticket if it exists if self.pipeline is not None and 'jira_ticket' in self.pipeline: url = '%s/browse/%s' % (self.pipeline['jira_server'], self.pipeline['jira_ticket']) body += '\nJIRA ticket: %s' % (url) to_addr = self.rule['email'] if 'email_from_field' in self.rule: recipient = lookup_es_key(matches[0], self.rule['email_from_field']) if isinstance(recipient, str): if '@' in recipient: to_addr = [recipient] elif 'email_add_domain' in self.rule: to_addr = [recipient + self.rule['email_add_domain']] elif isinstance(recipient, list): to_addr = recipient if 'email_add_domain' in self.rule: to_addr = [name + self.rule['email_add_domain'] for name in to_addr] if self.rule.get('email_format') == 'html': email_msg = MIMEText(body, 'html', _charset='UTF-8') else: email_msg = MIMEText(body, _charset='UTF-8') email_msg['Subject'] = self.create_title(matches) email_msg['To'] = ', '.join(to_addr) email_msg['From'] = self.from_addr email_msg['Reply-To'] = self.rule.get('email_reply_to', email_msg['To']) email_msg['Date'] = formatdate() if self.rule.get('cc'): email_msg['CC'] = ','.join(self.rule['cc']) to_addr = to_addr + self.rule['cc'] if self.rule.get('bcc'): to_addr = to_addr + self.rule['bcc'] try: if self.smtp_ssl: if self.smtp_port: self.smtp = SMTP_SSL(self.smtp_host, self.smtp_port, keyfile=self.smtp_key_file, certfile=self.smtp_cert_file) else: self.smtp = SMTP_SSL(self.smtp_host, keyfile=self.smtp_key_file, certfile=self.smtp_cert_file) else: if self.smtp_port: self.smtp = SMTP(self.smtp_host, self.smtp_port) else: self.smtp = SMTP(self.smtp_host) self.smtp.ehlo() if self.smtp.has_extn('STARTTLS'): self.smtp.starttls(keyfile=self.smtp_key_file, certfile=self.smtp_cert_file) if 'smtp_auth_file' in self.rule: self.smtp.login(self.user, self.password) except (SMTPException, error) as e: raise EAException("Error connecting to 
SMTP host: %s" % (e)) except SMTPAuthenticationError as e: raise EAException("SMTP username/password rejected: %s" % (e)) self.smtp.sendmail(self.from_addr, to_addr, email_msg.as_string()) self.smtp.quit() elastalert_logger.info("Sent email to %s" % (to_addr)) def create_default_title(self, matches): subject = 'ElastAlert: %s' % (self.rule['name']) # If the rule has a query_key, add that value plus timestamp to subject if 'query_key' in self.rule: qk = matches[0].get(self.rule['query_key']) if qk: subject += ' - %s' % (qk) return subject def get_info(self): return {'type': 'email', 'recipients': self.rule['email']} class JiraAlerter(Alerter): """ Creates a Jira ticket for each alert """ required_options = frozenset(['jira_server', 'jira_account_file', 'jira_project', 'jira_issuetype']) # Maintain a static set of built-in fields that we explicitly know how to set # For anything else, we will do best-effort and try to set a string value known_field_list = [ 'jira_account_file', 'jira_assignee', 'jira_bump_after_inactivity', 'jira_bump_in_statuses', 'jira_bump_not_in_statuses', 'jira_bump_only', 'jira_bump_tickets', 'jira_component', 'jira_components', 'jira_description', 'jira_ignore_in_title', 'jira_issuetype', 'jira_label', 'jira_labels', 'jira_max_age', 'jira_priority', 'jira_project', 'jira_server', 'jira_transition_to', 'jira_watchers', ] # Some built-in jira types that can be used as custom fields require special handling # Here is a sample of one of them: # {"id":"customfield_12807","name":"My Custom Field","custom":true,"orderable":true,"navigable":true,"searchable":true, # "clauseNames":["cf[12807]","My Custom Field"],"schema":{"type":"array","items":"string", # "custom":"com.atlassian.jira.plugin.system.customfieldtypes:multiselect","customId":12807}} # There are likely others that will need to be updated on a case-by-case basis custom_string_types_with_special_handling = [ 'com.atlassian.jira.plugin.system.customfieldtypes:multicheckboxes', 'com.atlassian.jira.plugin.system.customfieldtypes:multiselect', 'com.atlassian.jira.plugin.system.customfieldtypes:radiobuttons', ] def __init__(self, rule): super(JiraAlerter, self).__init__(rule) self.server = self.rule['jira_server'] self.get_account(self.rule['jira_account_file']) self.project = self.rule['jira_project'] self.issue_type = self.rule['jira_issuetype'] # Deferred settings refer to values that can only be resolved when a match # is found and as such loading them will be delayed until we find a match self.deferred_settings = [] # We used to support only a single component. This allows us to maintain backwards compatibility # while also giving the user-facing API a more representative name self.components = self.rule.get('jira_components', self.rule.get('jira_component')) # We used to support only a single label. 
This allows us to maintain backwards compatibility # while also giving the user-facing API a more representative name self.labels = self.rule.get('jira_labels', self.rule.get('jira_label')) self.description = self.rule.get('jira_description', '') self.assignee = self.rule.get('jira_assignee') self.max_age = self.rule.get('jira_max_age', 30) self.priority = self.rule.get('jira_priority') self.bump_tickets = self.rule.get('jira_bump_tickets', False) self.bump_not_in_statuses = self.rule.get('jira_bump_not_in_statuses') self.bump_in_statuses = self.rule.get('jira_bump_in_statuses') self.bump_after_inactivity = self.rule.get('jira_bump_after_inactivity', 0) self.bump_only = self.rule.get('jira_bump_only', False) self.transition = self.rule.get('jira_transition_to', False) self.watchers = self.rule.get('jira_watchers') self.client = None if self.bump_in_statuses and self.bump_not_in_statuses: msg = 'Both jira_bump_in_statuses (%s) and jira_bump_not_in_statuses (%s) are set.' % \ (','.join(self.bump_in_statuses), ','.join(self.bump_not_in_statuses)) intersection = list(set(self.bump_in_statuses) & set(self.bump_in_statuses)) if intersection: msg = '%s Both have common statuses of (%s). As such, no tickets will ever be found.' % ( msg, ','.join(intersection)) msg += ' This should be simplified to use only one or the other.' logging.warning(msg) self.reset_jira_args() try: self.client = JIRA(self.server, basic_auth=(self.user, self.password)) self.get_priorities() self.jira_fields = self.client.fields() self.get_arbitrary_fields() except JIRAError as e: # JIRAError may contain HTML, pass along only first 1024 chars raise EAException("Error connecting to JIRA: %s" % (str(e)[:1024])).with_traceback(sys.exc_info()[2]) self.set_priority() def set_priority(self): try: if self.priority is not None and self.client is not None: self.jira_args['priority'] = {'id': self.priority_ids[self.priority]} except KeyError: logging.error("Priority %s not found. Valid priorities are %s" % (self.priority, list(self.priority_ids.keys()))) def reset_jira_args(self): self.jira_args = {'project': {'key': self.project}, 'issuetype': {'name': self.issue_type}} if self.components: # Support single component or list if type(self.components) != list: self.jira_args['components'] = [{'name': self.components}] else: self.jira_args['components'] = [{'name': component} for component in self.components] if self.labels: # Support single label or list if type(self.labels) != list: self.labels = [self.labels] self.jira_args['labels'] = self.labels if self.watchers: # Support single watcher or list if type(self.watchers) != list: self.watchers = [self.watchers] if self.assignee: self.jira_args['assignee'] = {'name': self.assignee} self.set_priority() def set_jira_arg(self, jira_field, value, fields): # Remove the jira_ part. Convert underscores to spaces normalized_jira_field = jira_field[5:].replace('_', ' ').lower() # All jira fields should be found in the 'id' or the 'name' field. Therefore, try both just in case for identifier in ['name', 'id']: field = next((f for f in fields if normalized_jira_field == f[identifier].replace('_', ' ').lower()), None) if field: break if not field: # Log a warning to ElastAlert saying that we couldn't find that type? # OR raise and fail to load the alert entirely? Probably the latter... 
raise Exception("Could not find a definition for the jira field '{0}'".format(normalized_jira_field)) arg_name = field['id'] # Check the schema information to decide how to set the value correctly # If the schema information is not available, raise an exception since we don't know how to set it # Note this is only the case for two built-in types, id: issuekey and id: thumbnail if not ('schema' in field or 'type' in field['schema']): raise Exception("Could not determine schema information for the jira field '{0}'".format(normalized_jira_field)) arg_type = field['schema']['type'] # Handle arrays of simple types like strings or numbers if arg_type == 'array': # As a convenience, support the scenario wherein the user only provides # a single value for a multi-value field e.g. jira_labels: Only_One_Label if type(value) != list: value = [value] array_items = field['schema']['items'] # Simple string types if array_items in ['string', 'date', 'datetime']: # Special case for multi-select custom types (the JIRA metadata says that these are strings, but # in reality, they are required to be provided as an object. if 'custom' in field['schema'] and field['schema']['custom'] in self.custom_string_types_with_special_handling: self.jira_args[arg_name] = [{'value': v} for v in value] else: self.jira_args[arg_name] = value elif array_items == 'number': self.jira_args[arg_name] = [int(v) for v in value] # Also attempt to handle arrays of complex types that have to be passed as objects with an identifier 'key' elif array_items == 'option': self.jira_args[arg_name] = [{'value': v} for v in value] else: # Try setting it as an object, using 'name' as the key # This may not work, as the key might actually be 'key', 'id', 'value', or something else # If it works, great! If not, it will manifest itself as an API error that will bubble up self.jira_args[arg_name] = [{'name': v} for v in value] # Handle non-array types else: # Simple string types if arg_type in ['string', 'date', 'datetime']: # Special case for custom types (the JIRA metadata says that these are strings, but # in reality, they are required to be provided as an object. if 'custom' in field['schema'] and field['schema']['custom'] in self.custom_string_types_with_special_handling: self.jira_args[arg_name] = {'value': value} else: self.jira_args[arg_name] = value # Number type elif arg_type == 'number': self.jira_args[arg_name] = int(value) elif arg_type == 'option': self.jira_args[arg_name] = {'value': value} # Complex type else: self.jira_args[arg_name] = {'name': value} def get_arbitrary_fields(self): # Clear jira_args self.reset_jira_args() for jira_field, value in self.rule.items(): # If we find a field that is not covered by the set that we are aware of, it means it is either: # 1. A built-in supported field in JIRA that we don't have on our radar # 2. A custom field that a JIRA admin has configured if jira_field.startswith('jira_') and jira_field not in self.known_field_list and str(value)[:1] != '#': self.set_jira_arg(jira_field, value, self.jira_fields) if jira_field.startswith('jira_') and jira_field not in self.known_field_list and str(value)[:1] == '#': self.deferred_settings.append(jira_field) def get_priorities(self): """ Creates a mapping of priority index to id. 
""" priorities = self.client.priorities() self.priority_ids = {} for x in range(len(priorities)): self.priority_ids[x] = priorities[x].id def set_assignee(self, assignee): self.assignee = assignee if assignee: self.jira_args['assignee'] = {'name': assignee} elif 'assignee' in self.jira_args: self.jira_args.pop('assignee') def find_existing_ticket(self, matches): # Default title, get stripped search version if 'alert_subject' not in self.rule: title = self.create_default_title(matches, True) else: title = self.create_title(matches) if 'jira_ignore_in_title' in self.rule: title = title.replace(matches[0].get(self.rule['jira_ignore_in_title'], ''), '') # This is necessary for search to work. Other special characters and dashes # directly adjacent to words appear to be ok title = title.replace(' - ', ' ') title = title.replace('\\', '\\\\') date = (datetime.datetime.now() - datetime.timedelta(days=self.max_age)).strftime('%Y-%m-%d') jql = 'project=%s AND summary~"%s" and created >= "%s"' % (self.project, title, date) if self.bump_in_statuses: jql = '%s and status in (%s)' % (jql, ','.join(["\"%s\"" % status if ' ' in status else status for status in self.bump_in_statuses])) if self.bump_not_in_statuses: jql = '%s and status not in (%s)' % (jql, ','.join(["\"%s\"" % status if ' ' in status else status for status in self.bump_not_in_statuses])) try: issues = self.client.search_issues(jql) except JIRAError as e: logging.exception("Error while searching for JIRA ticket using jql '%s': %s" % (jql, e)) return None if len(issues): return issues[0] def comment_on_ticket(self, ticket, match): text = str(JiraFormattedMatchString(self.rule, match)) timestamp = pretty_ts(lookup_es_key(match, self.rule['timestamp_field'])) comment = "This alert was triggered again at %s\n%s" % (timestamp, text) self.client.add_comment(ticket, comment) def transition_ticket(self, ticket): transitions = self.client.transitions(ticket) for t in transitions: if t['name'] == self.transition: self.client.transition_issue(ticket, t['id']) def alert(self, matches): # Reset arbitrary fields to pick up changes self.get_arbitrary_fields() if len(self.deferred_settings) > 0: fields = self.client.fields() for jira_field in self.deferred_settings: value = lookup_es_key(matches[0], self.rule[jira_field][1:]) self.set_jira_arg(jira_field, value, fields) title = self.create_title(matches) if self.bump_tickets: ticket = self.find_existing_ticket(matches) if ticket: inactivity_datetime = ts_now() - datetime.timedelta(days=self.bump_after_inactivity) if ts_to_dt(ticket.fields.updated) >= inactivity_datetime: if self.pipeline is not None: self.pipeline['jira_ticket'] = None self.pipeline['jira_server'] = self.server return None elastalert_logger.info('Commenting on existing ticket %s' % (ticket.key)) for match in matches: try: self.comment_on_ticket(ticket, match) except JIRAError as e: logging.exception("Error while commenting on ticket %s: %s" % (ticket, e)) if self.labels: for l in self.labels: try: ticket.fields.labels.append(l) except JIRAError as e: logging.exception("Error while appending labels to ticket %s: %s" % (ticket, e)) if self.transition: elastalert_logger.info('Transitioning existing ticket %s' % (ticket.key)) try: self.transition_ticket(ticket) except JIRAError as e: logging.exception("Error while transitioning ticket %s: %s" % (ticket, e)) if self.pipeline is not None: self.pipeline['jira_ticket'] = ticket self.pipeline['jira_server'] = self.server return None if self.bump_only: return None self.jira_args['summary'] = title 
self.jira_args['description'] = self.create_alert_body(matches) try: self.issue = self.client.create_issue(**self.jira_args) # You can not add watchers on initial creation. Only as a follow-up action if self.watchers: for watcher in self.watchers: try: self.client.add_watcher(self.issue.key, watcher) except Exception as ex: # Re-raise the exception, preserve the stack-trace, and give some # context as to which watcher failed to be added raise Exception( "Exception encountered when trying to add '{0}' as a watcher. Does the user exist?\n{1}" .format( watcher, ex )).with_traceback(sys.exc_info()[2]) except JIRAError as e: raise EAException("Error creating JIRA ticket using jira_args (%s): %s" % (self.jira_args, e)) elastalert_logger.info("Opened Jira ticket: %s" % (self.issue)) if self.pipeline is not None: self.pipeline['jira_ticket'] = self.issue self.pipeline['jira_server'] = self.server def create_alert_body(self, matches): body = self.description + '\n' body += self.get_aggregation_summary_text(matches) if self.rule.get('alert_text_type') != 'aggregation_summary_only': for match in matches: body += str(JiraFormattedMatchString(self.rule, match)) if len(matches) > 1: body += '\n----------------------------------------\n' return body def get_aggregation_summary_text(self, matches): text = super(JiraAlerter, self).get_aggregation_summary_text(matches) if text: text = '{{noformat}}{0}{{noformat}}'.format(text) return text def create_default_title(self, matches, for_search=False): # If there is a query_key, use that in the title if 'query_key' in self.rule and lookup_es_key(matches[0], self.rule['query_key']): title = 'ElastAlert: %s matched %s' % (lookup_es_key(matches[0], self.rule['query_key']), self.rule['name']) else: title = 'ElastAlert: %s' % (self.rule['name']) if for_search: return title title += ' - %s' % (pretty_ts(matches[0][self.rule['timestamp_field']], self.rule.get('use_local_time'))) # Add count for spikes count = matches[0].get('spike_count') if count: title += ' - %s+ events' % (count) return title def get_info(self): return {'type': 'jira'} class CommandAlerter(Alerter): required_options = set(['command']) def __init__(self, *args): super(CommandAlerter, self).__init__(*args) self.last_command = [] self.shell = False if isinstance(self.rule['command'], str): self.shell = True if '%' in self.rule['command']: logging.warning('Warning! 
You could be vulnerable to shell injection!') self.rule['command'] = [self.rule['command']] self.new_style_string_format = False if 'new_style_string_format' in self.rule and self.rule['new_style_string_format']: self.new_style_string_format = True def alert(self, matches): # Format the command and arguments try: command = [resolve_string(command_arg, matches[0]) for command_arg in self.rule['command']] self.last_command = command except KeyError as e: raise EAException("Error formatting command: %s" % (e)) # Run command and pipe data try: subp = subprocess.Popen(command, stdin=subprocess.PIPE, shell=self.shell) if self.rule.get('pipe_match_json'): match_json = json.dumps(matches, cls=DateTimeEncoder) + '\n' stdout, stderr = subp.communicate(input=match_json.encode()) elif self.rule.get('pipe_alert_text'): alert_text = self.create_alert_body(matches) stdout, stderr = subp.communicate(input=alert_text.encode()) if self.rule.get("fail_on_non_zero_exit", False) and subp.wait(): raise EAException("Non-zero exit code while running command %s" % (' '.join(command))) except OSError as e: raise EAException("Error while running command %s: %s" % (' '.join(command), e)) def get_info(self): return {'type': 'command', 'command': ' '.join(self.last_command)} class SnsAlerter(Alerter): """ Send alert using AWS SNS service """ required_options = frozenset(['sns_topic_arn']) def __init__(self, *args): super(SnsAlerter, self).__init__(*args) self.sns_topic_arn = self.rule.get('sns_topic_arn', '') self.aws_access_key_id = self.rule.get('aws_access_key_id') self.aws_secret_access_key = self.rule.get('aws_secret_access_key') self.aws_region = self.rule.get('aws_region', 'us-east-1') self.profile = self.rule.get('boto_profile', None) # Deprecated self.profile = self.rule.get('aws_profile', None) def create_default_title(self, matches): subject = 'ElastAlert: %s' % (self.rule['name']) return subject def alert(self, matches): body = self.create_alert_body(matches) session = boto3.Session( aws_access_key_id=self.aws_access_key_id, aws_secret_access_key=self.aws_secret_access_key, region_name=self.aws_region, profile_name=self.profile ) sns_client = session.client('sns') sns_client.publish( TopicArn=self.sns_topic_arn, Message=body, Subject=self.create_title(matches) ) elastalert_logger.info("Sent sns notification to %s" % (self.sns_topic_arn)) class HipChatAlerter(Alerter): """ Creates a HipChat room notification for each alert """ required_options = frozenset(['hipchat_auth_token', 'hipchat_room_id']) def __init__(self, rule): super(HipChatAlerter, self).__init__(rule) self.hipchat_msg_color = self.rule.get('hipchat_msg_color', 'red') self.hipchat_message_format = self.rule.get('hipchat_message_format', 'html') self.hipchat_auth_token = self.rule['hipchat_auth_token'] self.hipchat_room_id = self.rule['hipchat_room_id'] self.hipchat_domain = self.rule.get('hipchat_domain', 'api.hipchat.com') self.hipchat_ignore_ssl_errors = self.rule.get('hipchat_ignore_ssl_errors', False) self.hipchat_notify = self.rule.get('hipchat_notify', True) self.hipchat_from = self.rule.get('hipchat_from', '') self.url = 'https://%s/v2/room/%s/notification?auth_token=%s' % ( self.hipchat_domain, self.hipchat_room_id, self.hipchat_auth_token) self.hipchat_proxy = self.rule.get('hipchat_proxy', None) def create_alert_body(self, matches): body = super(HipChatAlerter, self).create_alert_body(matches) # HipChat sends 400 bad request on messages longer than 10000 characters if self.hipchat_message_format == 'html': # Use appropriate line ending 
for text/html br = '<br/>' body = body.replace('\n', br) truncated_message = '<br/>
...(truncated)' truncate_to = 10000 - len(truncated_message) else: truncated_message = '..(truncated)' truncate_to = 10000 - len(truncated_message) if (len(body) > 9999): body = body[:truncate_to] + truncated_message return body def alert(self, matches): body = self.create_alert_body(matches) # Post to HipChat headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.hipchat_proxy} if self.hipchat_proxy else None payload = { 'color': self.hipchat_msg_color, 'message': body, 'message_format': self.hipchat_message_format, 'notify': self.hipchat_notify, 'from': self.hipchat_from } try: if self.hipchat_ignore_ssl_errors: requests.packages.urllib3.disable_warnings() if self.rule.get('hipchat_mentions', []): ping_users = self.rule.get('hipchat_mentions', []) ping_msg = payload.copy() ping_msg['message'] = "ping {}".format( ", ".join("@{}".format(user) for user in ping_users) ) ping_msg['message_format'] = "text" response = requests.post( self.url, data=json.dumps(ping_msg, cls=DateTimeEncoder), headers=headers, verify=not self.hipchat_ignore_ssl_errors, proxies=proxies) response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, verify=not self.hipchat_ignore_ssl_errors, proxies=proxies) warnings.resetwarnings() response.raise_for_status() except RequestException as e: raise EAException("Error posting to HipChat: %s" % e) elastalert_logger.info("Alert sent to HipChat room %s" % self.hipchat_room_id) def get_info(self): return {'type': 'hipchat', 'hipchat_room_id': self.hipchat_room_id} class MsTeamsAlerter(Alerter): """ Creates a Microsoft Teams Conversation Message for each alert """ required_options = frozenset(['ms_teams_webhook_url', 'ms_teams_alert_summary']) def __init__(self, rule): super(MsTeamsAlerter, self).__init__(rule) self.ms_teams_webhook_url = self.rule['ms_teams_webhook_url'] if isinstance(self.ms_teams_webhook_url, str): self.ms_teams_webhook_url = [self.ms_teams_webhook_url] self.ms_teams_proxy = self.rule.get('ms_teams_proxy', None) self.ms_teams_alert_summary = self.rule.get('ms_teams_alert_summary', 'ElastAlert Message') self.ms_teams_alert_fixed_width = self.rule.get('ms_teams_alert_fixed_width', False) self.ms_teams_theme_color = self.rule.get('ms_teams_theme_color', '') def format_body(self, body): if self.ms_teams_alert_fixed_width: body = body.replace('`', "'") body = "```{0}```".format('```\n\n```'.join(x for x in body.split('\n'))).replace('\n``````', '') return body def alert(self, matches): body = self.create_alert_body(matches) body = self.format_body(body) # post to Teams headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.ms_teams_proxy} if self.ms_teams_proxy else None payload = { '@type': 'MessageCard', '@context': 'http://schema.org/extensions', 'summary': self.ms_teams_alert_summary, 'title': self.create_title(matches), 'text': body } if self.ms_teams_theme_color != '': payload['themeColor'] = self.ms_teams_theme_color for url in self.ms_teams_webhook_url: try: response = requests.post(url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies) response.raise_for_status() except RequestException as e: raise EAException("Error posting to ms teams: %s" % e) elastalert_logger.info("Alert sent to MS Teams") def get_info(self): return {'type': 'ms_teams', 'ms_teams_webhook_url': self.ms_teams_webhook_url} class SlackAlerter(Alerter): """ Creates a Slack room message for each alert """ 
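# Illustrative only (not part of the original source): the dict below sketches the rule
# options this alerter reads. Only slack_webhook_url is mandatory; the rest fall back to
# the defaults assigned in __init__, and every value shown is a placeholder.
#
#   {
#       'name': 'My rule',
#       'alert': ['slack'],
#       'slack_webhook_url': 'https://hooks.slack.com/services/T000/B000/XXXXXXXX',
#       'slack_channel_override': '#alerts',
#       'slack_username_override': 'elastalert-bot',
#       'slack_msg_color': 'warning',
#   }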
required_options = frozenset(['slack_webhook_url']) def __init__(self, rule): super(SlackAlerter, self).__init__(rule) self.slack_webhook_url = self.rule['slack_webhook_url'] if isinstance(self.slack_webhook_url, str): self.slack_webhook_url = [self.slack_webhook_url] self.slack_proxy = self.rule.get('slack_proxy', None) self.slack_username_override = self.rule.get('slack_username_override', 'elastalert') self.slack_channel_override = self.rule.get('slack_channel_override', '') if isinstance(self.slack_channel_override, str): self.slack_channel_override = [self.slack_channel_override] self.slack_title_link = self.rule.get('slack_title_link', '') self.slack_title = self.rule.get('slack_title', '') self.slack_emoji_override = self.rule.get('slack_emoji_override', ':ghost:') self.slack_icon_url_override = self.rule.get('slack_icon_url_override', '') self.slack_msg_color = self.rule.get('slack_msg_color', 'danger') self.slack_parse_override = self.rule.get('slack_parse_override', 'none') self.slack_text_string = self.rule.get('slack_text_string', '') self.slack_alert_fields = self.rule.get('slack_alert_fields', '') self.slack_ignore_ssl_errors = self.rule.get('slack_ignore_ssl_errors', False) self.slack_timeout = self.rule.get('slack_timeout', 10) self.slack_ca_certs = self.rule.get('slack_ca_certs') self.slack_attach_kibana_discover_url = self.rule.get('slack_attach_kibana_discover_url', False) self.slack_kibana_discover_color = self.rule.get('slack_kibana_discover_color', '#ec4b98') self.slack_kibana_discover_title = self.rule.get('slack_kibana_discover_title', 'Discover in Kibana') def format_body(self, body): # https://api.slack.com/docs/formatting return body def get_aggregation_summary_text__maximum_width(self): width = super(SlackAlerter, self).get_aggregation_summary_text__maximum_width() # Reduced maximum width for prettier Slack display. 
return min(width, 75) def get_aggregation_summary_text(self, matches): text = super(SlackAlerter, self).get_aggregation_summary_text(matches) if text: text = '```\n{0}```\n'.format(text) return text def populate_fields(self, matches): alert_fields = [] for arg in self.slack_alert_fields: arg = copy.copy(arg) arg['value'] = lookup_es_key(matches[0], arg['value']) alert_fields.append(arg) return alert_fields def alert(self, matches): body = self.create_alert_body(matches) body = self.format_body(body) # post to slack headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.slack_proxy} if self.slack_proxy else None payload = { 'username': self.slack_username_override, 'parse': self.slack_parse_override, 'text': self.slack_text_string, 'attachments': [ { 'color': self.slack_msg_color, 'title': self.create_title(matches), 'text': body, 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ] } # if we have defined fields, populate noteable fields for the alert if self.slack_alert_fields != '': payload['attachments'][0]['fields'] = self.populate_fields(matches) if self.slack_icon_url_override != '': payload['icon_url'] = self.slack_icon_url_override else: payload['icon_emoji'] = self.slack_emoji_override if self.slack_title != '': payload['attachments'][0]['title'] = self.slack_title if self.slack_title_link != '': payload['attachments'][0]['title_link'] = self.slack_title_link if self.slack_attach_kibana_discover_url: kibana_discover_url = lookup_es_key(matches[0], 'kibana_discover_url') if kibana_discover_url: payload['attachments'].append({ 'color': self.slack_kibana_discover_color, 'title': self.slack_kibana_discover_title, 'title_link': kibana_discover_url }) for url in self.slack_webhook_url: for channel_override in self.slack_channel_override: try: if self.slack_ca_certs: verify = self.slack_ca_certs else: verify = self.slack_ignore_ssl_errors if self.slack_ignore_ssl_errors: requests.packages.urllib3.disable_warnings() payload['channel'] = channel_override response = requests.post( url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, verify=verify, proxies=proxies, timeout=self.slack_timeout) warnings.resetwarnings() response.raise_for_status() except RequestException as e: raise EAException("Error posting to slack: %s" % e) elastalert_logger.info("Alert '%s' sent to Slack" % self.rule['name']) def get_info(self): return {'type': 'slack', 'slack_username_override': self.slack_username_override} class MattermostAlerter(Alerter): """ Creates a Mattermsot post for each alert """ required_options = frozenset(['mattermost_webhook_url']) def __init__(self, rule): super(MattermostAlerter, self).__init__(rule) # HTTP config self.mattermost_webhook_url = self.rule['mattermost_webhook_url'] if isinstance(self.mattermost_webhook_url, str): self.mattermost_webhook_url = [self.mattermost_webhook_url] self.mattermost_proxy = self.rule.get('mattermost_proxy', None) self.mattermost_ignore_ssl_errors = self.rule.get('mattermost_ignore_ssl_errors', False) # Override webhook config self.mattermost_username_override = self.rule.get('mattermost_username_override', 'elastalert') self.mattermost_channel_override = self.rule.get('mattermost_channel_override', '') self.mattermost_icon_url_override = self.rule.get('mattermost_icon_url_override', '') # Message properties self.mattermost_msg_pretext = self.rule.get('mattermost_msg_pretext', '') self.mattermost_msg_color = self.rule.get('mattermost_msg_color', 'danger') self.mattermost_msg_fields = 
self.rule.get('mattermost_msg_fields', '') def get_aggregation_summary_text__maximum_width(self): width = super(MattermostAlerter, self).get_aggregation_summary_text__maximum_width() # Reduced maximum width for prettier Mattermost display. return min(width, 75) def get_aggregation_summary_text(self, matches): text = super(MattermostAlerter, self).get_aggregation_summary_text(matches) if text: text = '```\n{0}```\n'.format(text) return text def populate_fields(self, matches): alert_fields = [] missing = self.rule.get('alert_missing_value', '') for field in self.mattermost_msg_fields: field = copy.copy(field) if 'args' in field: args_values = [lookup_es_key(matches[0], arg) or missing for arg in field['args']] if 'value' in field: field['value'] = field['value'].format(*args_values) else: field['value'] = "\n".join(str(arg) for arg in args_values) del(field['args']) alert_fields.append(field) return alert_fields def alert(self, matches): body = self.create_alert_body(matches) title = self.create_title(matches) # post to mattermost headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.mattermost_proxy} if self.mattermost_proxy else None payload = { 'attachments': [ { 'fallback': "{0}: {1}".format(title, self.mattermost_msg_pretext), 'color': self.mattermost_msg_color, 'title': title, 'pretext': self.mattermost_msg_pretext, 'fields': [] } ] } if self.rule.get('alert_text_type') == 'alert_text_only': payload['attachments'][0]['text'] = body else: payload['text'] = body if self.mattermost_msg_fields != '': payload['attachments'][0]['fields'] = self.populate_fields(matches) if self.mattermost_icon_url_override != '': payload['icon_url'] = self.mattermost_icon_url_override if self.mattermost_username_override != '': payload['username'] = self.mattermost_username_override if self.mattermost_channel_override != '': payload['channel'] = self.mattermost_channel_override for url in self.mattermost_webhook_url: try: if self.mattermost_ignore_ssl_errors: requests.urllib3.disable_warnings() response = requests.post( url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, verify=not self.mattermost_ignore_ssl_errors, proxies=proxies) warnings.resetwarnings() response.raise_for_status() except RequestException as e: raise EAException("Error posting to Mattermost: %s" % e) elastalert_logger.info("Alert sent to Mattermost") def get_info(self): return {'type': 'mattermost', 'mattermost_username_override': self.mattermost_username_override, 'mattermost_webhook_url': self.mattermost_webhook_url} class PagerDutyAlerter(Alerter): """ Create an incident on PagerDuty for each alert """ required_options = frozenset(['pagerduty_service_key', 'pagerduty_client_name']) def __init__(self, rule): super(PagerDutyAlerter, self).__init__(rule) self.pagerduty_service_key = self.rule['pagerduty_service_key'] self.pagerduty_client_name = self.rule['pagerduty_client_name'] self.pagerduty_incident_key = self.rule.get('pagerduty_incident_key', '') self.pagerduty_incident_key_args = self.rule.get('pagerduty_incident_key_args', None) self.pagerduty_event_type = self.rule.get('pagerduty_event_type', 'trigger') self.pagerduty_proxy = self.rule.get('pagerduty_proxy', None) self.pagerduty_api_version = self.rule.get('pagerduty_api_version', 'v1') self.pagerduty_v2_payload_class = self.rule.get('pagerduty_v2_payload_class', '') self.pagerduty_v2_payload_class_args = self.rule.get('pagerduty_v2_payload_class_args', None) self.pagerduty_v2_payload_component = 
self.rule.get('pagerduty_v2_payload_component', '') self.pagerduty_v2_payload_component_args = self.rule.get('pagerduty_v2_payload_component_args', None) self.pagerduty_v2_payload_group = self.rule.get('pagerduty_v2_payload_group', '') self.pagerduty_v2_payload_group_args = self.rule.get('pagerduty_v2_payload_group_args', None) self.pagerduty_v2_payload_severity = self.rule.get('pagerduty_v2_payload_severity', 'critical') self.pagerduty_v2_payload_source = self.rule.get('pagerduty_v2_payload_source', 'ElastAlert') self.pagerduty_v2_payload_source_args = self.rule.get('pagerduty_v2_payload_source_args', None) if self.pagerduty_api_version == 'v2': self.url = 'https://events.pagerduty.com/v2/enqueue' else: self.url = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json' def alert(self, matches): body = self.create_alert_body(matches) # post to pagerduty headers = {'content-type': 'application/json'} if self.pagerduty_api_version == 'v2': payload = { 'routing_key': self.pagerduty_service_key, 'event_action': self.pagerduty_event_type, 'dedup_key': self.get_incident_key(matches), 'client': self.pagerduty_client_name, 'payload': { 'class': self.resolve_formatted_key(self.pagerduty_v2_payload_class, self.pagerduty_v2_payload_class_args, matches), 'component': self.resolve_formatted_key(self.pagerduty_v2_payload_component, self.pagerduty_v2_payload_component_args, matches), 'group': self.resolve_formatted_key(self.pagerduty_v2_payload_group, self.pagerduty_v2_payload_group_args, matches), 'severity': self.pagerduty_v2_payload_severity, 'source': self.resolve_formatted_key(self.pagerduty_v2_payload_source, self.pagerduty_v2_payload_source_args, matches), 'summary': self.create_title(matches), 'custom_details': { 'information': body, }, }, } match_timestamp = lookup_es_key(matches[0], self.rule.get('timestamp_field', '@timestamp')) if match_timestamp: payload['payload']['timestamp'] = match_timestamp else: payload = { 'service_key': self.pagerduty_service_key, 'description': self.create_title(matches), 'event_type': self.pagerduty_event_type, 'incident_key': self.get_incident_key(matches), 'client': self.pagerduty_client_name, 'details': { "information": body, }, } # set https proxy, if it was provided proxies = {'https': self.pagerduty_proxy} if self.pagerduty_proxy else None try: response = requests.post( self.url, data=json.dumps(payload, cls=DateTimeEncoder, ensure_ascii=False), headers=headers, proxies=proxies ) response.raise_for_status() except RequestException as e: raise EAException("Error posting to pagerduty: %s" % e) if self.pagerduty_event_type == 'trigger': elastalert_logger.info("Trigger sent to PagerDuty") elif self.pagerduty_event_type == 'resolve': elastalert_logger.info("Resolve sent to PagerDuty") elif self.pagerduty_event_type == 'acknowledge': elastalert_logger.info("acknowledge sent to PagerDuty") def resolve_formatted_key(self, key, args, matches): if args: key_values = [lookup_es_key(matches[0], arg) for arg in args] # Populate values with rule level properties too for i in range(len(key_values)): if key_values[i] is None: key_value = self.rule.get(args[i]) if key_value: key_values[i] = key_value missing = self.rule.get('alert_missing_value', '') key_values = [missing if val is None else val for val in key_values] return key.format(*key_values) else: return key def get_incident_key(self, matches): if self.pagerduty_incident_key_args: incident_key_values = [lookup_es_key(matches[0], arg) for arg in self.pagerduty_incident_key_args] # Populate values with rule 
level properties too for i in range(len(incident_key_values)): if incident_key_values[i] is None: key_value = self.rule.get(self.pagerduty_incident_key_args[i]) if key_value: incident_key_values[i] = key_value missing = self.rule.get('alert_missing_value', '') incident_key_values = [missing if val is None else val for val in incident_key_values] return self.pagerduty_incident_key.format(*incident_key_values) else: return self.pagerduty_incident_key def get_info(self): return {'type': 'pagerduty', 'pagerduty_client_name': self.pagerduty_client_name} class PagerTreeAlerter(Alerter): """ Creates a PagerTree Incident for each alert """ required_options = frozenset(['pagertree_integration_url']) def __init__(self, rule): super(PagerTreeAlerter, self).__init__(rule) self.url = self.rule['pagertree_integration_url'] self.pagertree_proxy = self.rule.get('pagertree_proxy', None) def alert(self, matches): # post to pagertree headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.pagertree_proxy} if self.pagertree_proxy else None payload = { "event_type": "create", "Id": str(uuid.uuid4()), "Title": self.create_title(matches), "Description": self.create_alert_body(matches) } try: response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies) response.raise_for_status() except RequestException as e: raise EAException("Error posting to PagerTree: %s" % e) elastalert_logger.info("Trigger sent to PagerTree") def get_info(self): return {'type': 'pagertree', 'pagertree_integration_url': self.url} class ExotelAlerter(Alerter): required_options = frozenset(['exotel_account_sid', 'exotel_auth_token', 'exotel_to_number', 'exotel_from_number']) def __init__(self, rule): super(ExotelAlerter, self).__init__(rule) self.exotel_account_sid = self.rule['exotel_account_sid'] self.exotel_auth_token = self.rule['exotel_auth_token'] self.exotel_to_number = self.rule['exotel_to_number'] self.exotel_from_number = self.rule['exotel_from_number'] self.sms_body = self.rule.get('exotel_message_body', '') def alert(self, matches): client = Exotel(self.exotel_account_sid, self.exotel_auth_token) try: message_body = self.rule['name'] + self.sms_body response = client.sms(self.rule['exotel_from_number'], self.rule['exotel_to_number'], message_body) if response != 200: raise EAException("Error posting to Exotel, response code is %s" % response) except RequestException: raise EAException("Error posting to Exotel").with_traceback(sys.exc_info()[2]) elastalert_logger.info("Trigger sent to Exotel") def get_info(self): return {'type': 'exotel', 'exotel_account': self.exotel_account_sid} class TwilioAlerter(Alerter): required_options = frozenset(['twilio_account_sid', 'twilio_auth_token', 'twilio_to_number', 'twilio_from_number']) def __init__(self, rule): super(TwilioAlerter, self).__init__(rule) self.twilio_account_sid = self.rule['twilio_account_sid'] self.twilio_auth_token = self.rule['twilio_auth_token'] self.twilio_to_number = self.rule['twilio_to_number'] self.twilio_from_number = self.rule['twilio_from_number'] def alert(self, matches): client = TwilioClient(self.twilio_account_sid, self.twilio_auth_token) try: client.messages.create(body=self.rule['name'], to=self.twilio_to_number, from_=self.twilio_from_number) except TwilioRestException as e: raise EAException("Error posting to twilio: %s" % e) elastalert_logger.info("Trigger sent to Twilio") def get_info(self): return {'type': 'twilio', 'twilio_client_name': 
self.twilio_from_number} class VictorOpsAlerter(Alerter): """ Creates a VictorOps Incident for each alert """ required_options = frozenset(['victorops_api_key', 'victorops_routing_key', 'victorops_message_type']) def __init__(self, rule): super(VictorOpsAlerter, self).__init__(rule) self.victorops_api_key = self.rule['victorops_api_key'] self.victorops_routing_key = self.rule['victorops_routing_key'] self.victorops_message_type = self.rule['victorops_message_type'] self.victorops_entity_id = self.rule.get('victorops_entity_id', None) self.victorops_entity_display_name = self.rule.get('victorops_entity_display_name', 'no entity display name') self.url = 'https://alert.victorops.com/integrations/generic/20131114/alert/%s/%s' % ( self.victorops_api_key, self.victorops_routing_key) self.victorops_proxy = self.rule.get('victorops_proxy', None) def alert(self, matches): body = self.create_alert_body(matches) # post to victorops headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.victorops_proxy} if self.victorops_proxy else None payload = { "message_type": self.victorops_message_type, "entity_display_name": self.victorops_entity_display_name, "monitoring_tool": "ElastAlert", "state_message": body } if self.victorops_entity_id: payload["entity_id"] = self.victorops_entity_id try: response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies) response.raise_for_status() except RequestException as e: raise EAException("Error posting to VictorOps: %s" % e) elastalert_logger.info("Trigger sent to VictorOps") def get_info(self): return {'type': 'victorops', 'victorops_routing_key': self.victorops_routing_key} class TelegramAlerter(Alerter): """ Send a Telegram message via bot api for each alert """ required_options = frozenset(['telegram_bot_token', 'telegram_room_id']) def __init__(self, rule): super(TelegramAlerter, self).__init__(rule) self.telegram_bot_token = self.rule['telegram_bot_token'] self.telegram_room_id = self.rule['telegram_room_id'] self.telegram_api_url = self.rule.get('telegram_api_url', 'api.telegram.org') self.url = 'https://%s/bot%s/%s' % (self.telegram_api_url, self.telegram_bot_token, "sendMessage") self.telegram_proxy = self.rule.get('telegram_proxy', None) self.telegram_proxy_login = self.rule.get('telegram_proxy_login', None) self.telegram_proxy_password = self.rule.get('telegram_proxy_pass', None) def alert(self, matches): body = '⚠ *%s* ⚠ ```\n' % (self.create_title(matches)) for match in matches: body += str(BasicMatchString(self.rule, match)) # Separate text of aggregated alerts with dashes if len(matches) > 1: body += '\n----------------------------------------\n' if len(body) > 4095: body = body[0:4000] + "\n⚠ *message was cropped according to telegram limits!* ⚠" body += ' ```' headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.telegram_proxy} if self.telegram_proxy else None auth = HTTPProxyAuth(self.telegram_proxy_login, self.telegram_proxy_password) if self.telegram_proxy_login else None payload = { 'chat_id': self.telegram_room_id, 'text': body, 'parse_mode': 'markdown', 'disable_web_page_preview': True } try: response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies, auth=auth) warnings.resetwarnings() response.raise_for_status() except RequestException as e: raise EAException("Error posting to Telegram: %s. 
Details: %s" % (e, "" if e.response is None else e.response.text)) elastalert_logger.info( "Alert sent to Telegram room %s" % self.telegram_room_id) def get_info(self): return {'type': 'telegram', 'telegram_room_id': self.telegram_room_id} class GoogleChatAlerter(Alerter): """ Send a notification via Google Chat webhooks """ required_options = frozenset(['googlechat_webhook_url']) def __init__(self, rule): super(GoogleChatAlerter, self).__init__(rule) self.googlechat_webhook_url = self.rule['googlechat_webhook_url'] if isinstance(self.googlechat_webhook_url, str): self.googlechat_webhook_url = [self.googlechat_webhook_url] self.googlechat_format = self.rule.get('googlechat_format', 'basic') self.googlechat_header_title = self.rule.get('googlechat_header_title', None) self.googlechat_header_subtitle = self.rule.get('googlechat_header_subtitle', None) self.googlechat_header_image = self.rule.get('googlechat_header_image', None) self.googlechat_footer_kibanalink = self.rule.get('googlechat_footer_kibanalink', None) def create_header(self): header = None if self.googlechat_header_title: header = { "title": self.googlechat_header_title, "subtitle": self.googlechat_header_subtitle, "imageUrl": self.googlechat_header_image } return header def create_footer(self): footer = None if self.googlechat_footer_kibanalink: footer = {"widgets": [{ "buttons": [{ "textButton": { "text": "VISIT KIBANA", "onClick": { "openLink": { "url": self.googlechat_footer_kibanalink } } } }] }] } return footer def create_card(self, matches): card = {"cards": [{ "sections": [{ "widgets": [ {"textParagraph": {"text": self.create_alert_body(matches)}} ]} ]} ]} # Add the optional header header = self.create_header() if header: card['cards'][0]['header'] = header # Add the optional footer footer = self.create_footer() if footer: card['cards'][0]['sections'].append(footer) return card def create_basic(self, matches): body = self.create_alert_body(matches) return {'text': body} def alert(self, matches): # Format message if self.googlechat_format == 'card': message = self.create_card(matches) else: message = self.create_basic(matches) # Post to webhook headers = {'content-type': 'application/json'} for url in self.googlechat_webhook_url: try: response = requests.post(url, data=json.dumps(message), headers=headers) response.raise_for_status() except RequestException as e: raise EAException("Error posting to google chat: {}".format(e)) elastalert_logger.info("Alert sent to Google Chat!") def get_info(self): return {'type': 'googlechat', 'googlechat_webhook_url': self.googlechat_webhook_url} class GitterAlerter(Alerter): """ Creates a Gitter activity message for each alert """ required_options = frozenset(['gitter_webhook_url']) def __init__(self, rule): super(GitterAlerter, self).__init__(rule) self.gitter_webhook_url = self.rule['gitter_webhook_url'] self.gitter_proxy = self.rule.get('gitter_proxy', None) self.gitter_msg_level = self.rule.get('gitter_msg_level', 'error') def alert(self, matches): body = self.create_alert_body(matches) # post to Gitter headers = {'content-type': 'application/json'} # set https proxy, if it was provided proxies = {'https': self.gitter_proxy} if self.gitter_proxy else None payload = { 'message': body, 'level': self.gitter_msg_level } try: response = requests.post(self.gitter_webhook_url, json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies) response.raise_for_status() except RequestException as e: raise EAException("Error posting to Gitter: %s" % e) elastalert_logger.info("Alert 
sent to Gitter") def get_info(self): return {'type': 'gitter', 'gitter_webhook_url': self.gitter_webhook_url} class ServiceNowAlerter(Alerter): """ Creates a ServiceNow alert """ required_options = set([ 'username', 'password', 'servicenow_rest_url', 'short_description', 'comments', 'assignment_group', 'category', 'subcategory', 'cmdb_ci', 'caller_id' ]) def __init__(self, rule): super(ServiceNowAlerter, self).__init__(rule) self.servicenow_rest_url = self.rule['servicenow_rest_url'] self.servicenow_proxy = self.rule.get('servicenow_proxy', None) def alert(self, matches): for match in matches: # Parse everything into description. description = str(BasicMatchString(self.rule, match)) # Set proper headers headers = { "Content-Type": "application/json", "Accept": "application/json;charset=utf-8" } proxies = {'https': self.servicenow_proxy} if self.servicenow_proxy else None payload = { "description": description, "short_description": self.rule['short_description'], "comments": self.rule['comments'], "assignment_group": self.rule['assignment_group'], "category": self.rule['category'], "subcategory": self.rule['subcategory'], "cmdb_ci": self.rule['cmdb_ci'], "caller_id": self.rule["caller_id"] } try: response = requests.post( self.servicenow_rest_url, auth=(self.rule['username'], self.rule['password']), headers=headers, data=json.dumps(payload, cls=DateTimeEncoder), proxies=proxies ) response.raise_for_status() except RequestException as e: raise EAException("Error posting to ServiceNow: %s" % e) elastalert_logger.info("Alert sent to ServiceNow") def get_info(self): return {'type': 'ServiceNow', 'self.servicenow_rest_url': self.servicenow_rest_url} class AlertaAlerter(Alerter): """ Creates an Alerta event for each alert """ required_options = frozenset(['alerta_api_url']) def __init__(self, rule): super(AlertaAlerter, self).__init__(rule) # Setup defaul parameters self.url = self.rule.get('alerta_api_url', None) self.api_key = self.rule.get('alerta_api_key', None) self.timeout = self.rule.get('alerta_timeout', 86400) self.use_match_timestamp = self.rule.get('alerta_use_match_timestamp', False) self.use_qk_as_resource = self.rule.get('alerta_use_qk_as_resource', False) self.verify_ssl = not self.rule.get('alerta_api_skip_ssl', False) self.missing_text = self.rule.get('alert_missing_value', '') # Fill up default values of the API JSON payload self.severity = self.rule.get('alerta_severity', 'warning') self.resource = self.rule.get('alerta_resource', 'elastalert') self.environment = self.rule.get('alerta_environment', 'Production') self.origin = self.rule.get('alerta_origin', 'elastalert') self.service = self.rule.get('alerta_service', ['elastalert']) self.text = self.rule.get('alerta_text', 'elastalert') self.type = self.rule.get('alerta_type', 'elastalert') self.event = self.rule.get('alerta_event', 'elastalert') self.correlate = self.rule.get('alerta_correlate', []) self.tags = self.rule.get('alerta_tags', []) self.group = self.rule.get('alerta_group', '') self.attributes_keys = self.rule.get('alerta_attributes_keys', []) self.attributes_values = self.rule.get('alerta_attributes_values', []) self.value = self.rule.get('alerta_value', '') def alert(self, matches): # Override the resource if requested if self.use_qk_as_resource and 'query_key' in self.rule and lookup_es_key(matches[0], self.rule['query_key']): self.resource = lookup_es_key(matches[0], self.rule['query_key']) headers = {'content-type': 'application/json'} if self.api_key is not None: headers['Authorization'] = 'Key %s' % 
(self.rule['alerta_api_key']) alerta_payload = self.get_json_payload(matches[0]) try: response = requests.post(self.url, data=alerta_payload, headers=headers, verify=self.verify_ssl) response.raise_for_status() except RequestException as e: raise EAException("Error posting to Alerta: %s" % e) elastalert_logger.info("Alert sent to Alerta") def create_default_title(self, matches): title = '%s' % (self.rule['name']) # If the rule has a query_key, add that value if 'query_key' in self.rule: qk = matches[0].get(self.rule['query_key']) if qk: title += '.%s' % (qk) return title def get_info(self): return {'type': 'alerta', 'alerta_url': self.url} def get_json_payload(self, match): """ Builds the API Create Alert body, as in http://alerta.readthedocs.io/en/latest/api/reference.html#create-an-alert For the values that could have references to fields on the match, resolve those references. """ # Using default text and event title if not defined in rule alerta_text = self.rule['type'].get_match_str([match]) if self.text == '' else resolve_string(self.text, match, self.missing_text) alerta_event = self.create_default_title([match]) if self.event == '' else resolve_string(self.event, match, self.missing_text) match_timestamp = lookup_es_key(match, self.rule.get('timestamp_field', '@timestamp')) if match_timestamp is None: match_timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ") if self.use_match_timestamp: createTime = ts_to_dt(match_timestamp).strftime("%Y-%m-%dT%H:%M:%S.%fZ") else: createTime = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ") alerta_payload_dict = { 'resource': resolve_string(self.resource, match, self.missing_text), 'severity': self.severity, 'timeout': self.timeout, 'createTime': createTime, 'type': self.type, 'environment': resolve_string(self.environment, match, self.missing_text), 'origin': resolve_string(self.origin, match, self.missing_text), 'group': resolve_string(self.group, match, self.missing_text), 'event': alerta_event, 'text': alerta_text, 'value': resolve_string(self.value, match, self.missing_text), 'service': [resolve_string(a_service, match, self.missing_text) for a_service in self.service], 'tags': [resolve_string(a_tag, match, self.missing_text) for a_tag in self.tags], 'correlate': [resolve_string(an_event, match, self.missing_text) for an_event in self.correlate], 'attributes': dict(list(zip(self.attributes_keys, [resolve_string(a_value, match, self.missing_text) for a_value in self.attributes_values]))), 'rawData': self.create_alert_body([match]), } try: payload = json.dumps(alerta_payload_dict, cls=DateTimeEncoder) except Exception as e: raise Exception("Error building Alerta request: %s" % e) return payload class HTTPPostAlerter(Alerter): """ Requested elasticsearch indices are sent by HTTP POST. Encoded with JSON. """ def __init__(self, rule): super(HTTPPostAlerter, self).__init__(rule) post_url = self.rule.get('http_post_url') if isinstance(post_url, str): post_url = [post_url] self.post_url = post_url self.post_proxy = self.rule.get('http_post_proxy') self.post_payload = self.rule.get('http_post_payload', {}) self.post_static_payload = self.rule.get('http_post_static_payload', {}) self.post_all_values = self.rule.get('http_post_all_values', not self.post_payload) self.post_http_headers = self.rule.get('http_post_headers', {}) self.timeout = self.rule.get('http_post_timeout', 10) def alert(self, matches): """ Each match will trigger a POST to the specified endpoint(s). 
""" for match in matches: payload = match if self.post_all_values else {} payload.update(self.post_static_payload) for post_key, es_key in list(self.post_payload.items()): payload[post_key] = lookup_es_key(match, es_key) headers = { "Content-Type": "application/json", "Accept": "application/json;charset=utf-8" } headers.update(self.post_http_headers) proxies = {'https': self.post_proxy} if self.post_proxy else None for url in self.post_url: try: response = requests.post(url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, proxies=proxies, timeout=self.timeout) response.raise_for_status() except RequestException as e: raise EAException("Error posting HTTP Post alert: %s" % e) elastalert_logger.info("HTTP Post alert sent.") def get_info(self): return {'type': 'http_post', 'http_post_webhook_url': self.post_url} class StrideHTMLParser(HTMLParser): """Parse html into stride's fabric structure""" def __init__(self): """ Define a couple markup place holders. """ self.content = [] self.mark = None HTMLParser.__init__(self) def handle_starttag(self, tag, attrs): """Identify and verify starting tag is fabric compatible.""" if tag == 'b' or tag == 'strong': self.mark = dict(type='strong') if tag == 'u': self.mark = dict(type='underline') if tag == 'a': self.mark = dict(type='link', attrs=dict(attrs)) def handle_endtag(self, tag): """Clear mark on endtag.""" self.mark = None def handle_data(self, data): """Construct data node for our data.""" node = dict(type='text', text=data) if self.mark: node['marks'] = [self.mark] self.content.append(node) class StrideAlerter(Alerter): """ Creates a Stride conversation message for each alert """ required_options = frozenset( ['stride_access_token', 'stride_cloud_id', 'stride_conversation_id']) def __init__(self, rule): super(StrideAlerter, self).__init__(rule) self.stride_access_token = self.rule['stride_access_token'] self.stride_cloud_id = self.rule['stride_cloud_id'] self.stride_conversation_id = self.rule['stride_conversation_id'] self.stride_ignore_ssl_errors = self.rule.get('stride_ignore_ssl_errors', False) self.stride_proxy = self.rule.get('stride_proxy', None) self.url = 'https://api.atlassian.com/site/%s/conversation/%s/message' % ( self.stride_cloud_id, self.stride_conversation_id) def alert(self, matches): body = self.create_alert_body(matches).strip() # parse body with StrideHTMLParser parser = StrideHTMLParser() parser.feed(body) # Post to Stride headers = { 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(self.stride_access_token) } # set https proxy, if it was provided proxies = {'https': self.stride_proxy} if self.stride_proxy else None # build stride json payload # https://developer.atlassian.com/cloud/stride/apis/document/structure/ payload = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': parser.content} ]} ]}} try: if self.stride_ignore_ssl_errors: requests.packages.urllib3.disable_warnings() response = requests.post( self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, verify=not self.stride_ignore_ssl_errors, proxies=proxies) warnings.resetwarnings() response.raise_for_status() except RequestException as e: raise EAException("Error posting to Stride: %s" % e) elastalert_logger.info( "Alert sent to Stride conversation %s" % self.stride_conversation_id) def get_info(self): return {'type': 'stride', 'stride_cloud_id': self.stride_cloud_id, 'stride_converstation_id': 
self.stride_conversation_id} class LineNotifyAlerter(Alerter): """ Creates a LINE Notify notification for each alert """ required_options = frozenset(["linenotify_access_token"]) def __init__(self, rule): super(LineNotifyAlerter, self).__init__(rule) self.linenotify_access_token = self.rule["linenotify_access_token"] def alert(self, matches): body = self.create_alert_body(matches) # post to Line Notify headers = { "Content-Type": "application/x-www-form-urlencoded", "Authorization": "Bearer {}".format(self.linenotify_access_token) } payload = { "message": body } try: response = requests.post("https://notify-api.line.me/api/notify", data=payload, headers=headers) response.raise_for_status() except RequestException as e: raise EAException("Error posting to Line Notify: %s" % e) elastalert_logger.info("Alert sent to Line Notify") def get_info(self): return {"type": "linenotify", "linenotify_access_token": self.linenotify_access_token} class HiveAlerter(Alerter): """ Use matched data to create alerts containing observables in an instance of TheHive """ required_options = set(['hive_connection', 'hive_alert_config']) def alert(self, matches): connection_details = self.rule['hive_connection'] for match in matches: context = {'rule': self.rule, 'match': match} artifacts = [] for mapping in self.rule.get('hive_observable_data_mapping', []): for observable_type, match_data_key in mapping.items(): try: match_data_keys = re.findall(r'\{match\[([^\]]*)\]', match_data_key) rule_data_keys = re.findall(r'\{rule\[([^\]]*)\]', match_data_key) data_keys = match_data_keys + rule_data_keys context_keys = list(context['match'].keys()) + list(context['rule'].keys()) if all(k in context_keys for k in data_keys): artifact = {'tlp': 2, 'tags': [], 'message': None, 'dataType': observable_type, 'data': match_data_key.format(**context)} artifacts.append(artifact) except KeyError: raise KeyError('\nformat string\n{}\nmatch data\n{}'.format(match_data_key, context)) alert_config = { 'artifacts': artifacts, 'sourceRef': str(uuid.uuid4())[0:6], 'customFields': {}, 'caseTemplate': None, 'title': '{rule[index]}_{rule[name]}'.format(**context), 'date': int(time.time()) * 1000 } alert_config.update(self.rule.get('hive_alert_config', {})) custom_fields = {} for alert_config_field, alert_config_value in alert_config.items(): if alert_config_field == 'customFields': n = 0 for cf_key, cf_value in alert_config_value.items(): cf = {'order': n, cf_value['type']: cf_value['value'].format(**context)} n += 1 custom_fields[cf_key] = cf elif isinstance(alert_config_value, str): alert_config[alert_config_field] = alert_config_value.format(**context) elif isinstance(alert_config_value, (list, tuple)): formatted_list = [] for element in alert_config_value: try: formatted_list.append(element.format(**context)) except (AttributeError, KeyError, IndexError): formatted_list.append(element) alert_config[alert_config_field] = formatted_list if custom_fields: alert_config['customFields'] = custom_fields alert_body = json.dumps(alert_config, indent=4, sort_keys=True) req = '{}:{}/api/alert'.format(connection_details['hive_host'], connection_details['hive_port']) headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer {}'.format(connection_details.get('hive_apikey', ''))} proxies = connection_details.get('hive_proxies', {'http': '', 'https': ''}) verify = connection_details.get('hive_verify', False) response = requests.post(req, headers=headers, data=alert_body, proxies=proxies, verify=verify) if response.status_code != 201:
raise Exception('alert not successfully created in TheHive\n{}'.format(response.text)) def get_info(self): return { 'type': 'hivealerter', 'hive_host': self.rule.get('hive_connection', {}).get('hive_host', '') } elastalert-0.2.4/elastalert/auth.py000066400000000000000000000041061364615736500173510ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import boto3 from aws_requests_auth.aws_auth import AWSRequestsAuth class RefeshableAWSRequestsAuth(AWSRequestsAuth): """ A class ensuring that AWS request signing uses a refreshed credential """ def __init__(self, refreshable_credential, aws_host, aws_region, aws_service): """ :param refreshable_credential: A credential class that refreshes STS or IAM Instance Profile credentials :type refreshable_credential: :class:`botocore.credentials.RefreshableCredentials` """ self.refreshable_credential = refreshable_credential self.aws_host = aws_host self.aws_region = aws_region self.service = aws_service @property def aws_access_key(self): return self.refreshable_credential.access_key @property def aws_secret_access_key(self): return self.refreshable_credential.secret_key @property def aws_token(self): return self.refreshable_credential.token class Auth(object): def __call__(self, host, username, password, aws_region, profile_name): """ Return the authorization header. :param host: Elasticsearch host. :param username: Username used for authenticating the requests to Elasticsearch. :param password: Password used for authenticating the requests to Elasticsearch. :param aws_region: AWS Region to use. Only required when signing requests. :param profile_name: AWS profile to use for connecting. Only required when signing requests. """ if username and password: return username + ':' + password if not aws_region and not os.environ.get('AWS_DEFAULT_REGION'): return None session = boto3.session.Session(profile_name=profile_name, region_name=aws_region) return RefeshableAWSRequestsAuth( refreshable_credential=session.get_credentials(), aws_host=host, aws_region=session.region_name, aws_service='es') elastalert-0.2.4/elastalert/config.py000066400000000000000000000121151364615736500176540ustar00rootroot00000000000000# -*- coding: utf-8 -*- import datetime import logging import logging.config from envparse import Env from staticconf.loader import yaml_loader from . import loaders from .util import EAException from .util import elastalert_logger from .util import get_module # Required global (config.yaml) configuration options required_globals = frozenset(['run_every', 'es_host', 'es_port', 'writeback_index', 'buffer_time']) # Settings that can be derived from ENV variables env_settings = {'ES_USE_SSL': 'use_ssl', 'ES_PASSWORD': 'es_password', 'ES_USERNAME': 'es_username', 'ES_HOST': 'es_host', 'ES_PORT': 'es_port', 'ES_URL_PREFIX': 'es_url_prefix'} env = Env(ES_USE_SSL=bool) # Used to map the names of rule loaders to their classes loader_mapping = { 'file': loaders.FileRulesLoader, } def load_conf(args, defaults=None, overwrites=None): """ Creates a conf dictionary for ElastAlerter. Loads the global config file and then each rule found in rules_folder. :param args: The parsed arguments to ElastAlert :param defaults: Dictionary of default conf values :param overwrites: Dictionary of conf values to override :return: The global configuration, a dictionary. 
""" filename = args.config if filename: conf = yaml_loader(filename) else: try: conf = yaml_loader('config.yaml') except FileNotFoundError: raise EAException('No --config or config.yaml found') # init logging from config and set log levels according to command line options configure_logging(args, conf) for env_var, conf_var in list(env_settings.items()): val = env(env_var, None) if val is not None: conf[conf_var] = val for key, value in (iter(defaults.items()) if defaults is not None else []): if key not in conf: conf[key] = value for key, value in (iter(overwrites.items()) if overwrites is not None else []): conf[key] = value # Make sure we have all required globals if required_globals - frozenset(list(conf.keys())): raise EAException('%s must contain %s' % (filename, ', '.join(required_globals - frozenset(list(conf.keys()))))) conf.setdefault('writeback_alias', 'elastalert_alerts') conf.setdefault('max_query_size', 10000) conf.setdefault('scroll_keepalive', '30s') conf.setdefault('max_scrolling_count', 0) conf.setdefault('disable_rules_on_error', True) conf.setdefault('scan_subdirectories', True) conf.setdefault('rules_loader', 'file') # Convert run_every, buffer_time into a timedelta object try: conf['run_every'] = datetime.timedelta(**conf['run_every']) conf['buffer_time'] = datetime.timedelta(**conf['buffer_time']) if 'alert_time_limit' in conf: conf['alert_time_limit'] = datetime.timedelta(**conf['alert_time_limit']) else: conf['alert_time_limit'] = datetime.timedelta(days=2) if 'old_query_limit' in conf: conf['old_query_limit'] = datetime.timedelta(**conf['old_query_limit']) else: conf['old_query_limit'] = datetime.timedelta(weeks=1) except (KeyError, TypeError) as e: raise EAException('Invalid time format used: %s' % e) # Initialise the rule loader and load each rule configuration rules_loader_class = loader_mapping.get(conf['rules_loader']) or get_module(conf['rules_loader']) rules_loader = rules_loader_class(conf) conf['rules_loader'] = rules_loader # Make sure we have all the required globals for the loader # Make sure we have all required globals if rules_loader.required_globals - frozenset(list(conf.keys())): raise EAException( '%s must contain %s' % (filename, ', '.join(rules_loader.required_globals - frozenset(list(conf.keys()))))) return conf def configure_logging(args, conf): # configure logging from config file if provided if 'logging' in conf: # load new logging config logging.config.dictConfig(conf['logging']) if args.verbose and args.debug: elastalert_logger.info( "Note: --debug and --verbose flags are set. --debug takes precedent." ) # re-enable INFO log level on elastalert_logger in verbose/debug mode # (but don't touch it if it is already set to INFO or below by config) if args.verbose or args.debug: if elastalert_logger.level > logging.INFO or elastalert_logger.level == logging.NOTSET: elastalert_logger.setLevel(logging.INFO) if args.debug: elastalert_logger.info( """Note: In debug mode, alerts will be logged to console but NOT actually sent. 
To send them but remain verbose, use --verbose instead.""" ) if not args.es_debug and 'logging' not in conf: logging.getLogger('elasticsearch').setLevel(logging.WARNING) if args.es_debug_trace: tracer = logging.getLogger('elasticsearch.trace') tracer.setLevel(logging.INFO) tracer.addHandler(logging.FileHandler(args.es_debug_trace)) elastalert-0.2.4/elastalert/create_index.py000066400000000000000000000316321364615736500210460ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse import getpass import json import os import time import elasticsearch.helpers import yaml from elasticsearch import RequestsHttpConnection from elasticsearch.client import Elasticsearch from elasticsearch.client import IndicesClient from elasticsearch.exceptions import NotFoundError from envparse import Env from .auth import Auth env = Env(ES_USE_SSL=bool) def create_index_mappings(es_client, ea_index, recreate=False, old_ea_index=None): esversion = es_client.info()["version"]["number"] print("Elastic Version: " + esversion) es_index_mappings = read_es_index_mappings() if is_atleastsix(esversion) else read_es_index_mappings(5) es_index = IndicesClient(es_client) if not recreate: if es_index.exists(ea_index): print('Index ' + ea_index + ' already exists. Skipping index creation.') return None # (Re-)Create indices. if is_atleastsix(esversion): index_names = ( ea_index, ea_index + '_status', ea_index + '_silence', ea_index + '_error', ea_index + '_past', ) else: index_names = ( ea_index, ) for index_name in index_names: if es_index.exists(index_name): print('Deleting index ' + index_name + '.') try: es_index.delete(index_name) except NotFoundError: # Why does this ever occur?? It shouldn't. But it does. pass es_index.create(index_name) # To avoid a race condition. 
TODO: replace this with a real check time.sleep(2) if is_atleastseven(esversion): # TODO remove doc_type completely when elasicsearch client allows doc_type=None # doc_type is a deprecated feature and will be completely removed in Elasicsearch 8 es_client.indices.put_mapping(index=ea_index, doc_type='_doc', body=es_index_mappings['elastalert'], include_type_name=True) es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc', body=es_index_mappings['elastalert_status'], include_type_name=True) es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc', body=es_index_mappings['silence'], include_type_name=True) es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc', body=es_index_mappings['elastalert_error'], include_type_name=True) es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc', body=es_index_mappings['past_elastalert'], include_type_name=True) elif is_atleastsixtwo(esversion): es_client.indices.put_mapping(index=ea_index, doc_type='_doc', body=es_index_mappings['elastalert']) es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc', body=es_index_mappings['elastalert_status']) es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc', body=es_index_mappings['silence']) es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc', body=es_index_mappings['elastalert_error']) es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc', body=es_index_mappings['past_elastalert']) elif is_atleastsix(esversion): es_client.indices.put_mapping(index=ea_index, doc_type='elastalert', body=es_index_mappings['elastalert']) es_client.indices.put_mapping(index=ea_index + '_status', doc_type='elastalert_status', body=es_index_mappings['elastalert_status']) es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='silence', body=es_index_mappings['silence']) es_client.indices.put_mapping(index=ea_index + '_error', doc_type='elastalert_error', body=es_index_mappings['elastalert_error']) es_client.indices.put_mapping(index=ea_index + '_past', doc_type='past_elastalert', body=es_index_mappings['past_elastalert']) else: es_client.indices.put_mapping(index=ea_index, doc_type='elastalert', body=es_index_mappings['elastalert']) es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_status', body=es_index_mappings['elastalert_status']) es_client.indices.put_mapping(index=ea_index, doc_type='silence', body=es_index_mappings['silence']) es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_error', body=es_index_mappings['elastalert_error']) es_client.indices.put_mapping(index=ea_index, doc_type='past_elastalert', body=es_index_mappings['past_elastalert']) print('New index %s created' % ea_index) if old_ea_index: print("Copying all data from old index '{0}' to new index '{1}'".format(old_ea_index, ea_index)) # Use the defaults for chunk_size, scroll, scan_kwargs, and bulk_kwargs elasticsearch.helpers.reindex(es_client, old_ea_index, ea_index) print('Done!') def read_es_index_mappings(es_version=6): print('Reading Elastic {0} index mappings:'.format(es_version)) return { 'silence': read_es_index_mapping('silence', es_version), 'elastalert_status': read_es_index_mapping('elastalert_status', es_version), 'elastalert': read_es_index_mapping('elastalert', es_version), 'past_elastalert': read_es_index_mapping('past_elastalert', es_version), 'elastalert_error': read_es_index_mapping('elastalert_error', es_version) } def read_es_index_mapping(mapping, 
es_version=6): base_path = os.path.abspath(os.path.dirname(__file__)) mapping_path = 'es_mappings/{0}/{1}.json'.format(es_version, mapping) path = os.path.join(base_path, mapping_path) with open(path, 'r') as f: print("Reading index mapping '{0}'".format(mapping_path)) return json.load(f) def is_atleastsix(es_version): return int(es_version.split(".")[0]) >= 6 def is_atleastsixtwo(es_version): major, minor = list(map(int, es_version.split(".")[:2])) return major > 6 or (major == 6 and minor >= 2) def is_atleastseven(es_version): return int(es_version.split(".")[0]) >= 7 def main(): parser = argparse.ArgumentParser() parser.add_argument('--host', default=os.environ.get('ES_HOST', None), help='Elasticsearch host') parser.add_argument('--port', default=os.environ.get('ES_PORT', None), type=int, help='Elasticsearch port') parser.add_argument('--username', default=os.environ.get('ES_USERNAME', None), help='Elasticsearch username') parser.add_argument('--password', default=os.environ.get('ES_PASSWORD', None), help='Elasticsearch password') parser.add_argument('--url-prefix', help='Elasticsearch URL prefix') parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth') parser.add_argument('--ssl', action='store_true', default=env('ES_USE_SSL', None), help='Use TLS') parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use TLS') parser.add_argument('--verify-certs', action='store_true', default=None, help='Verify TLS certificates') parser.add_argument('--no-verify-certs', dest='verify_certs', action='store_false', help='Do not verify TLS certificates') parser.add_argument('--index', help='Index name to create') parser.add_argument('--alias', help='Alias name to create') parser.add_argument('--old-index', help='Old index name to copy') parser.add_argument('--send_get_body_as', default='GET', help='Method for querying Elasticsearch - POST, GET or source') parser.add_argument( '--boto-profile', default=None, dest='profile', help='DEPRECATED: (use --profile) Boto profile to use for signing requests') parser.add_argument( '--profile', default=None, help='AWS profile to use for signing requests. Optionally use the AWS_DEFAULT_PROFILE environment variable') parser.add_argument( '--aws-region', default=None, help='AWS Region to use for signing requests. 
Optionally use the AWS_DEFAULT_REGION environment variable') parser.add_argument('--timeout', default=60, type=int, help='Elasticsearch request timeout') parser.add_argument('--config', default='config.yaml', help='Global config file (default: config.yaml)') parser.add_argument('--recreate', type=bool, default=False, help='Force re-creation of the index (this will cause data loss).') args = parser.parse_args() if os.path.isfile(args.config): filename = args.config elif os.path.isfile('../config.yaml'): filename = '../config.yaml' else: filename = '' if filename: with open(filename) as config_file: data = yaml.load(config_file, Loader=yaml.FullLoader) host = args.host if args.host else data.get('es_host') port = args.port if args.port else data.get('es_port') username = args.username if args.username else data.get('es_username') password = args.password if args.password else data.get('es_password') url_prefix = args.url_prefix if args.url_prefix is not None else data.get('es_url_prefix', '') use_ssl = args.ssl if args.ssl is not None else data.get('use_ssl') verify_certs = args.verify_certs if args.verify_certs is not None else data.get('verify_certs') is not False aws_region = data.get('aws_region', None) send_get_body_as = data.get('send_get_body_as', 'GET') ca_certs = data.get('ca_certs') client_cert = data.get('client_cert') client_key = data.get('client_key') index = args.index if args.index is not None else data.get('writeback_index') alias = args.alias if args.alias is not None else data.get('writeback_alias') old_index = args.old_index if args.old_index is not None else None else: username = args.username if args.username else None password = args.password if args.password else None aws_region = args.aws_region host = args.host if args.host else input('Enter Elasticsearch host: ') port = args.port if args.port else int(input('Enter Elasticsearch port: ')) use_ssl = (args.ssl if args.ssl is not None else input('Use SSL? t/f: ').lower() in ('t', 'true')) if use_ssl: verify_certs = (args.verify_certs if args.verify_certs is not None else input('Verify TLS certificates? t/f: ').lower() not in ('f', 'false')) else: verify_certs = True if args.no_auth is None and username is None: username = input('Enter optional basic-auth username (or leave blank): ') password = getpass.getpass('Enter optional basic-auth password (or leave blank): ') url_prefix = (args.url_prefix if args.url_prefix is not None else input('Enter optional Elasticsearch URL prefix (prepends a string to the URL of every request): ')) send_get_body_as = args.send_get_body_as ca_certs = None client_cert = None client_key = None index = args.index if args.index is not None else input('New index name? (Default elastalert_status) ') if not index: index = 'elastalert_status' alias = args.alias if args.alias is not None else input('New alias name? (Default elastalert_alerts) ') if not alias: alias = 'elastalert_alerts' old_index = (args.old_index if args.old_index is not None else input('Name of existing index to copy?
(Default None) ')) timeout = args.timeout auth = Auth() http_auth = auth(host=host, username=username, password=password, aws_region=aws_region, profile_name=args.profile) es = Elasticsearch( host=host, port=port, timeout=timeout, use_ssl=use_ssl, verify_certs=verify_certs, connection_class=RequestsHttpConnection, http_auth=http_auth, url_prefix=url_prefix, send_get_body_as=send_get_body_as, client_cert=client_cert, ca_certs=ca_certs, client_key=client_key) create_index_mappings(es_client=es, ea_index=index, recreate=args.recreate, old_ea_index=old_index) if __name__ == '__main__': main() elastalert-0.2.4/elastalert/elastalert.py000077500000000000000000002672771364615736500205770ustar00rootroot00000000000000# -*- coding: utf-8 -*- import argparse import copy import datetime import json import logging import os import random import signal import sys import threading import time import timeit import traceback from email.mime.text import MIMEText from smtplib import SMTP from smtplib import SMTPException from socket import error import dateutil.tz import pytz from apscheduler.schedulers.background import BackgroundScheduler from croniter import croniter from elasticsearch.exceptions import ConnectionError from elasticsearch.exceptions import ElasticsearchException from elasticsearch.exceptions import NotFoundError from elasticsearch.exceptions import TransportError from . import kibana from .alerts import DebugAlerter from .config import load_conf from .enhancements import DropMatchException from .kibana_discover import generate_kibana_discover_url from .ruletypes import FlatlineRule from .util import add_raw_postfix from .util import cronite_datetime_to_timestamp from .util import dt_to_ts from .util import dt_to_unix from .util import EAException from .util import elastalert_logger from .util import elasticsearch_client from .util import format_index from .util import lookup_es_key from .util import parse_deadline from .util import parse_duration from .util import pretty_ts from .util import replace_dots_in_field_names from .util import seconds from .util import set_es_key from .util import should_scrolling_continue from .util import total_seconds from .util import ts_add from .util import ts_now from .util import ts_to_dt from .util import unix_to_dt class ElastAlerter(object): """ The main ElastAlert runner. This class holds all state about active rules, controls when queries are run, and passes information between rules and alerts. :param args: An argparse arguments instance. Should contain debug and start :param conf: The configuration dictionary. At the top level, this contains global options, and under 'rules', contains all state relating to rules and alerts. In each rule in conf['rules'], the RuleType and Alerter instances live under 'type' and 'alerts', respectively. The conf dictionary should not be passed directly from a configuration file, but must be populated by config.py:load_rules instead. """ thread_data = threading.local() def parse_args(self, args): parser = argparse.ArgumentParser() parser.add_argument( '--config', action='store', dest='config', default="config.yaml", help='Global config file (default: config.yaml)') parser.add_argument('--debug', action='store_true', dest='debug', help='Suppresses alerts and prints information instead. 
' 'Not compatible with `--verbose`') parser.add_argument('--rule', dest='rule', help='Run only a specific rule (by filename, must still be in rules folder)') parser.add_argument('--silence', dest='silence', help='Silence rule for a time period. Must be used with --rule. Usage: ' '--silence =, eg. --silence hours=2') parser.add_argument('--start', dest='start', help='YYYY-MM-DDTHH:MM:SS Start querying from this timestamp. ' 'Use "NOW" to start from current time. (Default: present)') parser.add_argument('--end', dest='end', help='YYYY-MM-DDTHH:MM:SS Query to this timestamp. (Default: present)') parser.add_argument('--verbose', action='store_true', dest='verbose', help='Increase verbosity without suppressing alerts. ' 'Not compatible with `--debug`') parser.add_argument('--patience', action='store', dest='timeout', type=parse_duration, default=datetime.timedelta(), help='Maximum time to wait for ElasticSearch to become responsive. Usage: ' '--patience =. e.g. --patience minutes=5') parser.add_argument( '--pin_rules', action='store_true', dest='pin_rules', help='Stop ElastAlert from monitoring config file changes') parser.add_argument('--es_debug', action='store_true', dest='es_debug', help='Enable verbose logging from Elasticsearch queries') parser.add_argument( '--es_debug_trace', action='store', dest='es_debug_trace', help='Enable logging from Elasticsearch queries as curl command. Queries will be logged to file. Note that ' 'this will incorrectly display localhost:9200 as the host/port') self.args = parser.parse_args(args) def __init__(self, args): self.es_clients = {} self.parse_args(args) self.debug = self.args.debug self.verbose = self.args.verbose if self.verbose and self.debug: elastalert_logger.info( "Note: --debug and --verbose flags are set. --debug takes precedent." ) if self.verbose or self.debug: elastalert_logger.setLevel(logging.INFO) if self.debug: elastalert_logger.info( """Note: In debug mode, alerts will be logged to console but NOT actually sent. 
To send them but remain verbose, use --verbose instead.""" ) if not self.args.es_debug: logging.getLogger('elasticsearch').setLevel(logging.WARNING) if self.args.es_debug_trace: tracer = logging.getLogger('elasticsearch.trace') tracer.setLevel(logging.INFO) tracer.addHandler(logging.FileHandler(self.args.es_debug_trace)) self.conf = load_conf(self.args) self.rules_loader = self.conf['rules_loader'] self.rules = self.rules_loader.load(self.conf, self.args) print(len(self.rules), 'rules loaded') self.max_query_size = self.conf['max_query_size'] self.scroll_keepalive = self.conf['scroll_keepalive'] self.writeback_index = self.conf['writeback_index'] self.writeback_alias = self.conf['writeback_alias'] self.run_every = self.conf['run_every'] self.alert_time_limit = self.conf['alert_time_limit'] self.old_query_limit = self.conf['old_query_limit'] self.disable_rules_on_error = self.conf['disable_rules_on_error'] self.notify_email = self.conf.get('notify_email', []) self.from_addr = self.conf.get('from_addr', 'ElastAlert') self.smtp_host = self.conf.get('smtp_host', 'localhost') self.max_aggregation = self.conf.get('max_aggregation', 10000) self.buffer_time = self.conf['buffer_time'] self.silence_cache = {} self.rule_hashes = self.rules_loader.get_hashes(self.conf, self.args.rule) self.starttime = self.args.start self.disabled_rules = [] self.replace_dots_in_field_names = self.conf.get('replace_dots_in_field_names', False) self.thread_data.num_hits = 0 self.thread_data.num_dupes = 0 self.scheduler = BackgroundScheduler() self.string_multi_field_name = self.conf.get('string_multi_field_name', False) self.add_metadata_alert = self.conf.get('add_metadata_alert', False) self.show_disabled_rules = self.conf.get('show_disabled_rules', True) self.writeback_es = elasticsearch_client(self.conf) remove = [] for rule in self.rules: if not self.init_rule(rule): remove.append(rule) list(map(self.rules.remove, remove)) if self.args.silence: self.silence() @staticmethod def get_index(rule, starttime=None, endtime=None): """ Gets the index for a rule. If strftime is set and starttime and endtime are provided, it will return a comma seperated list of indices. If strftime is set but starttime and endtime are not provided, it will replace all format tokens with a wildcard. """ index = rule['index'] add_extra = rule.get('search_extra_index', False) if rule.get('use_strftime_index'): if starttime and endtime: return format_index(index, starttime, endtime, add_extra) else: # Replace the substring containing format characters with a * format_start = index.find('%') format_end = index.rfind('%') + 2 return index[:format_start] + '*' + index[format_end:] else: return index @staticmethod def get_query(filters, starttime=None, endtime=None, sort=True, timestamp_field='@timestamp', to_ts_func=dt_to_ts, desc=False, five=False): """ Returns a query dict that will apply a list of filters, filter by start and end time, and sort results by timestamp. :param filters: A list of Elasticsearch filters to use. :param starttime: A timestamp to use as the start time of the query. :param endtime: A timestamp to use as the end time of the query. :param sort: If true, sort results by timestamp. (Default True) :return: A query dictionary to pass to Elasticsearch. 
""" starttime = to_ts_func(starttime) endtime = to_ts_func(endtime) filters = copy.copy(filters) es_filters = {'filter': {'bool': {'must': filters}}} if starttime and endtime: es_filters['filter']['bool']['must'].insert(0, {'range': {timestamp_field: {'gt': starttime, 'lte': endtime}}}) if five: query = {'query': {'bool': es_filters}} else: query = {'query': {'filtered': es_filters}} if sort: query['sort'] = [{timestamp_field: {'order': 'desc' if desc else 'asc'}}] return query def get_terms_query(self, query, rule, size, field, five=False): """ Takes a query generated by get_query and outputs a aggregation query """ query_element = query['query'] if 'sort' in query_element: query_element.pop('sort') if not five: query_element['filtered'].update({'aggs': {'counts': {'terms': {'field': field, 'size': size, 'min_doc_count': rule.get('min_doc_count', 1)}}}}) aggs_query = {'aggs': query_element} else: aggs_query = query aggs_query['aggs'] = {'counts': {'terms': {'field': field, 'size': size, 'min_doc_count': rule.get('min_doc_count', 1)}}} return aggs_query def get_aggregation_query(self, query, rule, query_key, terms_size, timestamp_field='@timestamp'): """ Takes a query generated by get_query and outputs a aggregation query """ query_element = query['query'] if 'sort' in query_element: query_element.pop('sort') metric_agg_element = rule['aggregation_query_element'] bucket_interval_period = rule.get('bucket_interval_period') if bucket_interval_period is not None: aggs_element = { 'interval_aggs': { 'date_histogram': { 'field': timestamp_field, 'interval': bucket_interval_period}, 'aggs': metric_agg_element } } if rule.get('bucket_offset_delta'): aggs_element['interval_aggs']['date_histogram']['offset'] = '+%ss' % (rule['bucket_offset_delta']) else: aggs_element = metric_agg_element if query_key is not None: for idx, key in reversed(list(enumerate(query_key.split(',')))): aggs_element = {'bucket_aggs': {'terms': {'field': key, 'size': terms_size, 'min_doc_count': rule.get('min_doc_count', 1)}, 'aggs': aggs_element}} if not rule['five']: query_element['filtered'].update({'aggs': aggs_element}) aggs_query = {'aggs': query_element} else: aggs_query = query aggs_query['aggs'] = aggs_element return aggs_query def get_index_start(self, index, timestamp_field='@timestamp'): """ Query for one result sorted by timestamp to find the beginning of the index. :param index: The index of which to find the earliest event. :return: Timestamp of the earliest event. """ query = {'sort': {timestamp_field: {'order': 'asc'}}} try: if self.thread_data.current_es.is_atleastsixsix(): res = self.thread_data.current_es.search(index=index, size=1, body=query, _source_includes=[timestamp_field], ignore_unavailable=True) else: res = self.thread_data.current_es.search(index=index, size=1, body=query, _source_include=[timestamp_field], ignore_unavailable=True) except ElasticsearchException as e: self.handle_error("Elasticsearch query error: %s" % (e), {'index': index, 'query': query}) return '1969-12-30T00:00:00Z' if len(res['hits']['hits']) == 0: # Index is completely empty, return a date before the epoch return '1969-12-30T00:00:00Z' return res['hits']['hits'][0][timestamp_field] @staticmethod def process_hits(rule, hits): """ Update the _source field for each hit received from ES based on the rule configuration. This replaces timestamps with datetime objects, folds important fields into _source and creates compound query_keys. :return: A list of processed _source dictionaries. 
""" processed_hits = [] for hit in hits: # Merge fields and _source hit.setdefault('_source', {}) for key, value in list(hit.get('fields', {}).items()): # Fields are returned as lists, assume any with length 1 are not arrays in _source # Except sometimes they aren't lists. This is dependent on ES version hit['_source'].setdefault(key, value[0] if type(value) is list and len(value) == 1 else value) # Convert the timestamp to a datetime ts = lookup_es_key(hit['_source'], rule['timestamp_field']) if not ts and not rule["_source_enabled"]: raise EAException( "Error: No timestamp was found for hit. '_source_enabled' is set to false, check your mappings for stored fields" ) set_es_key(hit['_source'], rule['timestamp_field'], rule['ts_to_dt'](ts)) set_es_key(hit, rule['timestamp_field'], lookup_es_key(hit['_source'], rule['timestamp_field'])) # Tack metadata fields into _source for field in ['_id', '_index', '_type']: if field in hit: hit['_source'][field] = hit[field] if rule.get('compound_query_key'): values = [lookup_es_key(hit['_source'], key) for key in rule['compound_query_key']] hit['_source'][rule['query_key']] = ', '.join([str(value) for value in values]) if rule.get('compound_aggregation_key'): values = [lookup_es_key(hit['_source'], key) for key in rule['compound_aggregation_key']] hit['_source'][rule['aggregation_key']] = ', '.join([str(value) for value in values]) processed_hits.append(hit['_source']) return processed_hits def get_hits(self, rule, starttime, endtime, index, scroll=False): """ Query Elasticsearch for the given rule and return the results. :param rule: The rule configuration. :param starttime: The earliest time to query. :param endtime: The latest time to query. :return: A list of hits, bounded by rule['max_query_size'] (or self.max_query_size). """ query = self.get_query( rule['filter'], starttime, endtime, timestamp_field=rule['timestamp_field'], to_ts_func=rule['dt_to_ts'], five=rule['five'], ) if self.thread_data.current_es.is_atleastsixsix(): extra_args = {'_source_includes': rule['include']} else: extra_args = {'_source_include': rule['include']} scroll_keepalive = rule.get('scroll_keepalive', self.scroll_keepalive) if not rule.get('_source_enabled'): if rule['five']: query['stored_fields'] = rule['include'] else: query['fields'] = rule['include'] extra_args = {} try: if scroll: res = self.thread_data.current_es.scroll(scroll_id=rule['scroll_id'], scroll=scroll_keepalive) else: res = self.thread_data.current_es.search( scroll=scroll_keepalive, index=index, size=rule.get('max_query_size', self.max_query_size), body=query, ignore_unavailable=True, **extra_args ) if '_scroll_id' in res: rule['scroll_id'] = res['_scroll_id'] if self.thread_data.current_es.is_atleastseven(): self.thread_data.total_hits = int(res['hits']['total']['value']) else: self.thread_data.total_hits = int(res['hits']['total']) if len(res.get('_shards', {}).get('failures', [])) > 0: try: errs = [e['reason']['reason'] for e in res['_shards']['failures'] if 'Failed to parse' in e['reason']['reason']] if len(errs): raise ElasticsearchException(errs) except (TypeError, KeyError): # Different versions of ES have this formatted in different ways. Fallback to str-ing the whole thing raise ElasticsearchException(str(res['_shards']['failures'])) logging.debug(str(res)) except ElasticsearchException as e: # Elasticsearch sometimes gives us GIGANTIC error messages # (so big that they will fill the entire terminal buffer) if len(str(e)) > 1024: e = str(e)[:1024] + '... 
(%d characters removed)' % (len(str(e)) - 1024) self.handle_error('Error running query: %s' % (e), {'rule': rule['name'], 'query': query}) return None hits = res['hits']['hits'] self.thread_data.num_hits += len(hits) lt = rule.get('use_local_time') status_log = "Queried rule %s from %s to %s: %s / %s hits" % ( rule['name'], pretty_ts(starttime, lt), pretty_ts(endtime, lt), self.thread_data.num_hits, len(hits) ) if self.thread_data.total_hits > rule.get('max_query_size', self.max_query_size): elastalert_logger.info("%s (scrolling..)" % status_log) else: elastalert_logger.info(status_log) hits = self.process_hits(rule, hits) # Record doc_type for use in get_top_counts if 'doc_type' not in rule and len(hits): rule['doc_type'] = hits[0]['_type'] return hits def get_hits_count(self, rule, starttime, endtime, index): """ Query Elasticsearch for the count of results and returns a list of timestamps equal to the endtime. This allows the results to be passed to rules which expect an object for each hit. :param rule: The rule configuration dictionary. :param starttime: The earliest time to query. :param endtime: The latest time to query. :return: A dictionary mapping timestamps to number of hits for that time period. """ query = self.get_query( rule['filter'], starttime, endtime, timestamp_field=rule['timestamp_field'], sort=False, to_ts_func=rule['dt_to_ts'], five=rule['five'] ) try: res = self.thread_data.current_es.count(index=index, doc_type=rule['doc_type'], body=query, ignore_unavailable=True) except ElasticsearchException as e: # Elasticsearch sometimes gives us GIGANTIC error messages # (so big that they will fill the entire terminal buffer) if len(str(e)) > 1024: e = str(e)[:1024] + '... (%d characters removed)' % (len(str(e)) - 1024) self.handle_error('Error running count query: %s' % (e), {'rule': rule['name'], 'query': query}) return None self.thread_data.num_hits += res['count'] lt = rule.get('use_local_time') elastalert_logger.info( "Queried rule %s from %s to %s: %s hits" % (rule['name'], pretty_ts(starttime, lt), pretty_ts(endtime, lt), res['count']) ) return {endtime: res['count']} def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=None): rule_filter = copy.copy(rule['filter']) if qk: qk_list = qk.split(",") end = None if rule['five']: end = '.keyword' else: end = '.raw' if len(qk_list) == 1: qk = qk_list[0] filter_key = rule['query_key'] if rule.get('raw_count_keys', True) and not rule['query_key'].endswith(end): filter_key = add_raw_postfix(filter_key, rule['five']) rule_filter.extend([{'term': {filter_key: qk}}]) else: filter_keys = rule['compound_query_key'] for i in range(len(filter_keys)): key_with_postfix = filter_keys[i] if rule.get('raw_count_keys', True) and not key.endswith(end): key_with_postfix = add_raw_postfix(key_with_postfix, rule['five']) rule_filter.extend([{'term': {key_with_postfix: qk_list[i]}}]) base_query = self.get_query( rule_filter, starttime, endtime, timestamp_field=rule['timestamp_field'], sort=False, to_ts_func=rule['dt_to_ts'], five=rule['five'] ) if size is None: size = rule.get('terms_size', 50) query = self.get_terms_query(base_query, rule, size, key, rule['five']) try: if not rule['five']: res = self.thread_data.current_es.deprecated_search( index=index, doc_type=rule['doc_type'], body=query, search_type='count', ignore_unavailable=True ) else: res = self.thread_data.current_es.deprecated_search(index=index, doc_type=rule['doc_type'], body=query, size=0, ignore_unavailable=True) except ElasticsearchException as e: # 
Elasticsearch sometimes gives us GIGANTIC error messages # (so big that they will fill the entire terminal buffer) if len(str(e)) > 1024: e = str(e)[:1024] + '... (%d characters removed)' % (len(str(e)) - 1024) self.handle_error('Error running terms query: %s' % (e), {'rule': rule['name'], 'query': query}) return None if 'aggregations' not in res: return {} if not rule['five']: buckets = res['aggregations']['filtered']['counts']['buckets'] else: buckets = res['aggregations']['counts']['buckets'] self.thread_data.num_hits += len(buckets) lt = rule.get('use_local_time') elastalert_logger.info( 'Queried rule %s from %s to %s: %s buckets' % (rule['name'], pretty_ts(starttime, lt), pretty_ts(endtime, lt), len(buckets)) ) return {endtime: buckets} def get_hits_aggregation(self, rule, starttime, endtime, index, query_key, term_size=None): rule_filter = copy.copy(rule['filter']) base_query = self.get_query( rule_filter, starttime, endtime, timestamp_field=rule['timestamp_field'], sort=False, to_ts_func=rule['dt_to_ts'], five=rule['five'] ) if term_size is None: term_size = rule.get('terms_size', 50) query = self.get_aggregation_query(base_query, rule, query_key, term_size, rule['timestamp_field']) try: if not rule['five']: res = self.thread_data.current_es.deprecated_search( index=index, doc_type=rule.get('doc_type'), body=query, search_type='count', ignore_unavailable=True ) else: res = self.thread_data.current_es.deprecated_search(index=index, doc_type=rule.get('doc_type'), body=query, size=0, ignore_unavailable=True) except ElasticsearchException as e: if len(str(e)) > 1024: e = str(e)[:1024] + '... (%d characters removed)' % (len(str(e)) - 1024) self.handle_error('Error running query: %s' % (e), {'rule': rule['name']}) return None if 'aggregations' not in res: return {} if not rule['five']: payload = res['aggregations']['filtered'] else: payload = res['aggregations'] if self.thread_data.current_es.is_atleastseven(): self.thread_data.num_hits += res['hits']['total']['value'] else: self.thread_data.num_hits += res['hits']['total'] return {endtime: payload} def remove_duplicate_events(self, data, rule): new_events = [] for event in data: if event['_id'] in rule['processed_hits']: continue # Remember the new data's IDs rule['processed_hits'][event['_id']] = lookup_es_key(event, rule['timestamp_field']) new_events.append(event) return new_events def remove_old_events(self, rule): # Anything older than the buffer time we can forget now = ts_now() remove = [] buffer_time = rule.get('buffer_time', self.buffer_time) if rule.get('query_delay'): buffer_time += rule['query_delay'] for _id, timestamp in rule['processed_hits'].items(): if now - timestamp > buffer_time: remove.append(_id) list(map(rule['processed_hits'].pop, remove)) def run_query(self, rule, start=None, end=None, scroll=False): """ Query for the rule and pass all of the results to the RuleType instance. :param rule: The rule configuration. :param start: The earliest time to query. :param end: The latest time to query. Returns True on success and False on failure. 
""" if start is None: start = self.get_index_start(rule['index']) if end is None: end = ts_now() # Reset hit counter and query rule_inst = rule['type'] rule['scrolling_cycle'] = rule.get('scrolling_cycle', 0) + 1 index = self.get_index(rule, start, end) if rule.get('use_count_query'): data = self.get_hits_count(rule, start, end, index) elif rule.get('use_terms_query'): data = self.get_hits_terms(rule, start, end, index, rule['query_key']) elif rule.get('aggregation_query_element'): data = self.get_hits_aggregation(rule, start, end, index, rule.get('query_key', None)) else: data = self.get_hits(rule, start, end, index, scroll) if data: old_len = len(data) data = self.remove_duplicate_events(data, rule) self.thread_data.num_dupes += old_len - len(data) # There was an exception while querying if data is None: return False elif data: if rule.get('use_count_query'): rule_inst.add_count_data(data) elif rule.get('use_terms_query'): rule_inst.add_terms_data(data) elif rule.get('aggregation_query_element'): rule_inst.add_aggregation_data(data) else: rule_inst.add_data(data) try: if rule.get('scroll_id') and self.thread_data.num_hits < self.thread_data.total_hits and should_scrolling_continue(rule): self.run_query(rule, start, end, scroll=True) except RuntimeError: # It's possible to scroll far enough to hit max recursive depth pass if 'scroll_id' in rule: scroll_id = rule.pop('scroll_id') try: self.thread_data.current_es.clear_scroll(scroll_id=scroll_id) except NotFoundError: pass return True def get_starttime(self, rule): """ Query ES for the last time we ran this rule. :param rule: The rule configuration. :return: A timestamp or None. """ sort = {'sort': {'@timestamp': {'order': 'desc'}}} query = {'filter': {'term': {'rule_name': '%s' % (rule['name'])}}} if self.writeback_es.is_atleastfive(): query = {'query': {'bool': query}} query.update(sort) try: doc_type = 'elastalert_status' index = self.writeback_es.resolve_writeback_index(self.writeback_index, doc_type) if self.writeback_es.is_atleastsixtwo(): if self.writeback_es.is_atleastsixsix(): res = self.writeback_es.search(index=index, size=1, body=query, _source_includes=['endtime', 'rule_name']) else: res = self.writeback_es.search(index=index, size=1, body=query, _source_include=['endtime', 'rule_name']) else: res = self.writeback_es.deprecated_search(index=index, doc_type=doc_type, size=1, body=query, _source_include=['endtime', 'rule_name']) if res['hits']['hits']: endtime = ts_to_dt(res['hits']['hits'][0]['_source']['endtime']) if ts_now() - endtime < self.old_query_limit: return endtime else: elastalert_logger.info("Found expired previous run for %s at %s" % (rule['name'], endtime)) return None except (ElasticsearchException, KeyError) as e: self.handle_error('Error querying for last run: %s' % (e), {'rule': rule['name']}) def set_starttime(self, rule, endtime): """ Given a rule and an endtime, sets the appropriate starttime for it. 
""" # This means we are starting fresh if 'starttime' not in rule: if not rule.get('scan_entire_timeframe'): # Try to get the last run from Elasticsearch last_run_end = self.get_starttime(rule) if last_run_end: rule['starttime'] = last_run_end self.adjust_start_time_for_overlapping_agg_query(rule) self.adjust_start_time_for_interval_sync(rule, endtime) rule['minimum_starttime'] = rule['starttime'] return None # Use buffer for normal queries, or run_every increments otherwise # or, if scan_entire_timeframe, use timeframe if not rule.get('use_count_query') and not rule.get('use_terms_query'): if not rule.get('scan_entire_timeframe'): buffer_time = rule.get('buffer_time', self.buffer_time) buffer_delta = endtime - buffer_time else: buffer_delta = endtime - rule['timeframe'] # If we started using a previous run, don't go past that if 'minimum_starttime' in rule and rule['minimum_starttime'] > buffer_delta: rule['starttime'] = rule['minimum_starttime'] # If buffer_time doesn't bring us past the previous endtime, use that instead elif 'previous_endtime' in rule and rule['previous_endtime'] < buffer_delta: rule['starttime'] = rule['previous_endtime'] self.adjust_start_time_for_overlapping_agg_query(rule) else: rule['starttime'] = buffer_delta self.adjust_start_time_for_interval_sync(rule, endtime) else: if not rule.get('scan_entire_timeframe'): # Query from the end of the last run, if it exists, otherwise a run_every sized window rule['starttime'] = rule.get('previous_endtime', endtime - self.run_every) else: rule['starttime'] = rule.get('previous_endtime', endtime - rule['timeframe']) def adjust_start_time_for_overlapping_agg_query(self, rule): if rule.get('aggregation_query_element'): if rule.get('allow_buffer_time_overlap') and not rule.get('use_run_every_query_size') and ( rule['buffer_time'] > rule['run_every']): rule['starttime'] = rule['starttime'] - (rule['buffer_time'] - rule['run_every']) rule['original_starttime'] = rule['starttime'] def adjust_start_time_for_interval_sync(self, rule, endtime): # If aggregation query adjust bucket offset if rule.get('aggregation_query_element'): if rule.get('bucket_interval'): es_interval_delta = rule.get('bucket_interval_timedelta') unix_starttime = dt_to_unix(rule['starttime']) es_interval_delta_in_sec = total_seconds(es_interval_delta) offset = int(unix_starttime % es_interval_delta_in_sec) if rule.get('sync_bucket_interval'): rule['starttime'] = unix_to_dt(unix_starttime - offset) endtime = unix_to_dt(dt_to_unix(endtime) - offset) else: rule['bucket_offset_delta'] = offset def get_segment_size(self, rule): """ The segment size is either buffer_size for queries which can overlap or run_every for queries which must be strictly separate. This mimicks the query size for when ElastAlert is running continuously. """ if not rule.get('use_count_query') and not rule.get('use_terms_query') and not rule.get('aggregation_query_element'): return rule.get('buffer_time', self.buffer_time) elif rule.get('aggregation_query_element'): if rule.get('use_run_every_query_size'): return self.run_every else: return rule.get('buffer_time', self.buffer_time) else: return self.run_every def get_query_key_value(self, rule, match): # get the value for the match's query_key (or none) to form the key used for the silence_cache. 
# Flatline ruletype sets "key" instead of the actual query_key if isinstance(rule['type'], FlatlineRule) and 'key' in match: return str(match['key']) return self.get_named_key_value(rule, match, 'query_key') def get_aggregation_key_value(self, rule, match): # get the value for the match's aggregation_key (or none) to form the key used for grouped aggregates. return self.get_named_key_value(rule, match, 'aggregation_key') def get_named_key_value(self, rule, match, key_name): # search the match for the key specified in the rule to get the value if key_name in rule: try: key_value = lookup_es_key(match, rule[key_name]) if key_value is not None: # Only do the unicode conversion if we actually found something) # otherwise we might transform None --> 'None' key_value = str(key_value) except KeyError: # Some matches may not have the specified key # use a special token for these key_value = '_missing' else: key_value = None return key_value def enhance_filter(self, rule): """ If there is a blacklist or whitelist in rule then we add it to the filter. It adds it as a query_string. If there is already an query string its is appended with blacklist or whitelist. :param rule: :return: """ if not rule.get('filter_by_list', True): return if 'blacklist' in rule: listname = 'blacklist' elif 'whitelist' in rule: listname = 'whitelist' else: return filters = rule['filter'] additional_terms = [] for term in rule[listname]: if not term.startswith('/') or not term.endswith('/'): additional_terms.append(rule['compare_key'] + ':"' + term + '"') else: # These are regular expressions and won't work if they are quoted additional_terms.append(rule['compare_key'] + ':' + term) if listname == 'whitelist': query = "NOT " + " AND NOT ".join(additional_terms) else: query = " OR ".join(additional_terms) query_str_filter = {'query_string': {'query': query}} if self.writeback_es.is_atleastfive(): filters.append(query_str_filter) else: filters.append({'query': query_str_filter}) logging.debug("Enhanced filter with {} terms: {}".format(listname, str(query_str_filter))) def run_rule(self, rule, endtime, starttime=None): """ Run a rule for a given time period, including querying and alerting on results. :param rule: The rule configuration. :param starttime: The earliest timestamp to query. :param endtime: The latest timestamp to query. :return: The number of matches that the rule produced. """ run_start = time.time() self.thread_data.current_es = self.es_clients.setdefault(rule['name'], elasticsearch_client(rule)) # If there are pending aggregate matches, try processing them for x in range(len(rule['agg_matches'])): match = rule['agg_matches'].pop() self.add_aggregated_alert(match, rule) # Start from provided time if it's given if starttime: rule['starttime'] = starttime else: self.set_starttime(rule, endtime) rule['original_starttime'] = rule['starttime'] rule['scrolling_cycle'] = 0 # Don't run if starttime was set to the future if ts_now() <= rule['starttime']: logging.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime)) return 0 # Run the rule. 
If querying over a large time period, split it up into segments self.thread_data.num_hits = 0 self.thread_data.num_dupes = 0 self.thread_data.cumulative_hits = 0 segment_size = self.get_segment_size(rule) tmp_endtime = rule['starttime'] while endtime - rule['starttime'] > segment_size: tmp_endtime = tmp_endtime + segment_size if not self.run_query(rule, rule['starttime'], tmp_endtime): return 0 self.thread_data.cumulative_hits += self.thread_data.num_hits self.thread_data.num_hits = 0 rule['starttime'] = tmp_endtime rule['type'].garbage_collect(tmp_endtime) if rule.get('aggregation_query_element'): if endtime - tmp_endtime == segment_size: self.run_query(rule, tmp_endtime, endtime) self.thread_data.cumulative_hits += self.thread_data.num_hits elif total_seconds(rule['original_starttime'] - tmp_endtime) == 0: rule['starttime'] = rule['original_starttime'] return 0 else: endtime = tmp_endtime else: if not self.run_query(rule, rule['starttime'], endtime): return 0 self.thread_data.cumulative_hits += self.thread_data.num_hits rule['type'].garbage_collect(endtime) # Process any new matches num_matches = len(rule['type'].matches) while rule['type'].matches: match = rule['type'].matches.pop(0) match['num_hits'] = self.thread_data.cumulative_hits match['num_matches'] = num_matches # If realert is set, silence the rule for that duration # Silence is cached by query_key, if it exists # Default realert time is 0 seconds silence_cache_key = rule['name'] query_key_value = self.get_query_key_value(rule, match) if query_key_value is not None: silence_cache_key += '.' + query_key_value if self.is_silenced(rule['name'] + "._silence") or self.is_silenced(silence_cache_key): elastalert_logger.info('Ignoring match for silenced rule %s' % (silence_cache_key,)) continue if rule['realert']: next_alert, exponent = self.next_alert_time(rule, silence_cache_key, ts_now()) self.set_realert(silence_cache_key, next_alert, exponent) if rule.get('run_enhancements_first'): try: for enhancement in rule['match_enhancements']: try: enhancement.process(match) except EAException as e: self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']}) except DropMatchException: continue # If no aggregation, alert immediately if not rule['aggregation']: self.alert([match], rule) continue # Add it as an aggregated match self.add_aggregated_alert(match, rule) # Mark this endtime for next run's start rule['previous_endtime'] = endtime time_taken = time.time() - run_start # Write to ES that we've run this rule against this time period body = {'rule_name': rule['name'], 'endtime': endtime, 'starttime': rule['original_starttime'], 'matches': num_matches, 'hits': max(self.thread_data.num_hits, self.thread_data.cumulative_hits), '@timestamp': ts_now(), 'time_taken': time_taken} self.writeback('elastalert_status', body) return num_matches def init_rule(self, new_rule, new=True): ''' Copies some necessary non-config state from an exiting rule to a new rule. ''' if not new: self.scheduler.remove_job(job_id=new_rule['name']) try: self.modify_rule_for_ES5(new_rule) except TransportError as e: elastalert_logger.warning('Error connecting to Elasticsearch for rule {}. 
' 'The rule has been disabled.'.format(new_rule['name'])) self.send_notification_email(exception=e, rule=new_rule) return False self.enhance_filter(new_rule) # Change top_count_keys to .raw if 'top_count_keys' in new_rule and new_rule.get('raw_count_keys', True): if self.string_multi_field_name: string_multi_field_name = self.string_multi_field_name elif self.writeback_es.is_atleastfive(): string_multi_field_name = '.keyword' else: string_multi_field_name = '.raw' for i, key in enumerate(new_rule['top_count_keys']): if not key.endswith(string_multi_field_name): new_rule['top_count_keys'][i] += string_multi_field_name if 'download_dashboard' in new_rule['filter']: # Download filters from Kibana and set the rules filters to them db_filters = self.filters_from_kibana(new_rule, new_rule['filter']['download_dashboard']) if db_filters is not None: new_rule['filter'] = db_filters else: raise EAException("Could not download filters from %s" % (new_rule['filter']['download_dashboard'])) blank_rule = {'agg_matches': [], 'aggregate_alert_time': {}, 'current_aggregate_id': {}, 'processed_hits': {}, 'run_every': self.run_every, 'has_run_once': False} rule = blank_rule # Set rule to either a blank template or existing rule with same name if not new: for rule in self.rules: if rule['name'] == new_rule['name']: break else: rule = blank_rule copy_properties = ['agg_matches', 'current_aggregate_id', 'aggregate_alert_time', 'processed_hits', 'starttime', 'minimum_starttime', 'has_run_once'] for prop in copy_properties: if prop not in rule: continue new_rule[prop] = rule[prop] job = self.scheduler.add_job(self.handle_rule_execution, 'interval', args=[new_rule], seconds=new_rule['run_every'].total_seconds(), id=new_rule['name'], max_instances=1, jitter=5) job.modify(next_run_time=datetime.datetime.now() + datetime.timedelta(seconds=random.randint(0, 15))) return new_rule @staticmethod def modify_rule_for_ES5(new_rule): # Get ES version per rule rule_es = elasticsearch_client(new_rule) if rule_es.is_atleastfive(): new_rule['five'] = True else: new_rule['five'] = False return # In ES5, filters starting with 'query' should have the top wrapper removed new_filters = [] for es_filter in new_rule.get('filter', []): if es_filter.get('query'): new_filters.append(es_filter['query']) else: new_filters.append(es_filter) new_rule['filter'] = new_filters def load_rule_changes(self): """ Using the modification times of rule config files, syncs the running rules to match the files in rules_folder by removing, adding or reloading rules. """ new_rule_hashes = self.rules_loader.get_hashes(self.conf, self.args.rule) # Check each current rule for changes for rule_file, hash_value in self.rule_hashes.items(): if rule_file not in new_rule_hashes: # Rule file was deleted elastalert_logger.info('Rule file %s not found, stopping rule execution' % (rule_file)) for rule in self.rules: if rule['rule_file'] == rule_file: break else: continue self.scheduler.remove_job(job_id=rule['name']) self.rules.remove(rule) continue if hash_value != new_rule_hashes[rule_file]: # Rule file was changed, reload rule try: new_rule = self.rules_loader.load_configuration(rule_file, self.conf) if not new_rule: logging.error('Invalid rule file skipped: %s' % rule_file) continue if 'is_enabled' in new_rule and not new_rule['is_enabled']: elastalert_logger.info('Rule file %s is now disabled.' 
% (rule_file)) # Remove this rule if it's been disabled self.rules = [rule for rule in self.rules if rule['rule_file'] != rule_file] continue except EAException as e: message = 'Could not load rule %s: %s' % (rule_file, e) self.handle_error(message) # Want to send email to address specified in the rule. Try and load the YAML to find it. try: rule_yaml = self.rules_loader.load_yaml(rule_file) except EAException: self.send_notification_email(exception=e) continue self.send_notification_email(exception=e, rule=rule_yaml) continue elastalert_logger.info("Reloading configuration for rule %s" % (rule_file)) # Re-enable if rule had been disabled for disabled_rule in self.disabled_rules: if disabled_rule['name'] == new_rule['name']: self.rules.append(disabled_rule) self.disabled_rules.remove(disabled_rule) break # Initialize the rule that matches rule_file new_rule = self.init_rule(new_rule, False) self.rules = [rule for rule in self.rules if rule['rule_file'] != rule_file] if new_rule: self.rules.append(new_rule) # Load new rules if not self.args.rule: for rule_file in set(new_rule_hashes.keys()) - set(self.rule_hashes.keys()): try: new_rule = self.rules_loader.load_configuration(rule_file, self.conf) if not new_rule: logging.error('Invalid rule file skipped: %s' % rule_file) continue if 'is_enabled' in new_rule and not new_rule['is_enabled']: continue if new_rule['name'] in [rule['name'] for rule in self.rules]: raise EAException("A rule with the name %s already exists" % (new_rule['name'])) except EAException as e: self.handle_error('Could not load rule %s: %s' % (rule_file, e)) self.send_notification_email(exception=e, rule_file=rule_file) continue if self.init_rule(new_rule): elastalert_logger.info('Loaded new rule %s' % (rule_file)) if new_rule['name'] in self.es_clients: self.es_clients.pop(new_rule['name']) self.rules.append(new_rule) self.rule_hashes = new_rule_hashes def start(self): """ Periodically go through each rule and run it """ if self.starttime: if self.starttime == 'NOW': self.starttime = ts_now() else: try: self.starttime = ts_to_dt(self.starttime) except (TypeError, ValueError): self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (self.starttime)) exit(1) for rule in self.rules: rule['initial_starttime'] = self.starttime self.wait_until_responsive(timeout=self.args.timeout) self.running = True elastalert_logger.info("Starting up") self.scheduler.add_job(self.handle_pending_alerts, 'interval', seconds=self.run_every.total_seconds(), id='_internal_handle_pending_alerts') self.scheduler.add_job(self.handle_config_change, 'interval', seconds=self.run_every.total_seconds(), id='_internal_handle_config_change') self.scheduler.start() while self.running: next_run = datetime.datetime.utcnow() + self.run_every # Quit after end_time has been reached if self.args.end: endtime = ts_to_dt(self.args.end) if next_run.replace(tzinfo=dateutil.tz.tzutc()) > endtime: exit(0) if next_run < datetime.datetime.utcnow(): continue # Show disabled rules if self.show_disabled_rules: elastalert_logger.info("Disabled rules are: %s" % (str(self.get_disabled_rules()))) # Wait before querying again sleep_duration = total_seconds(next_run - datetime.datetime.utcnow()) self.sleep_for(sleep_duration) def wait_until_responsive(self, timeout, clock=timeit.default_timer): """Wait until ElasticSearch becomes responsive (or too much time passes).""" # Elapsed time is a floating point number of seconds. timeout = timeout.total_seconds() # Don't poll unless we're asked to. 
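# Added descriptive note (not from the original source): the loop below polls Elasticsearch once
# per second until the writeback alias exists or the timeout elapses; on timeout it distinguishes
# a reachable cluster with a missing alias from an unreachable cluster before exiting.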
if timeout <= 0.0: return # Periodically poll ElasticSearch. Keep going until ElasticSearch is # responsive *and* the writeback index exists. ref = clock() while (clock() - ref) < timeout: try: if self.writeback_es.indices.exists(self.writeback_alias): return except ConnectionError: pass time.sleep(1.0) if self.writeback_es.ping(): logging.error( 'Writeback alias "%s" does not exist, did you run `elastalert-create-index`?', self.writeback_alias, ) else: logging.error( 'Could not reach ElasticSearch at "%s:%d".', self.conf['es_host'], self.conf['es_port'], ) exit(1) def run_all_rules(self): """ Run each rule one time """ self.handle_pending_alerts() for rule in self.rules: self.handle_rule_execution(rule) self.handle_config_change() def handle_pending_alerts(self): self.thread_data.alerts_sent = 0 self.send_pending_alerts() elastalert_logger.info("Background alerts thread %s pending alerts sent at %s" % (self.thread_data.alerts_sent, pretty_ts(ts_now()))) def handle_config_change(self): if not self.args.pin_rules: self.load_rule_changes() elastalert_logger.info("Background configuration change check run at %s" % (pretty_ts(ts_now()))) def handle_rule_execution(self, rule): self.thread_data.alerts_sent = 0 next_run = datetime.datetime.utcnow() + rule['run_every'] # Set endtime based on the rule's delay delay = rule.get('query_delay') if hasattr(self.args, 'end') and self.args.end: endtime = ts_to_dt(self.args.end) elif delay: endtime = ts_now() - delay else: endtime = ts_now() # Apply rules based on execution time limits if rule.get('limit_execution'): rule['next_starttime'] = None rule['next_min_starttime'] = None exec_next = next(croniter(rule['limit_execution'])) endtime_epoch = dt_to_unix(endtime) # If the estimated next endtime (end + run_every) isn't at least a minute past the next exec time # That means that we need to pause execution after this run if endtime_epoch + rule['run_every'].total_seconds() < exec_next - 59: # apscheduler requires pytz tzinfos, so don't use unix_to_dt here! rule['next_starttime'] = datetime.datetime.utcfromtimestamp(exec_next).replace(tzinfo=pytz.utc) if rule.get('limit_execution_coverage'): rule['next_min_starttime'] = rule['next_starttime'] if not rule['has_run_once']: self.reset_rule_schedule(rule) return rule['has_run_once'] = True try: num_matches = self.run_rule(rule, endtime, rule.get('initial_starttime')) except EAException as e: self.handle_error("Error running rule %s: %s" % (rule['name'], e), {'rule': rule['name']}) except Exception as e: self.handle_uncaught_exception(e, rule) else: old_starttime = pretty_ts(rule.get('original_starttime'), rule.get('use_local_time')) elastalert_logger.info("Ran %s from %s to %s: %s query hits (%s already seen), %s matches," " %s alerts sent" % (rule['name'], old_starttime, pretty_ts(endtime, rule.get('use_local_time')), self.thread_data.num_hits, self.thread_data.num_dupes, num_matches, self.thread_data.alerts_sent)) self.thread_data.alerts_sent = 0 if next_run < datetime.datetime.utcnow(): # We were processing for longer than our refresh interval # This can happen if --start was specified with a large time period # or if we are running too slow to process events in real time. logging.warning( "Querying from %s to %s took longer than %s!" 
% ( old_starttime, pretty_ts(endtime, rule.get('use_local_time')), self.run_every ) ) rule['initial_starttime'] = None self.remove_old_events(rule) self.reset_rule_schedule(rule) def reset_rule_schedule(self, rule): # We hit the end of a execution schedule, pause ourselves until next run if rule.get('limit_execution') and rule['next_starttime']: self.scheduler.modify_job(job_id=rule['name'], next_run_time=rule['next_starttime']) # If we are preventing covering non-scheduled time periods, reset min_starttime and previous_endtime if rule['next_min_starttime']: rule['minimum_starttime'] = rule['next_min_starttime'] rule['previous_endtime'] = rule['next_min_starttime'] elastalert_logger.info('Pausing %s until next run at %s' % (rule['name'], pretty_ts(rule['next_starttime']))) def stop(self): """ Stop an ElastAlert runner that's been started """ self.running = False def get_disabled_rules(self): """ Return disabled rules """ return [rule['name'] for rule in self.disabled_rules] def sleep_for(self, duration): """ Sleep for a set duration """ elastalert_logger.info("Sleeping for %s seconds" % (duration)) time.sleep(duration) def generate_kibana4_db(self, rule, match): ''' Creates a link for a kibana4 dashboard which has time set to the match. ''' db_name = rule.get('use_kibana4_dashboard') start = ts_add( lookup_es_key(match, rule['timestamp_field']), -rule.get('kibana4_start_timedelta', rule.get('timeframe', datetime.timedelta(minutes=10))) ) end = ts_add( lookup_es_key(match, rule['timestamp_field']), rule.get('kibana4_end_timedelta', rule.get('timeframe', datetime.timedelta(minutes=10))) ) return kibana.kibana4_dashboard_link(db_name, start, end) def generate_kibana_db(self, rule, match): ''' Uses a template dashboard to upload a temp dashboard showing the match. Returns the url to the dashboard. ''' db = copy.deepcopy(kibana.dashboard_temp) # Set timestamp fields to match our rule especially if # we have configured something other than @timestamp kibana.set_timestamp_field(db, rule['timestamp_field']) # Set filters for filter in rule['filter']: if filter: kibana.add_filter(db, filter) kibana.set_included_fields(db, rule['include']) # Set index index = self.get_index(rule) kibana.set_index_name(db, index) return self.upload_dashboard(db, rule, match) def upload_dashboard(self, db, rule, match): ''' Uploads a dashboard schema to the kibana-int Elasticsearch index associated with rule. Returns the url to the dashboard. 
''' # Set time range start = ts_add(lookup_es_key(match, rule['timestamp_field']), -rule.get('timeframe', datetime.timedelta(minutes=10))) end = ts_add(lookup_es_key(match, rule['timestamp_field']), datetime.timedelta(minutes=10)) kibana.set_time(db, start, end) # Set dashboard name db_name = 'ElastAlert - %s - %s' % (rule['name'], end) kibana.set_name(db, db_name) # Add filter for query_key value if 'query_key' in rule: for qk in rule.get('compound_query_key', [rule['query_key']]): if qk in match: term = {'term': {qk: match[qk]}} kibana.add_filter(db, term) # Add filter for aggregation_key value if 'aggregation_key' in rule: for qk in rule.get('compound_aggregation_key', [rule['aggregation_key']]): if qk in match: term = {'term': {qk: match[qk]}} kibana.add_filter(db, term) # Convert to json db_js = json.dumps(db) db_body = {'user': 'guest', 'group': 'guest', 'title': db_name, 'dashboard': db_js} # Upload es = elasticsearch_client(rule) # TODO: doc_type = _doc for elastic >= 6 res = es.index(index='kibana-int', doc_type='temp', body=db_body) # Return dashboard URL kibana_url = rule.get('kibana_url') if not kibana_url: kibana_url = 'http://%s:%s/_plugin/kibana/' % (rule['es_host'], rule['es_port']) return kibana_url + '#/dashboard/temp/%s' % (res['_id']) def get_dashboard(self, rule, db_name): """ Download dashboard which matches use_kibana_dashboard from Elasticsearch. """ es = elasticsearch_client(rule) if not db_name: raise EAException("use_kibana_dashboard undefined") query = {'query': {'term': {'_id': db_name}}} try: # TODO use doc_type = _doc res = es.deprecated_search(index='kibana-int', doc_type='dashboard', body=query, _source_include=['dashboard']) except ElasticsearchException as e: raise EAException("Error querying for dashboard: %s" % (e)).with_traceback(sys.exc_info()[2]) if res['hits']['hits']: return json.loads(res['hits']['hits'][0]['_source']['dashboard']) else: raise EAException("Could not find dashboard named %s" % (db_name)) def use_kibana_link(self, rule, match): """ Uploads an existing dashboard as a temp dashboard modified for match time. Returns the url to the dashboard. """ # Download or get cached dashboard dashboard = rule.get('dashboard_schema') if not dashboard: db_name = rule.get('use_kibana_dashboard') dashboard = self.get_dashboard(rule, db_name) if dashboard: rule['dashboard_schema'] = dashboard else: return None dashboard = copy.deepcopy(dashboard) return self.upload_dashboard(dashboard, rule, match) def filters_from_kibana(self, rule, db_name): """ Downloads a dashboard from Kibana and returns corresponding filters, None on error. """ try: db = rule.get('dashboard_schema') if not db: db = self.get_dashboard(rule, db_name) filters = kibana.filters_from_dashboard(db) except EAException: return None return filters def alert(self, matches, rule, alert_time=None, retried=False): """ Wraps alerting, Kibana linking and enhancements in an exception handler """ try: return self.send_alert(matches, rule, alert_time=alert_time, retried=retried) except Exception as e: self.handle_uncaught_exception(e, rule) def send_alert(self, matches, rule, alert_time=None, retried=False): """ Send out an alert. :param matches: A list of matches. :param rule: A rule configuration. 
""" if not matches: return if alert_time is None: alert_time = ts_now() # Compute top count keys if rule.get('top_count_keys'): for match in matches: if 'query_key' in rule: qk = lookup_es_key(match, rule['query_key']) else: qk = None if isinstance(rule['type'], FlatlineRule): # flatline rule triggers when there have been no events from now()-timeframe to now(), # so using now()-timeframe will return no results. for now we can just mutliple the timeframe # by 2, but this could probably be timeframe+run_every to prevent too large of a lookup? timeframe = datetime.timedelta(seconds=2 * rule.get('timeframe').total_seconds()) else: timeframe = rule.get('timeframe', datetime.timedelta(minutes=10)) start = ts_to_dt(lookup_es_key(match, rule['timestamp_field'])) - timeframe end = ts_to_dt(lookup_es_key(match, rule['timestamp_field'])) + datetime.timedelta(minutes=10) keys = rule.get('top_count_keys') counts = self.get_top_counts(rule, start, end, keys, qk=qk) match.update(counts) # Generate a kibana3 dashboard for the first match if rule.get('generate_kibana_link') or rule.get('use_kibana_dashboard'): try: if rule.get('generate_kibana_link'): kb_link = self.generate_kibana_db(rule, matches[0]) else: kb_link = self.use_kibana_link(rule, matches[0]) except EAException as e: self.handle_error("Could not generate Kibana dash for %s match: %s" % (rule['name'], e)) else: if kb_link: matches[0]['kibana_link'] = kb_link if rule.get('use_kibana4_dashboard'): kb_link = self.generate_kibana4_db(rule, matches[0]) if kb_link: matches[0]['kibana_link'] = kb_link if rule.get('generate_kibana_discover_url'): kb_link = generate_kibana_discover_url(rule, matches[0]) if kb_link: matches[0]['kibana_discover_url'] = kb_link # Enhancements were already run at match time if # run_enhancements_first is set or # retried==True, which means this is a retry of a failed alert if not rule.get('run_enhancements_first') and not retried: for enhancement in rule['match_enhancements']: valid_matches = [] for match in matches: try: enhancement.process(match) valid_matches.append(match) except DropMatchException: pass except EAException as e: self.handle_error("Error running match enhancement: %s" % (e), {'rule': rule['name']}) matches = valid_matches if not matches: return None # Don't send real alerts in debug mode if self.debug: alerter = DebugAlerter(rule) alerter.alert(matches) return None # Run the alerts alert_sent = False alert_exception = None # Alert.pipeline is a single object shared between every alerter # This allows alerters to pass objects and data between themselves alert_pipeline = {"alert_time": alert_time} for alert in rule['alert']: alert.pipeline = alert_pipeline try: alert.alert(matches) except EAException as e: self.handle_error('Error while running alert %s: %s' % (alert.get_info()['type'], e), {'rule': rule['name']}) alert_exception = str(e) else: self.thread_data.alerts_sent += 1 alert_sent = True # Write the alert(s) to ES agg_id = None for match in matches: alert_body = self.get_alert_body(match, rule, alert_sent, alert_time, alert_exception) # Set all matches to aggregate together if agg_id: alert_body['aggregate_id'] = agg_id res = self.writeback('elastalert', alert_body, rule) if res and not agg_id: agg_id = res['_id'] def get_alert_body(self, match, rule, alert_sent, alert_time, alert_exception=None): body = { 'match_body': match, 'rule_name': rule['name'], 'alert_info': rule['alert'][0].get_info() if not self.debug else {}, 'alert_sent': alert_sent, 'alert_time': alert_time } if 
rule.get('include_match_in_root'): body.update({k: v for k, v in match.items() if not k.startswith('_')}) if self.add_metadata_alert: body['category'] = rule['category'] body['description'] = rule['description'] body['owner'] = rule['owner'] body['priority'] = rule['priority'] match_time = lookup_es_key(match, rule['timestamp_field']) if match_time is not None: body['match_time'] = match_time # TODO record info about multiple alerts # If the alert failed to send, record the exception if not alert_sent: body['alert_exception'] = alert_exception return body def writeback(self, doc_type, body, rule=None, match_body=None): # ES 2.0 - 2.3 does not support dots in field names. if self.replace_dots_in_field_names: writeback_body = replace_dots_in_field_names(body) else: writeback_body = body for key in list(writeback_body.keys()): # Convert any datetime objects to timestamps if isinstance(writeback_body[key], datetime.datetime): writeback_body[key] = dt_to_ts(writeback_body[key]) if self.debug: elastalert_logger.info("Skipping writing to ES: %s" % (writeback_body)) return None if '@timestamp' not in writeback_body: writeback_body['@timestamp'] = dt_to_ts(ts_now()) try: index = self.writeback_es.resolve_writeback_index(self.writeback_index, doc_type) if self.writeback_es.is_atleastsixtwo(): res = self.writeback_es.index(index=index, body=body) else: res = self.writeback_es.index(index=index, doc_type=doc_type, body=body) return res except ElasticsearchException as e: logging.exception("Error writing alert info to Elasticsearch: %s" % (e)) def find_recent_pending_alerts(self, time_limit): """ Queries writeback_es to find alerts that did not send and are newer than time_limit """ # XXX only fetches 1000 results. If limit is reached, next loop will catch them # unless there is constantly more than 1000 alerts to send. # Fetch recent, unsent alerts that aren't part of an aggregate, earlier alerts first. 
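# Illustrative sketch of the resulting query body for ES >= 5 (an assumption added for clarity,
# not part of the original source); <from>/<to> stand in for the ts_now()-based time_limit bounds:
#   {'query': {'bool': {'must': {'query_string': {'query': '!_exists_:aggregate_id AND alert_sent:false'}},
#                       'filter': {'range': {'alert_time': {'from': '<from>', 'to': '<to>'}}}}},
#    'sort': {'alert_time': {'order': 'asc'}}}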
inner_query = {'query_string': {'query': '!_exists_:aggregate_id AND alert_sent:false'}} time_filter = {'range': {'alert_time': {'from': dt_to_ts(ts_now() - time_limit), 'to': dt_to_ts(ts_now())}}} sort = {'sort': {'alert_time': {'order': 'asc'}}} if self.writeback_es.is_atleastfive(): query = {'query': {'bool': {'must': inner_query, 'filter': time_filter}}} else: query = {'query': inner_query, 'filter': time_filter} query.update(sort) try: if self.writeback_es.is_atleastsixtwo(): res = self.writeback_es.search(index=self.writeback_index, body=query, size=1000) else: res = self.writeback_es.deprecated_search(index=self.writeback_index, doc_type='elastalert', body=query, size=1000) if res['hits']['hits']: return res['hits']['hits'] except ElasticsearchException as e: logging.exception("Error finding recent pending alerts: %s %s" % (e, query)) return [] def send_pending_alerts(self): pending_alerts = self.find_recent_pending_alerts(self.alert_time_limit) for alert in pending_alerts: _id = alert['_id'] alert = alert['_source'] try: rule_name = alert.pop('rule_name') alert_time = alert.pop('alert_time') match_body = alert.pop('match_body') except KeyError: # Malformed alert, drop it continue # Find original rule for rule in self.rules: if rule['name'] == rule_name: break else: # Original rule is missing, keep alert for later if rule reappears continue # Set current_es for top_count_keys query self.thread_data.current_es = elasticsearch_client(rule) # Send the alert unless it's a future alert if ts_now() > ts_to_dt(alert_time): aggregated_matches = self.get_aggregated_matches(_id) if aggregated_matches: matches = [match_body] + [agg_match['match_body'] for agg_match in aggregated_matches] self.alert(matches, rule, alert_time=alert_time) else: # If this rule isn't using aggregation, this must be a retry of a failed alert retried = False if not rule.get('aggregation'): retried = True self.alert([match_body], rule, alert_time=alert_time, retried=retried) if rule['current_aggregate_id']: for qk, agg_id in rule['current_aggregate_id'].items(): if agg_id == _id: rule['current_aggregate_id'].pop(qk) break # Delete it from the index try: if self.writeback_es.is_atleastsixtwo(): self.writeback_es.delete(index=self.writeback_index, id=_id) else: self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=_id) except ElasticsearchException: # TODO: Give this a more relevant exception, try:except: is evil. self.handle_error("Failed to delete alert %s at %s" % (_id, alert_time)) # Send in memory aggregated alerts for rule in self.rules: if rule['agg_matches']: for aggregation_key_value, aggregate_alert_time in rule['aggregate_alert_time'].items(): if ts_now() > aggregate_alert_time: alertable_matches = [ agg_match for agg_match in rule['agg_matches'] if self.get_aggregation_key_value(rule, agg_match) == aggregation_key_value ] self.alert(alertable_matches, rule) rule['agg_matches'] = [ agg_match for agg_match in rule['agg_matches'] if self.get_aggregation_key_value(rule, agg_match) != aggregation_key_value ] def get_aggregated_matches(self, _id): """ Removes and returns all matches from writeback_es that have aggregate_id == _id """ # XXX if there are more than self.max_aggregation matches, you have big alerts and we will leave entries in ES. 
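# Added descriptive note (not from the original source): the query below fetches up to
# self.max_aggregation documents whose aggregate_id equals _id, collects each document's _source,
# and deletes the documents from the writeback index as they are read.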
query = {'query': {'query_string': {'query': 'aggregate_id:%s' % (_id)}}, 'sort': {'@timestamp': 'asc'}} matches = [] try: if self.writeback_es.is_atleastsixtwo(): res = self.writeback_es.search(index=self.writeback_index, body=query, size=self.max_aggregation) else: res = self.writeback_es.deprecated_search(index=self.writeback_index, doc_type='elastalert', body=query, size=self.max_aggregation) for match in res['hits']['hits']: matches.append(match['_source']) if self.writeback_es.is_atleastsixtwo(): self.writeback_es.delete(index=self.writeback_index, id=match['_id']) else: self.writeback_es.delete(index=self.writeback_index, doc_type='elastalert', id=match['_id']) except (KeyError, ElasticsearchException) as e: self.handle_error("Error fetching aggregated matches: %s" % (e), {'id': _id}) return matches def find_pending_aggregate_alert(self, rule, aggregation_key_value=None): query = {'filter': {'bool': {'must': [{'term': {'rule_name': rule['name']}}, {'range': {'alert_time': {'gt': ts_now()}}}, {'term': {'alert_sent': 'false'}}], 'must_not': [{'exists': {'field': 'aggregate_id'}}]}}} if aggregation_key_value: query['filter']['bool']['must'].append({'term': {'aggregation_key': aggregation_key_value}}) if self.writeback_es.is_atleastfive(): query = {'query': {'bool': query}} query['sort'] = {'alert_time': {'order': 'desc'}} try: if self.writeback_es.is_atleastsixtwo(): res = self.writeback_es.search(index=self.writeback_index, body=query, size=1) else: res = self.writeback_es.deprecated_search(index=self.writeback_index, doc_type='elastalert', body=query, size=1) if len(res['hits']['hits']) == 0: return None except (KeyError, ElasticsearchException) as e: self.handle_error("Error searching for pending aggregated matches: %s" % (e), {'rule_name': rule['name']}) return None return res['hits']['hits'][0] def add_aggregated_alert(self, match, rule): """ Save a match as a pending aggregate alert to Elasticsearch. 
""" # Optionally include the 'aggregation_key' as a dimension for aggregations aggregation_key_value = self.get_aggregation_key_value(rule, match) if (not rule['current_aggregate_id'].get(aggregation_key_value) or ('aggregate_alert_time' in rule and aggregation_key_value in rule['aggregate_alert_time'] and rule[ 'aggregate_alert_time'].get(aggregation_key_value) < ts_to_dt(lookup_es_key(match, rule['timestamp_field'])))): # ElastAlert may have restarted while pending alerts exist pending_alert = self.find_pending_aggregate_alert(rule, aggregation_key_value) if pending_alert: alert_time = ts_to_dt(pending_alert['_source']['alert_time']) rule['aggregate_alert_time'][aggregation_key_value] = alert_time agg_id = pending_alert['_id'] rule['current_aggregate_id'] = {aggregation_key_value: agg_id} elastalert_logger.info( 'Adding alert for %s to aggregation(id: %s, aggregation_key: %s), next alert at %s' % ( rule['name'], agg_id, aggregation_key_value, alert_time ) ) else: # First match, set alert_time alert_time = '' if isinstance(rule['aggregation'], dict) and rule['aggregation'].get('schedule'): croniter._datetime_to_timestamp = cronite_datetime_to_timestamp # For Python 2.6 compatibility try: iter = croniter(rule['aggregation']['schedule'], ts_now()) alert_time = unix_to_dt(iter.get_next()) except Exception as e: self.handle_error("Error parsing aggregate send time Cron format %s" % (e), rule['aggregation']['schedule']) else: if rule.get('aggregate_by_match_time', False): match_time = ts_to_dt(lookup_es_key(match, rule['timestamp_field'])) alert_time = match_time + rule['aggregation'] else: alert_time = ts_now() + rule['aggregation'] rule['aggregate_alert_time'][aggregation_key_value] = alert_time agg_id = None elastalert_logger.info( 'New aggregation for %s, aggregation_key: %s. next alert at %s.' % (rule['name'], aggregation_key_value, alert_time) ) else: # Already pending aggregation, use existing alert_time alert_time = rule['aggregate_alert_time'].get(aggregation_key_value) agg_id = rule['current_aggregate_id'].get(aggregation_key_value) elastalert_logger.info( 'Adding alert for %s to aggregation(id: %s, aggregation_key: %s), next alert at %s' % ( rule['name'], agg_id, aggregation_key_value, alert_time ) ) alert_body = self.get_alert_body(match, rule, False, alert_time) if agg_id: alert_body['aggregate_id'] = agg_id if aggregation_key_value: alert_body['aggregation_key'] = aggregation_key_value res = self.writeback('elastalert', alert_body, rule) # If new aggregation, save _id if res and not agg_id: rule['current_aggregate_id'][aggregation_key_value] = res['_id'] # Couldn't write the match to ES, save it in memory for now if not res: rule['agg_matches'].append(match) return res def silence(self, silence_cache_key=None): """ Silence an alert for a period of time. --silence and --rule must be passed as args. """ if self.debug: logging.error('--silence not compatible with --debug') exit(1) if not self.args.rule: logging.error('--silence must be used with --rule') exit(1) # With --rule, self.rules will only contain that specific rule if not silence_cache_key: silence_cache_key = self.rules[0]['name'] + "._silence" try: silence_ts = parse_deadline(self.args.silence) except (ValueError, TypeError): logging.error('%s is not a valid time period' % (self.args.silence)) exit(1) if not self.set_realert(silence_cache_key, silence_ts, 0): logging.error('Failed to save silence command to Elasticsearch') exit(1) elastalert_logger.info('Success. 
%s will be silenced until %s' % (silence_cache_key, silence_ts)) def set_realert(self, silence_cache_key, timestamp, exponent): """ Write a silence to Elasticsearch for silence_cache_key until timestamp. """ body = {'exponent': exponent, 'rule_name': silence_cache_key, '@timestamp': ts_now(), 'until': timestamp} self.silence_cache[silence_cache_key] = (timestamp, exponent) return self.writeback('silence', body) def is_silenced(self, rule_name): """ Checks if rule_name is currently silenced. Returns false on exception. """ if rule_name in self.silence_cache: if ts_now() < self.silence_cache[rule_name][0]: return True if self.debug: return False query = {'term': {'rule_name': rule_name}} sort = {'sort': {'until': {'order': 'desc'}}} if self.writeback_es.is_atleastfive(): query = {'query': query} else: query = {'filter': query} query.update(sort) try: doc_type = 'silence' index = self.writeback_es.resolve_writeback_index(self.writeback_index, doc_type) if self.writeback_es.is_atleastsixtwo(): if self.writeback_es.is_atleastsixsix(): res = self.writeback_es.search(index=index, size=1, body=query, _source_includes=['until', 'exponent']) else: res = self.writeback_es.search(index=index, size=1, body=query, _source_include=['until', 'exponent']) else: res = self.writeback_es.deprecated_search(index=index, doc_type=doc_type, size=1, body=query, _source_include=['until', 'exponent']) except ElasticsearchException as e: self.handle_error("Error while querying for alert silence status: %s" % (e), {'rule': rule_name}) return False if res['hits']['hits']: until_ts = res['hits']['hits'][0]['_source']['until'] exponent = res['hits']['hits'][0]['_source'].get('exponent', 0) if rule_name not in list(self.silence_cache.keys()): self.silence_cache[rule_name] = (ts_to_dt(until_ts), exponent) else: self.silence_cache[rule_name] = (ts_to_dt(until_ts), self.silence_cache[rule_name][1]) if ts_now() < ts_to_dt(until_ts): return True return False def handle_error(self, message, data=None): ''' Logs message at error level and writes message, data and traceback to Elasticsearch. ''' logging.error(message) body = {'message': message} tb = traceback.format_exc() body['traceback'] = tb.strip().split('\n') if data: body['data'] = data self.writeback('elastalert_error', body) def handle_uncaught_exception(self, exception, rule): """ Disables a rule and sends a notification. 
""" logging.error(traceback.format_exc()) self.handle_error('Uncaught exception running rule %s: %s' % (rule['name'], exception), {'rule': rule['name']}) if self.disable_rules_on_error: self.rules = [running_rule for running_rule in self.rules if running_rule['name'] != rule['name']] self.disabled_rules.append(rule) self.scheduler.pause_job(job_id=rule['name']) elastalert_logger.info('Rule %s disabled', rule['name']) if self.notify_email: self.send_notification_email(exception=exception, rule=rule) def send_notification_email(self, text='', exception=None, rule=None, subject=None, rule_file=None): email_body = text rule_name = None if rule: rule_name = rule['name'] elif rule_file: rule_name = rule_file if exception and rule_name: if not subject: subject = 'Uncaught exception in ElastAlert - %s' % (rule_name) email_body += '\n\n' email_body += 'The rule %s has raised an uncaught exception.\n\n' % (rule_name) if self.disable_rules_on_error: modified = ' or if the rule config file has been modified' if not self.args.pin_rules else '' email_body += 'It has been disabled and will be re-enabled when ElastAlert restarts%s.\n\n' % (modified) tb = traceback.format_exc() email_body += tb if isinstance(self.notify_email, str): self.notify_email = [self.notify_email] email = MIMEText(email_body) email['Subject'] = subject if subject else 'ElastAlert notification' recipients = self.notify_email if rule and rule.get('notify_email'): if isinstance(rule['notify_email'], str): rule['notify_email'] = [rule['notify_email']] recipients = recipients + rule['notify_email'] recipients = list(set(recipients)) email['To'] = ', '.join(recipients) email['From'] = self.from_addr email['Reply-To'] = self.conf.get('email_reply_to', email['To']) try: smtp = SMTP(self.smtp_host) smtp.sendmail(self.from_addr, recipients, email.as_string()) except (SMTPException, error) as e: self.handle_error('Error connecting to SMTP host: %s' % (e), {'email_body': email_body}) def get_top_counts(self, rule, starttime, endtime, keys, number=None, qk=None): """ Counts the number of events for each unique value for each key field. Returns a dictionary with top_events_ mapped to the top 5 counts for each key. """ all_counts = {} if not number: number = rule.get('top_count_number', 5) for key in keys: index = self.get_index(rule, starttime, endtime) hits_terms = self.get_hits_terms(rule, starttime, endtime, index, key, qk, number) if hits_terms is None: top_events_count = {} else: buckets = list(hits_terms.values())[0] # get_hits_terms adds to num_hits, but we don't want to count these self.thread_data.num_hits -= len(buckets) terms = {} for bucket in buckets: terms[bucket['key']] = bucket['doc_count'] counts = list(terms.items()) counts.sort(key=lambda x: x[1], reverse=True) top_events_count = dict(counts[:number]) # Save a dict with the top 5 events by key all_counts['top_events_%s' % (key)] = top_events_count return all_counts def next_alert_time(self, rule, name, timestamp): """ Calculate an 'until' time and exponent based on how much past the last 'until' we are. 
""" if name in self.silence_cache: last_until, exponent = self.silence_cache[name] else: # If this isn't cached, this is the first alert or writeback_es is down, normal realert return timestamp + rule['realert'], 0 if not rule.get('exponential_realert'): return timestamp + rule['realert'], 0 diff = seconds(timestamp - last_until) # Increase exponent if we've alerted recently if diff < seconds(rule['realert']) * 2 ** exponent: exponent += 1 else: # Continue decreasing exponent the longer it's been since the last alert while diff > seconds(rule['realert']) * 2 ** exponent and exponent > 0: diff -= seconds(rule['realert']) * 2 ** exponent exponent -= 1 wait = datetime.timedelta(seconds=seconds(rule['realert']) * 2 ** exponent) if wait >= rule['exponential_realert']: return timestamp + rule['exponential_realert'], exponent - 1 return timestamp + wait, exponent def handle_signal(signal, frame): elastalert_logger.info('SIGINT received, stopping ElastAlert...') # use os._exit to exit immediately and avoid someone catching SystemExit os._exit(0) def main(args=None): signal.signal(signal.SIGINT, handle_signal) if not args: args = sys.argv[1:] client = ElastAlerter(args) if not client.args.silence: client.start() if __name__ == '__main__': sys.exit(main(sys.argv[1:])) elastalert-0.2.4/elastalert/enhancements.py000066400000000000000000000014661364615736500210660ustar00rootroot00000000000000# -*- coding: utf-8 -*- from .util import pretty_ts class BaseEnhancement(object): """ Enhancements take a match dictionary object and modify it in some way to enhance an alert. These are specified in each rule under the match_enhancements option. Generally, the key value pairs in the match module will be contained in the alert body. """ def __init__(self, rule): self.rule = rule def process(self, match): """ Modify the contents of match, a dictionary, in some way """ raise NotImplementedError() class TimeEnhancement(BaseEnhancement): def process(self, match): match['@timestamp'] = pretty_ts(match['@timestamp']) class DropMatchException(Exception): """ ElastAlert will drop a match if this exception type is raised by an enhancement """ pass elastalert-0.2.4/elastalert/es_mappings/000077500000000000000000000000001364615736500203425ustar00rootroot00000000000000elastalert-0.2.4/elastalert/es_mappings/5/000077500000000000000000000000001364615736500205065ustar00rootroot00000000000000elastalert-0.2.4/elastalert/es_mappings/5/elastalert.json000066400000000000000000000011161364615736500235400ustar00rootroot00000000000000{ "elastalert": { "properties": { "rule_name": { "index": "not_analyzed", "type": "string" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" }, "alert_time": { "type": "date", "format": "dateOptionalTime" }, "match_time": { "type": "date", "format": "dateOptionalTime" }, "match_body": { "type": "object", "enabled": "false" }, "aggregate_id": { "index": "not_analyzed", "type": "string" } } } } elastalert-0.2.4/elastalert/es_mappings/5/elastalert_error.json000066400000000000000000000003431364615736500247520ustar00rootroot00000000000000{ "elastalert_error": { "properties": { "data": { "type": "object", "enabled": "false" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } } elastalert-0.2.4/elastalert/es_mappings/5/elastalert_status.json000066400000000000000000000003561364615736500251500ustar00rootroot00000000000000{ "elastalert_status": { "properties": { "rule_name": { "index": "not_analyzed", "type": "string" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } 
} elastalert-0.2.4/elastalert/es_mappings/5/past_elastalert.json000066400000000000000000000006331364615736500245720ustar00rootroot00000000000000{ "past_elastalert": { "properties": { "rule_name": { "index": "not_analyzed", "type": "string" }, "match_body": { "type": "object", "enabled": "false" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" }, "aggregate_id": { "index": "not_analyzed", "type": "string" } } } } elastalert-0.2.4/elastalert/es_mappings/5/silence.json000066400000000000000000000004731364615736500230270ustar00rootroot00000000000000{ "silence": { "properties": { "rule_name": { "index": "not_analyzed", "type": "string" }, "until": { "type": "date", "format": "dateOptionalTime" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } } elastalert-0.2.4/elastalert/es_mappings/6/000077500000000000000000000000001364615736500205075ustar00rootroot00000000000000elastalert-0.2.4/elastalert/es_mappings/6/elastalert.json000066400000000000000000000012711364615736500235430ustar00rootroot00000000000000{ "numeric_detection": true, "date_detection": false, "dynamic_templates": [ { "strings_as_keyword": { "mapping": { "ignore_above": 1024, "type": "keyword" }, "match_mapping_type": "string" } } ], "properties": { "rule_name": { "type": "keyword" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" }, "alert_time": { "type": "date", "format": "dateOptionalTime" }, "match_time": { "type": "date", "format": "dateOptionalTime" }, "match_body": { "type": "object" }, "aggregate_id": { "type": "keyword" } } } elastalert-0.2.4/elastalert/es_mappings/6/elastalert_error.json000066400000000000000000000002631364615736500247540ustar00rootroot00000000000000{ "properties": { "data": { "type": "object", "enabled": "false" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } elastalert-0.2.4/elastalert/es_mappings/6/elastalert_status.json000066400000000000000000000002371364615736500251470ustar00rootroot00000000000000{ "properties": { "rule_name": { "type": "keyword" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } elastalert-0.2.4/elastalert/es_mappings/6/past_elastalert.json000066400000000000000000000004401364615736500245670ustar00rootroot00000000000000{ "properties": { "rule_name": { "type": "keyword" }, "match_body": { "type": "object", "enabled": "false" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" }, "aggregate_id": { "type": "keyword" } } } elastalert-0.2.4/elastalert/es_mappings/6/silence.json000066400000000000000000000003561364615736500230300ustar00rootroot00000000000000{ "properties": { "rule_name": { "type": "keyword" }, "until": { "type": "date", "format": "dateOptionalTime" }, "@timestamp": { "type": "date", "format": "dateOptionalTime" } } } elastalert-0.2.4/elastalert/kibana.py000066400000000000000000000322431364615736500176400ustar00rootroot00000000000000# -*- coding: utf-8 -*- # flake8: noqa import os.path import urllib.error import urllib.parse import urllib.request from .util import EAException dashboard_temp = {'editable': True, 'failover': False, 'index': {'default': 'NO_TIME_FILTER_OR_INDEX_PATTERN_NOT_MATCHED', 'interval': 'none', 'pattern': '', 'warm_fields': True}, 'loader': {'hide': False, 'load_elasticsearch': True, 'load_elasticsearch_size': 20, 'load_gist': True, 'load_local': True, 'save_default': True, 'save_elasticsearch': True, 'save_gist': False, 'save_local': True, 'save_temp': True, 'save_temp_ttl': '30d', 'save_temp_ttl_enable': True}, 'nav': [{'collapse': False, 'enable': True, 
'filter_id': 0, 'notice': False, 'now': False, 'refresh_intervals': ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'], 'status': 'Stable', 'time_options': ['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d'], 'timefield': '@timestamp', 'type': 'timepicker'}], 'panel_hints': True, 'pulldowns': [{'collapse': False, 'enable': True, 'notice': True, 'type': 'filtering'}], 'refresh': False, 'rows': [{'collapsable': True, 'collapse': False, 'editable': True, 'height': '350px', 'notice': False, 'panels': [{'annotate': {'enable': False, 'field': '_type', 'query': '*', 'size': 20, 'sort': ['_score', 'desc']}, 'auto_int': True, 'bars': True, 'derivative': False, 'editable': True, 'fill': 3, 'grid': {'max': None, 'min': 0}, 'group': ['default'], 'interactive': True, 'interval': '1m', 'intervals': ['auto', '1s', '1m', '5m', '10m', '30m', '1h', '3h', '12h', '1d', '1w', '1M', '1y'], 'legend': True, 'legend_counts': True, 'lines': False, 'linewidth': 3, 'mode': 'count', 'options': True, 'percentage': False, 'pointradius': 5, 'points': False, 'queries': {'ids': [0], 'mode': 'all'}, 'resolution': 100, 'scale': 1, 'show_query': True, 'span': 12, 'spyable': True, 'stack': True, 'time_field': '@timestamp', 'timezone': 'browser', 'title': 'Events over time', 'tooltip': {'query_as_alias': True, 'value_type': 'cumulative'}, 'type': 'histogram', 'value_field': None, 'x-axis': True, 'y-axis': True, 'y_format': 'none', 'zerofill': True, 'zoomlinks': True}], 'title': 'Graph'}, {'collapsable': True, 'collapse': False, 'editable': True, 'height': '350px', 'notice': False, 'panels': [{'all_fields': False, 'editable': True, 'error': False, 'field_list': True, 'fields': [], 'group': ['default'], 'header': True, 'highlight': [], 'localTime': True, 'normTimes': True, 'offset': 0, 'overflow': 'min-height', 'pages': 5, 'paging': True, 'queries': {'ids': [0], 'mode': 'all'}, 'size': 100, 'sort': ['@timestamp', 'desc'], 'sortable': True, 'span': 12, 'spyable': True, 'status': 'Stable', 'style': {'font-size': '9pt'}, 'timeField': '@timestamp', 'title': 'All events', 'trimFactor': 300, 'type': 'table'}], 'title': 'Events'}], 'services': {'filter': {'ids': [0], 'list': {'0': {'active': True, 'alias': '', 'field': '@timestamp', 'from': 'now-24h', 'id': 0, 'mandate': 'must', 'to': 'now', 'type': 'time'}}}, 'query': {'ids': [0], 'list': {'0': {'alias': '', 'color': '#7EB26D', 'enable': True, 'id': 0, 'pin': False, 'query': '', 'type': 'lucene'}}}}, 'style': 'dark', 'title': 'ElastAlert Alert Dashboard'} kibana4_time_temp = "(refreshInterval:(display:Off,section:0,value:0),time:(from:'%s',mode:absolute,to:'%s'))" def set_time(dashboard, start, end): dashboard['services']['filter']['list']['0']['from'] = start dashboard['services']['filter']['list']['0']['to'] = end def set_index_name(dashboard, name): dashboard['index']['default'] = name def set_timestamp_field(dashboard, field): # set the nav timefield if we don't want @timestamp dashboard['nav'][0]['timefield'] = field # set the time_field for each of our panels for row in dashboard.get('rows'): for panel in row.get('panels'): panel['time_field'] = field # set our filter's time field dashboard['services']['filter']['list']['0']['field'] = field def add_filter(dashboard, es_filter): next_id = max(dashboard['services']['filter']['ids']) + 1 kibana_filter = {'active': True, 'alias': '', 'id': next_id, 'mandate': 'must'} if 'not' in es_filter: es_filter = es_filter['not'] kibana_filter['mandate'] = 'mustNot' if 'query' in es_filter: es_filter = es_filter['query'] if 
'query_string' in es_filter: kibana_filter['type'] = 'querystring' kibana_filter['query'] = es_filter['query_string']['query'] elif 'term' in es_filter: kibana_filter['type'] = 'field' f_field, f_query = list(es_filter['term'].items())[0] # Wrap query in quotes, otherwise certain characters cause Kibana to throw errors if isinstance(f_query, str): f_query = '"%s"' % (f_query.replace('"', '\\"')) if isinstance(f_query, list): # Escape quotes f_query = [item.replace('"', '\\"') for item in f_query] # Wrap in quotes f_query = ['"%s"' % (item) for item in f_query] # Convert into joined query f_query = '(%s)' % (' AND '.join(f_query)) kibana_filter['field'] = f_field kibana_filter['query'] = f_query elif 'range' in es_filter: kibana_filter['type'] = 'range' f_field, f_range = list(es_filter['range'].items())[0] kibana_filter['field'] = f_field kibana_filter.update(f_range) else: raise EAException("Could not parse filter %s for Kibana" % (es_filter)) dashboard['services']['filter']['ids'].append(next_id) dashboard['services']['filter']['list'][str(next_id)] = kibana_filter def set_name(dashboard, name): dashboard['title'] = name def set_included_fields(dashboard, fields): dashboard['rows'][1]['panels'][0]['fields'] = list(set(fields)) def filters_from_dashboard(db): filters = db['services']['filter']['list'] config_filters = [] or_filters = [] for filter in list(filters.values()): filter_type = filter['type'] if filter_type == 'time': continue if filter_type == 'querystring': config_filter = {'query': {'query_string': {'query': filter['query']}}} if filter_type == 'field': config_filter = {'term': {filter['field']: filter['query']}} if filter_type == 'range': config_filter = {'range': {filter['field']: {'from': filter['from'], 'to': filter['to']}}} if filter['mandate'] == 'mustNot': config_filter = {'not': config_filter} if filter['mandate'] == 'either': or_filters.append(config_filter) else: config_filters.append(config_filter) if or_filters: config_filters.append({'or': or_filters}) return config_filters def kibana4_dashboard_link(dashboard, starttime, endtime): dashboard = os.path.expandvars(dashboard) time_settings = kibana4_time_temp % (starttime, endtime) time_settings = urllib.parse.quote(time_settings) return "%s?_g=%s" % (dashboard, time_settings) elastalert-0.2.4/elastalert/kibana_discover.py000066400000000000000000000130141364615736500215310ustar00rootroot00000000000000# -*- coding: utf-8 -*- # flake8: noqa import datetime import logging import json import os.path import prison import urllib.parse from .util import EAException from .util import lookup_es_key from .util import ts_add kibana_default_timedelta = datetime.timedelta(minutes=10) kibana5_kibana6_versions = frozenset(['5.6', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5', '6.6', '6.7', '6.8']) kibana7_versions = frozenset(['7.0', '7.1', '7.2', '7.3']) def generate_kibana_discover_url(rule, match): ''' Creates a link for a kibana discover app. 
''' discover_app_url = rule.get('kibana_discover_app_url') if not discover_app_url: logging.warning( 'Missing kibana_discover_app_url for rule %s' % ( rule.get('name', '') ) ) return None kibana_version = rule.get('kibana_discover_version') if not kibana_version: logging.warning( 'Missing kibana_discover_version for rule %s' % ( rule.get('name', '') ) ) return None index = rule.get('kibana_discover_index_pattern_id') if not index: logging.warning( 'Missing kibana_discover_index_pattern_id for rule %s' % ( rule.get('name', '') ) ) return None columns = rule.get('kibana_discover_columns', ['_source']) filters = rule.get('filter', []) if 'query_key' in rule: query_keys = rule.get('compound_query_key', [rule['query_key']]) else: query_keys = [] timestamp = lookup_es_key(match, rule['timestamp_field']) timeframe = rule.get('timeframe', kibana_default_timedelta) from_timedelta = rule.get('kibana_discover_from_timedelta', timeframe) from_time = ts_add(timestamp, -from_timedelta) to_timedelta = rule.get('kibana_discover_to_timedelta', timeframe) to_time = ts_add(timestamp, to_timedelta) if kibana_version in kibana5_kibana6_versions: globalState = kibana6_disover_global_state(from_time, to_time) appState = kibana_discover_app_state(index, columns, filters, query_keys, match) elif kibana_version in kibana7_versions: globalState = kibana7_disover_global_state(from_time, to_time) appState = kibana_discover_app_state(index, columns, filters, query_keys, match) else: logging.warning( 'Unknown kibana discover application version %s for rule %s' % ( kibana_version, rule.get('name', '') ) ) return None return "%s?_g=%s&_a=%s" % ( os.path.expandvars(discover_app_url), urllib.parse.quote(globalState), urllib.parse.quote(appState) ) def kibana6_disover_global_state(from_time, to_time): return prison.dumps( { 'refreshInterval': { 'pause': True, 'value': 0 }, 'time': { 'from': from_time, 'mode': 'absolute', 'to': to_time } } ) def kibana7_disover_global_state(from_time, to_time): return prison.dumps( { 'filters': [], 'refreshInterval': { 'pause': True, 'value': 0 }, 'time': { 'from': from_time, 'to': to_time } } ) def kibana_discover_app_state(index, columns, filters, query_keys, match): app_filters = [] if filters: bool_filter = { 'must': filters } app_filters.append( { '$state': { 'store': 'appState' }, 'bool': bool_filter, 'meta': { 'alias': 'filter', 'disabled': False, 'index': index, 'key': 'bool', 'negate': False, 'type': 'custom', 'value': json.dumps(bool_filter, separators=(',', ':')) }, } ) for query_key in query_keys: query_value = lookup_es_key(match, query_key) if query_value is None: app_filters.append( { '$state': { 'store': 'appState' }, 'exists': { 'field': query_key }, 'meta': { 'alias': None, 'disabled': False, 'index': index, 'key': query_key, 'negate': True, 'type': 'exists', 'value': 'exists' } } ) else: app_filters.append( { '$state': { 'store': 'appState' }, 'meta': { 'alias': None, 'disabled': False, 'index': index, 'key': query_key, 'negate': False, 'params': { 'query': query_value, 'type': 'phrase' }, 'type': 'phrase', 'value': str(query_value) }, 'query': { 'match': { query_key: { 'query': query_value, 'type': 'phrase' } } } } ) return prison.dumps( { 'columns': columns, 'filters': app_filters, 'index': index, 'interval': 'auto' } ) elastalert-0.2.4/elastalert/loaders.py000066400000000000000000000562241364615736500200510ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import hashlib import logging import os import sys import jsonschema import yaml 
import yaml.scanner from staticconf.loader import yaml_loader from . import alerts from . import enhancements from . import ruletypes from .opsgenie import OpsGenieAlerter from .util import dt_to_ts from .util import dt_to_ts_with_format from .util import dt_to_unix from .util import dt_to_unixms from .util import EAException from .util import get_module from .util import ts_to_dt from .util import ts_to_dt_with_format from .util import unix_to_dt from .util import unixms_to_dt class RulesLoader(object): # import rule dependency import_rules = {} # Required global (config.yaml) configuration options for the loader required_globals = frozenset([]) # Required local (rule.yaml) configuration options required_locals = frozenset(['alert', 'type', 'name', 'index']) # Used to map the names of rules to their classes rules_mapping = { 'frequency': ruletypes.FrequencyRule, 'any': ruletypes.AnyRule, 'spike': ruletypes.SpikeRule, 'blacklist': ruletypes.BlacklistRule, 'whitelist': ruletypes.WhitelistRule, 'change': ruletypes.ChangeRule, 'flatline': ruletypes.FlatlineRule, 'new_term': ruletypes.NewTermsRule, 'cardinality': ruletypes.CardinalityRule, 'metric_aggregation': ruletypes.MetricAggregationRule, 'percentage_match': ruletypes.PercentageMatchRule, 'spike_aggregation': ruletypes.SpikeMetricAggregationRule, } # Used to map names of alerts to their classes alerts_mapping = { 'email': alerts.EmailAlerter, 'jira': alerts.JiraAlerter, 'opsgenie': OpsGenieAlerter, 'stomp': alerts.StompAlerter, 'debug': alerts.DebugAlerter, 'command': alerts.CommandAlerter, 'sns': alerts.SnsAlerter, 'hipchat': alerts.HipChatAlerter, 'stride': alerts.StrideAlerter, 'ms_teams': alerts.MsTeamsAlerter, 'slack': alerts.SlackAlerter, 'mattermost': alerts.MattermostAlerter, 'pagerduty': alerts.PagerDutyAlerter, 'exotel': alerts.ExotelAlerter, 'twilio': alerts.TwilioAlerter, 'victorops': alerts.VictorOpsAlerter, 'telegram': alerts.TelegramAlerter, 'googlechat': alerts.GoogleChatAlerter, 'gitter': alerts.GitterAlerter, 'servicenow': alerts.ServiceNowAlerter, 'alerta': alerts.AlertaAlerter, 'post': alerts.HTTPPostAlerter, 'hivealerter': alerts.HiveAlerter } # A partial ordering of alert types. Relative order will be preserved in the resulting alerts list # For example, jira goes before email so the ticket # will be added to the resulting email. alerts_order = { 'jira': 0, 'email': 1 } base_config = {} def __init__(self, conf): # schema for rule yaml self.rule_schema = jsonschema.Draft7Validator( yaml.load(open(os.path.join(os.path.dirname(__file__), 'schema.yaml')), Loader=yaml.FullLoader)) self.base_config = copy.deepcopy(conf) def load(self, conf, args=None): """ Discover and load all the rules as defined in the conf and args. 
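# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): alerts_order above is a
# partial ordering that load_alerts() (further below) applies when sorting the
# configured alerters, so that e.g. jira runs before email and the ticket
# number can appear in the email. Unknown alerters default to priority 1.
example_alerts_order = {'jira': 0, 'email': 1}
configured_alerters = ['email', 'slack', 'jira']   # example rule configuration
ordered = sorted(configured_alerters, key=lambda name: example_alerts_order.get(name, 1))
print(ordered)   # ['jira', 'email', 'slack'] - jira first, ties keep their order
# --------------------------------------------------------------------------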
:param dict conf: Configuration dict :param dict args: Arguments dict :return: List of rules :rtype: list """ names = [] use_rule = None if args is None else args.rule # Load each rule configuration file rules = [] rule_files = self.get_names(conf, use_rule) for rule_file in rule_files: try: rule = self.load_configuration(rule_file, conf, args) # A rule failed to load, don't try to process it if not rule: logging.error('Invalid rule file skipped: %s' % rule_file) continue # By setting "is_enabled: False" in rule file, a rule is easily disabled if 'is_enabled' in rule and not rule['is_enabled']: continue if rule['name'] in names: raise EAException('Duplicate rule named %s' % (rule['name'])) except EAException as e: raise EAException('Error loading file %s: %s' % (rule_file, e)) rules.append(rule) names.append(rule['name']) return rules def get_names(self, conf, use_rule=None): """ Return a list of rule names that can be passed to `get_yaml` to retrieve. :param dict conf: Configuration dict :param str use_rule: Limit to only specified rule :return: A list of rule names :rtype: list """ raise NotImplementedError() def get_hashes(self, conf, use_rule=None): """ Discover and get the hashes of all the rules as defined in the conf. :param dict conf: Configuration :param str use_rule: Limit to only specified rule :return: Dict of rule name to hash :rtype: dict """ raise NotImplementedError() def get_yaml(self, filename): """ Get and parse the yaml of the specified rule. :param str filename: Rule to get the yaml :return: Rule YAML dict :rtype: dict """ raise NotImplementedError() def get_import_rule(self, rule): """ Retrieve the name of the rule to import. :param dict rule: Rule dict :return: rule name that will all `get_yaml` to retrieve the yaml of the rule :rtype: str """ return rule['import'] def load_configuration(self, filename, conf, args=None): """ Load a yaml rule file and fill in the relevant fields with objects. :param str filename: The name of a rule configuration file. :param dict conf: The global configuration dictionary, used for populating defaults. :param dict args: Arguments :return: The rule configuration, a dictionary. """ rule = self.load_yaml(filename) self.load_options(rule, conf, filename, args) self.load_modules(rule, args) return rule def load_yaml(self, filename): """ Load the rule including all dependency rules. :param str filename: Rule to load :return: Loaded rule dict :rtype: dict """ rule = { 'rule_file': filename, } self.import_rules.pop(filename, None) # clear `filename` dependency while True: loaded = self.get_yaml(filename) # Special case for merging filters - if both files specify a filter merge (AND) them if 'filter' in rule and 'filter' in loaded: rule['filter'] = loaded['filter'] + rule['filter'] loaded.update(rule) rule = loaded if 'import' in rule: # Find the path of the next file. import_filename = self.get_import_rule(rule) # set dependencies rules = self.import_rules.get(filename, []) rules.append(import_filename) self.import_rules[filename] = rules filename = import_filename del (rule['import']) # or we could go on forever! else: break return rule def load_options(self, rule, conf, filename, args=None): """ Converts time objects, sets defaults, and validates some settings. :param rule: A dictionary of parsed YAML from a rule config file. :param conf: The global configuration dictionary, used for populating defaults. 
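# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): a minimal custom loader
# implementing the three abstract methods documented above. The class name and
# the idea of serving rules from an in-memory dict are hypothetical; the sketch
# assumes elastalert and the alerter dependencies imported by loaders.py are
# installed.
from elastalert.loaders import RulesLoader


class DictRulesLoader(RulesLoader):
    """Serves rule definitions from a plain dict keyed by rule name."""

    def __init__(self, conf, rules_by_name):
        super(DictRulesLoader, self).__init__(conf)
        self.rules_by_name = rules_by_name

    def get_names(self, conf, use_rule=None):
        return [use_rule] if use_rule else list(self.rules_by_name)

    def get_hashes(self, conf, use_rule=None):
        # any stable value works here; it is only compared for change detection
        return {name: hash(repr(sorted(self.rules_by_name[name].items())))
                for name in self.get_names(conf, use_rule)}

    def get_yaml(self, filename):
        # `filename` is simply the rule name returned by get_names()
        return dict(self.rules_by_name[filename])
# --------------------------------------------------------------------------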
:param filename: Name of the rule :param args: Arguments """ self.adjust_deprecated_values(rule) try: self.rule_schema.validate(rule) except jsonschema.ValidationError as e: raise EAException("Invalid Rule file: %s\n%s" % (filename, e)) try: # Set all time based parameters if 'timeframe' in rule: rule['timeframe'] = datetime.timedelta(**rule['timeframe']) if 'realert' in rule: rule['realert'] = datetime.timedelta(**rule['realert']) else: if 'aggregation' in rule: rule['realert'] = datetime.timedelta(minutes=0) else: rule['realert'] = datetime.timedelta(minutes=1) if 'aggregation' in rule and not rule['aggregation'].get('schedule'): rule['aggregation'] = datetime.timedelta(**rule['aggregation']) if 'query_delay' in rule: rule['query_delay'] = datetime.timedelta(**rule['query_delay']) if 'buffer_time' in rule: rule['buffer_time'] = datetime.timedelta(**rule['buffer_time']) if 'run_every' in rule: rule['run_every'] = datetime.timedelta(**rule['run_every']) if 'bucket_interval' in rule: rule['bucket_interval_timedelta'] = datetime.timedelta(**rule['bucket_interval']) if 'exponential_realert' in rule: rule['exponential_realert'] = datetime.timedelta(**rule['exponential_realert']) if 'kibana4_start_timedelta' in rule: rule['kibana4_start_timedelta'] = datetime.timedelta(**rule['kibana4_start_timedelta']) if 'kibana4_end_timedelta' in rule: rule['kibana4_end_timedelta'] = datetime.timedelta(**rule['kibana4_end_timedelta']) if 'kibana_discover_from_timedelta' in rule: rule['kibana_discover_from_timedelta'] = datetime.timedelta(**rule['kibana_discover_from_timedelta']) if 'kibana_discover_to_timedelta' in rule: rule['kibana_discover_to_timedelta'] = datetime.timedelta(**rule['kibana_discover_to_timedelta']) except (KeyError, TypeError) as e: raise EAException('Invalid time format used: %s' % e) # Set defaults, copy defaults from config.yaml for key, val in list(self.base_config.items()): rule.setdefault(key, val) rule.setdefault('name', os.path.splitext(filename)[0]) rule.setdefault('realert', datetime.timedelta(seconds=0)) rule.setdefault('aggregation', datetime.timedelta(seconds=0)) rule.setdefault('query_delay', datetime.timedelta(seconds=0)) rule.setdefault('timestamp_field', '@timestamp') rule.setdefault('filter', []) rule.setdefault('timestamp_type', 'iso') rule.setdefault('timestamp_format', '%Y-%m-%dT%H:%M:%SZ') rule.setdefault('_source_enabled', True) rule.setdefault('use_local_time', True) rule.setdefault('description', "") # Set timestamp_type conversion function, used when generating queries and processing hits rule['timestamp_type'] = rule['timestamp_type'].strip().lower() if rule['timestamp_type'] == 'iso': rule['ts_to_dt'] = ts_to_dt rule['dt_to_ts'] = dt_to_ts elif rule['timestamp_type'] == 'unix': rule['ts_to_dt'] = unix_to_dt rule['dt_to_ts'] = dt_to_unix elif rule['timestamp_type'] == 'unix_ms': rule['ts_to_dt'] = unixms_to_dt rule['dt_to_ts'] = dt_to_unixms elif rule['timestamp_type'] == 'custom': def _ts_to_dt_with_format(ts): return ts_to_dt_with_format(ts, ts_format=rule['timestamp_format']) def _dt_to_ts_with_format(dt): ts = dt_to_ts_with_format(dt, ts_format=rule['timestamp_format']) if 'timestamp_format_expr' in rule: # eval expression passing 'ts' and 'dt' return eval(rule['timestamp_format_expr'], {'ts': ts, 'dt': dt}) else: return ts rule['ts_to_dt'] = _ts_to_dt_with_format rule['dt_to_ts'] = _dt_to_ts_with_format else: raise EAException('timestamp_type must be one of iso, unix, or unix_ms') # Add support for client ssl certificate auth if 'verify_certs' in conf: 
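# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the time options above
# are plain dicts in the rule YAML (e.g. `timeframe: {hours: 4}`) and are
# expanded straight into datetime.timedelta as shown. The format string below
# is a hypothetical example for timestamp_type: custom; the real
# ts_to_dt_with_format / dt_to_ts_with_format helpers in util.py additionally
# handle timezones.
import datetime

timeframe = datetime.timedelta(**{'hours': 4})     # -> 4:00:00
realert = datetime.timedelta(**{'minutes': 30})    # -> 0:30:00
print(timeframe, realert)

ts_format = '%Y-%m-%d %H:%M:%S'                    # hypothetical custom format
dt = datetime.datetime(2019, 9, 1, 12, 30, 0)
ts = dt.strftime(ts_format)
print(datetime.datetime.strptime(ts, ts_format) == dt)   # True
# --------------------------------------------------------------------------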
rule.setdefault('verify_certs', conf.get('verify_certs')) rule.setdefault('ca_certs', conf.get('ca_certs')) rule.setdefault('client_cert', conf.get('client_cert')) rule.setdefault('client_key', conf.get('client_key')) # Set HipChat options from global config rule.setdefault('hipchat_msg_color', 'red') rule.setdefault('hipchat_domain', 'api.hipchat.com') rule.setdefault('hipchat_notify', True) rule.setdefault('hipchat_from', '') rule.setdefault('hipchat_ignore_ssl_errors', False) # Make sure we have required options if self.required_locals - frozenset(list(rule.keys())): raise EAException('Missing required option(s): %s' % (', '.join(self.required_locals - frozenset(list(rule.keys()))))) if 'include' in rule and type(rule['include']) != list: raise EAException('include option must be a list') raw_query_key = rule.get('query_key') if isinstance(raw_query_key, list): if len(raw_query_key) > 1: rule['compound_query_key'] = raw_query_key rule['query_key'] = ','.join(raw_query_key) elif len(raw_query_key) == 1: rule['query_key'] = raw_query_key[0] else: del(rule['query_key']) if isinstance(rule.get('aggregation_key'), list): rule['compound_aggregation_key'] = rule['aggregation_key'] rule['aggregation_key'] = ','.join(rule['aggregation_key']) if isinstance(rule.get('compare_key'), list): rule['compound_compare_key'] = rule['compare_key'] rule['compare_key'] = ','.join(rule['compare_key']) elif 'compare_key' in rule: rule['compound_compare_key'] = [rule['compare_key']] # Add QK, CK and timestamp to include include = rule.get('include', ['*']) if 'query_key' in rule: include.append(rule['query_key']) if 'compound_query_key' in rule: include += rule['compound_query_key'] if 'compound_aggregation_key' in rule: include += rule['compound_aggregation_key'] if 'compare_key' in rule: include.append(rule['compare_key']) if 'compound_compare_key' in rule: include += rule['compound_compare_key'] if 'top_count_keys' in rule: include += rule['top_count_keys'] include.append(rule['timestamp_field']) rule['include'] = list(set(include)) # Check that generate_kibana_url is compatible with the filters if rule.get('generate_kibana_link'): for es_filter in rule.get('filter'): if es_filter: if 'not' in es_filter: es_filter = es_filter['not'] if 'query' in es_filter: es_filter = es_filter['query'] if list(es_filter.keys())[0] not in ('term', 'query_string', 'range'): raise EAException( 'generate_kibana_link is incompatible with filters other than term, query_string and range.' 'Consider creating a dashboard and using use_kibana_dashboard instead.') # Check that doc_type is provided if use_count/terms_query if rule.get('use_count_query') or rule.get('use_terms_query'): if 'doc_type' not in rule: raise EAException('doc_type must be specified.') # Check that query_key is set if use_terms_query if rule.get('use_terms_query'): if 'query_key' not in rule: raise EAException('query_key must be specified with use_terms_query') # Warn if use_strf_index is used with %y, %M or %D # (%y = short year, %M = minutes, %D = full date) if rule.get('use_strftime_index'): for token in ['%y', '%M', '%D']: if token in rule.get('index'): logging.warning('Did you mean to use %s in the index? ' 'The index will be formatted like %s' % (token, datetime.datetime.now().strftime( rule.get('index')))) if rule.get('scan_entire_timeframe') and not rule.get('timeframe'): raise EAException('scan_entire_timeframe can only be used if there is a timeframe specified') def load_modules(self, rule, args=None): """ Loads things that could be modules. 
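# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): how a list-valued
# query_key is normalised by load_options() above. The field names are
# hypothetical.
example_rule = {'query_key': ['username', 'hostname']}
raw_query_key = example_rule['query_key']
if isinstance(raw_query_key, list) and len(raw_query_key) > 1:
    example_rule['compound_query_key'] = raw_query_key
    example_rule['query_key'] = ','.join(raw_query_key)
print(example_rule)
# {'query_key': 'username,hostname', 'compound_query_key': ['username', 'hostname']}
# --------------------------------------------------------------------------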
Enhancements, alerts and rule type. """ # Set match enhancements match_enhancements = [] for enhancement_name in rule.get('match_enhancements', []): if enhancement_name in dir(enhancements): enhancement = getattr(enhancements, enhancement_name) else: enhancement = get_module(enhancement_name) if not issubclass(enhancement, enhancements.BaseEnhancement): raise EAException("Enhancement module %s not a subclass of BaseEnhancement" % enhancement_name) match_enhancements.append(enhancement(rule)) rule['match_enhancements'] = match_enhancements # Convert rule type into RuleType object if rule['type'] in self.rules_mapping: rule['type'] = self.rules_mapping[rule['type']] else: rule['type'] = get_module(rule['type']) if not issubclass(rule['type'], ruletypes.RuleType): raise EAException('Rule module %s is not a subclass of RuleType' % (rule['type'])) # Make sure we have required alert and type options reqs = rule['type'].required_options if reqs - frozenset(list(rule.keys())): raise EAException('Missing required option(s): %s' % (', '.join(reqs - frozenset(list(rule.keys()))))) # Instantiate rule try: rule['type'] = rule['type'](rule, args) except (KeyError, EAException) as e: raise EAException('Error initializing rule %s: %s' % (rule['name'], e)).with_traceback(sys.exc_info()[2]) # Instantiate alerts only if we're not in debug mode # In debug mode alerts are not actually sent so don't bother instantiating them if not args or not args.debug: rule['alert'] = self.load_alerts(rule, alert_field=rule['alert']) def load_alerts(self, rule, alert_field): def normalize_config(alert): """Alert config entries are either "alertType" or {"alertType": {"key": "data"}}. This function normalizes them both to the latter format. """ if isinstance(alert, str): return alert, rule elif isinstance(alert, dict): name, config = next(iter(list(alert.items()))) config_copy = copy.copy(rule) config_copy.update(config) # warning, this (intentionally) mutates the rule dict return name, config_copy else: raise EAException() def create_alert(alert, alert_config): alert_class = self.alerts_mapping.get(alert) or get_module(alert) if not issubclass(alert_class, alerts.Alerter): raise EAException('Alert module %s is not a subclass of Alerter' % alert) missing_options = (rule['type'].required_options | alert_class.required_options) - frozenset( alert_config or []) if missing_options: raise EAException('Missing required option(s): %s' % (', '.join(missing_options))) return alert_class(alert_config) try: if type(alert_field) != list: alert_field = [alert_field] alert_field = [normalize_config(x) for x in alert_field] alert_field = sorted(alert_field, key=lambda a_b: self.alerts_order.get(a_b[0], 1)) # Convert all alerts into Alerter objects alert_field = [create_alert(a, b) for a, b in alert_field] except (KeyError, EAException) as e: raise EAException('Error initiating alert %s: %s' % (rule['alert'], e)).with_traceback(sys.exc_info()[2]) return alert_field @staticmethod def adjust_deprecated_values(rule): # From rename of simple HTTP alerter if rule.get('type') == 'simple': rule['type'] = 'post' if 'simple_proxy' in rule: rule['http_post_proxy'] = rule['simple_proxy'] if 'simple_webhook_url' in rule: rule['http_post_url'] = rule['simple_webhook_url'] logging.warning( '"simple" alerter has been renamed "post" and comptability may be removed in a future release.') class FileRulesLoader(RulesLoader): # Required global (config.yaml) configuration options for the loader required_globals = frozenset(['rules_folder']) def get_names(self, 
conf, use_rule=None): # Passing a filename directly can bypass rules_folder and .yaml checks if use_rule and os.path.isfile(use_rule): return [use_rule] rule_folder = conf['rules_folder'] rule_files = [] if 'scan_subdirectories' in conf and conf['scan_subdirectories']: for root, folders, files in os.walk(rule_folder): for filename in files: if use_rule and use_rule != filename: continue if self.is_yaml(filename): rule_files.append(os.path.join(root, filename)) else: for filename in os.listdir(rule_folder): fullpath = os.path.join(rule_folder, filename) if os.path.isfile(fullpath) and self.is_yaml(filename): rule_files.append(fullpath) return rule_files def get_hashes(self, conf, use_rule=None): rule_files = self.get_names(conf, use_rule) rule_mod_times = {} for rule_file in rule_files: rule_mod_times[rule_file] = self.get_rule_file_hash(rule_file) return rule_mod_times def get_yaml(self, filename): try: return yaml_loader(filename) except yaml.scanner.ScannerError as e: raise EAException('Could not parse file %s: %s' % (filename, e)) def get_import_rule(self, rule): """ Allow for relative paths to the import rule. :param dict rule: :return: Path the import rule :rtype: str """ if os.path.isabs(rule['import']): return rule['import'] else: return os.path.join(os.path.dirname(rule['rule_file']), rule['import']) def get_rule_file_hash(self, rule_file): rule_file_hash = '' if os.path.exists(rule_file): with open(rule_file, 'rb') as fh: rule_file_hash = hashlib.sha1(fh.read()).digest() for import_rule_file in self.import_rules.get(rule_file, []): rule_file_hash += self.get_rule_file_hash(import_rule_file) return rule_file_hash @staticmethod def is_yaml(filename): return filename.endswith('.yaml') or filename.endswith('.yml') elastalert-0.2.4/elastalert/opsgenie.py000066400000000000000000000171251364615736500202260ustar00rootroot00000000000000# -*- coding: utf-8 -*- import json import logging import os.path import requests from .alerts import Alerter from .alerts import BasicMatchString from .util import EAException from .util import elastalert_logger from .util import lookup_es_key class OpsGenieAlerter(Alerter): '''Sends a http request to the OpsGenie API to signal for an alert''' required_options = frozenset(['opsgenie_key']) def __init__(self, *args): super(OpsGenieAlerter, self).__init__(*args) self.account = self.rule.get('opsgenie_account') self.api_key = self.rule.get('opsgenie_key', 'key') self.default_reciepients = self.rule.get('opsgenie_default_receipients', None) self.recipients = self.rule.get('opsgenie_recipients') self.recipients_args = self.rule.get('opsgenie_recipients_args') self.default_teams = self.rule.get('opsgenie_default_teams', None) self.teams = self.rule.get('opsgenie_teams') self.teams_args = self.rule.get('opsgenie_teams_args') self.tags = self.rule.get('opsgenie_tags', []) + ['ElastAlert', self.rule['name']] self.to_addr = self.rule.get('opsgenie_addr', 'https://api.opsgenie.com/v2/alerts') self.custom_message = self.rule.get('opsgenie_message') self.opsgenie_subject = self.rule.get('opsgenie_subject') self.opsgenie_subject_args = self.rule.get('opsgenie_subject_args') self.alias = self.rule.get('opsgenie_alias') self.opsgenie_proxy = self.rule.get('opsgenie_proxy', None) self.priority = self.rule.get('opsgenie_priority') self.opsgenie_details = self.rule.get('opsgenie_details', {}) def _parse_responders(self, responders, responder_args, matches, default_responders): if responder_args: formated_responders = list() responders_values = dict((k, 
lookup_es_key(matches[0], v)) for k, v in responder_args.items()) responders_values = dict((k, v) for k, v in responders_values.items() if v) for responder in responders: responder = str(responder) try: formated_responders.append(responder.format(**responders_values)) except KeyError as error: logging.warn("OpsGenieAlerter: Cannot create responder for OpsGenie Alert. Key not found: %s. " % (error)) if not formated_responders: logging.warn("OpsGenieAlerter: no responders could be formed. Trying the default responders.") if not default_responders: logging.warn("OpsGenieAlerter: default responders not set. Falling back to the configured responders.") formated_responders = responders else: formated_responders = default_responders responders = formated_responders return responders def _fill_responders(self, responders, type_): return [{'id': r, 'type': type_} for r in responders] def alert(self, matches): body = '' for match in matches: body += str(BasicMatchString(self.rule, match)) # Separate text of aggregated alerts with dashes if len(matches) > 1: body += '\n----------------------------------------\n' if self.custom_message is None: self.message = self.create_title(matches) else: self.message = self.custom_message.format(**matches[0]) self.recipients = self._parse_responders(self.recipients, self.recipients_args, matches, self.default_reciepients) self.teams = self._parse_responders(self.teams, self.teams_args, matches, self.default_teams) post = {} post['message'] = self.message if self.account: post['user'] = self.account if self.recipients: post['responders'] = [{'username': r, 'type': 'user'} for r in self.recipients] if self.teams: post['teams'] = [{'name': r, 'type': 'team'} for r in self.teams] post['description'] = body post['source'] = 'ElastAlert' for i, tag in enumerate(self.tags): self.tags[i] = tag.format(**matches[0]) post['tags'] = self.tags if self.priority and self.priority not in ('P1', 'P2', 'P3', 'P4', 'P5'): logging.warn("Priority level does not appear to be specified correctly. 
\ Please make sure to set it to a value between P1 and P5") else: post['priority'] = self.priority if self.alias is not None: post['alias'] = self.alias.format(**matches[0]) details = self.get_details(matches) if details: post['details'] = details logging.debug(json.dumps(post)) headers = { 'Content-Type': 'application/json', 'Authorization': 'GenieKey {}'.format(self.api_key), } # set https proxy, if it was provided proxies = {'https': self.opsgenie_proxy} if self.opsgenie_proxy else None try: r = requests.post(self.to_addr, json=post, headers=headers, proxies=proxies) logging.debug('request response: {0}'.format(r)) if r.status_code != 202: elastalert_logger.info("Error response from {0} \n " "API Response: {1}".format(self.to_addr, r)) r.raise_for_status() logging.info("Alert sent to OpsGenie") except Exception as err: raise EAException("Error sending alert: {0}".format(err)) def create_default_title(self, matches): subject = 'ElastAlert: %s' % (self.rule['name']) # If the rule has a query_key, add that value plus timestamp to subject if 'query_key' in self.rule: qk = matches[0].get(self.rule['query_key']) if qk: subject += ' - %s' % (qk) return subject def create_title(self, matches): """ Creates custom alert title to be used as subject for opsgenie alert.""" if self.opsgenie_subject: return self.create_custom_title(matches) return self.create_default_title(matches) def create_custom_title(self, matches): opsgenie_subject = str(self.rule['opsgenie_subject']) if self.opsgenie_subject_args: opsgenie_subject_values = [lookup_es_key(matches[0], arg) for arg in self.opsgenie_subject_args] for i, subject_value in enumerate(opsgenie_subject_values): if subject_value is None: alert_value = self.rule.get(self.opsgenie_subject_args[i]) if alert_value: opsgenie_subject_values[i] = alert_value opsgenie_subject_values = ['' if val is None else val for val in opsgenie_subject_values] return opsgenie_subject.format(*opsgenie_subject_values) return opsgenie_subject def get_info(self): ret = {'type': 'opsgenie'} if self.recipients: ret['recipients'] = self.recipients if self.account: ret['account'] = self.account if self.teams: ret['teams'] = self.teams return ret def get_details(self, matches): details = {} for key, value in self.opsgenie_details.items(): if type(value) is dict: if 'field' in value: field_value = lookup_es_key(matches[0], value['field']) if field_value is not None: details[key] = str(field_value) elif type(value) is str: details[key] = os.path.expandvars(value) return details elastalert-0.2.4/elastalert/rule_from_kibana.py000066400000000000000000000026701364615736500217130ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import json import yaml from elastalert.kibana import filters_from_dashboard from elastalert.util import elasticsearch_client def main(): es_host = input("Elasticsearch host: ") es_port = input("Elasticsearch port: ") db_name = input("Dashboard name: ") send_get_body_as = input("Method for querying Elasticsearch[GET]: ") or 'GET' es = elasticsearch_client({'es_host': es_host, 'es_port': es_port, 'send_get_body_as': send_get_body_as}) print("Elastic Version:" + es.es_version) query = {'query': {'term': {'_id': db_name}}} if es.is_atleastsixsix(): # TODO check support for kibana 7 # TODO use doc_type='_doc' instead res = es.deprecated_search(index='kibana-int', doc_type='dashboard', body=query, _source_includes=['dashboard']) else: res = es.deprecated_search(index='kibana-int', doc_type='dashboard', body=query, _source_include=['dashboard']) if 
not res['hits']['hits']: print("No dashboard %s found" % (db_name)) exit() db = json.loads(res['hits']['hits'][0]['_source']['dashboard']) config_filters = filters_from_dashboard(db) print("\nPartial Config file") print("-----------\n") print("name: %s" % (db_name)) print("es_host: %s" % (es_host)) print("es_port: %s" % (es_port)) print("filter:") print(yaml.safe_dump(config_filters)) if __name__ == '__main__': main() elastalert-0.2.4/elastalert/ruletypes.py000066400000000000000000001607631364615736500204600ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import sys from blist import sortedlist from .util import add_raw_postfix from .util import dt_to_ts from .util import EAException from .util import elastalert_logger from .util import elasticsearch_client from .util import format_index from .util import hashable from .util import lookup_es_key from .util import new_get_event_ts from .util import pretty_ts from .util import total_seconds from .util import ts_now from .util import ts_to_dt class RuleType(object): """ The base class for a rule type. The class must implement add_data and add any matches to self.matches. :param rules: A rule configuration. """ required_options = frozenset() def __init__(self, rules, args=None): self.matches = [] self.rules = rules self.occurrences = {} self.rules['category'] = self.rules.get('category', '') self.rules['description'] = self.rules.get('description', '') self.rules['owner'] = self.rules.get('owner', '') self.rules['priority'] = self.rules.get('priority', '2') def add_data(self, data): """ The function that the ElastAlert client calls with results from ES. Data is a list of dictionaries, from Elasticsearch. :param data: A list of events, each of which is a dictionary of terms. """ raise NotImplementedError() def add_match(self, event): """ This function is called on all matching events. Rules use it to add extra information about the context of a match. Event is a dictionary containing terms directly from Elasticsearch and alerts will report all of the information. :param event: The matching event, a dictionary of terms. """ # Convert datetime's back to timestamps ts = self.rules.get('timestamp_field') if ts in event: event[ts] = dt_to_ts(event[ts]) self.matches.append(copy.deepcopy(event)) def get_match_str(self, match): """ Returns a string that gives more context about a match. :param match: The matching event, a dictionary of terms. :return: A user facing string describing the match. """ return '' def garbage_collect(self, timestamp): """ Gets called periodically to remove old data that is useless beyond given timestamp. May also be used to compute things in the absence of new data. :param timestamp: A timestamp indicating the rule has been run up to that point. """ pass def add_count_data(self, counts): """ Gets called when a rule has use_count_query set to True. Called to add data from querying to the rule. :param counts: A dictionary mapping timestamps to hit counts. """ raise NotImplementedError() def add_terms_data(self, terms): """ Gets called when a rule has use_terms_query set to True. :param terms: A list of buckets with a key, corresponding to query_key, and the count """ raise NotImplementedError() def add_aggregation_data(self, payload): """ Gets called when a rule has use_terms_query set to True. 
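# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): a minimal custom rule
# type built on the RuleType interface above. The class, the `contains` option
# and the `message` field are hypothetical; a rule file would reference such a
# class by its module path in the `type` option (see load_modules in
# loaders.py).
from elastalert.ruletypes import RuleType


class ContainsStringRule(RuleType):
    required_options = frozenset(['contains'])

    def add_data(self, data):
        # match every event whose message field contains the configured string
        for event in data:
            if self.rules['contains'] in str(event.get('message', '')):
                self.add_match(event)

    def get_match_str(self, match):
        return 'Message contained %r\n' % (self.rules['contains'],)
# --------------------------------------------------------------------------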
:param terms: A list of buckets with a key, corresponding to query_key, and the count """ raise NotImplementedError() class CompareRule(RuleType): """ A base class for matching a specific term by passing it to a compare function """ required_options = frozenset(['compound_compare_key']) def expand_entries(self, list_type): """ Expand entries specified in files using the '!file' directive, if there are any, then add everything to a set. """ entries_set = set() for entry in self.rules[list_type]: if entry.startswith("!file"): # - "!file /path/to/list" filename = entry.split()[1] with open(filename, 'r') as f: for line in f: entries_set.add(line.rstrip()) else: entries_set.add(entry) self.rules[list_type] = entries_set def compare(self, event): """ An event is a match if this returns true """ raise NotImplementedError() def add_data(self, data): # If compare returns true, add it as a match for event in data: if self.compare(event): self.add_match(event) class BlacklistRule(CompareRule): """ A CompareRule where the compare function checks a given key against a blacklist """ required_options = frozenset(['compare_key', 'blacklist']) def __init__(self, rules, args=None): super(BlacklistRule, self).__init__(rules, args=None) self.expand_entries('blacklist') def compare(self, event): term = lookup_es_key(event, self.rules['compare_key']) if term in self.rules['blacklist']: return True return False class WhitelistRule(CompareRule): """ A CompareRule where the compare function checks a given term against a whitelist """ required_options = frozenset(['compare_key', 'whitelist', 'ignore_null']) def __init__(self, rules, args=None): super(WhitelistRule, self).__init__(rules, args=None) self.expand_entries('whitelist') def compare(self, event): term = lookup_es_key(event, self.rules['compare_key']) if term is None: return not self.rules['ignore_null'] if term not in self.rules['whitelist']: return True return False class ChangeRule(CompareRule): """ A rule that will store values for a certain term and match if those values change """ required_options = frozenset(['query_key', 'compound_compare_key', 'ignore_null']) change_map = {} occurrence_time = {} def compare(self, event): key = hashable(lookup_es_key(event, self.rules['query_key'])) values = [] elastalert_logger.debug(" Previous Values of compare keys " + str(self.occurrences)) for val in self.rules['compound_compare_key']: lookup_value = lookup_es_key(event, val) values.append(lookup_value) elastalert_logger.debug(" Current Values of compare keys " + str(values)) changed = False for val in values: if not isinstance(val, bool) and not val and self.rules['ignore_null']: return False # If we have seen this key before, compare it to the new value if key in self.occurrences: for idx, previous_values in enumerate(self.occurrences[key]): elastalert_logger.debug(" " + str(previous_values) + " " + str(values[idx])) changed = previous_values != values[idx] if changed: break if changed: self.change_map[key] = (self.occurrences[key], values) # If using timeframe, only return true if the time delta is < timeframe if key in self.occurrence_time: changed = event[self.rules['timestamp_field']] - self.occurrence_time[key] <= self.rules['timeframe'] # Update the current value and time elastalert_logger.debug(" Setting current value of compare keys values " + str(values)) self.occurrences[key] = values if 'timeframe' in self.rules: self.occurrence_time[key] = event[self.rules['timestamp_field']] elastalert_logger.debug("Final result of comparision between previous 
and current values " + str(changed)) return changed def add_match(self, match): # TODO this is not technically correct # if the term changes multiple times before an alert is sent # this data will be overwritten with the most recent change change = self.change_map.get(hashable(lookup_es_key(match, self.rules['query_key']))) extra = {} if change: extra = {'old_value': change[0], 'new_value': change[1]} elastalert_logger.debug("Description of the changed records " + str(dict(list(match.items()) + list(extra.items())))) super(ChangeRule, self).add_match(dict(list(match.items()) + list(extra.items()))) class FrequencyRule(RuleType): """ A rule that matches if num_events number of events occur within a timeframe """ required_options = frozenset(['num_events', 'timeframe']) def __init__(self, *args): super(FrequencyRule, self).__init__(*args) self.ts_field = self.rules.get('timestamp_field', '@timestamp') self.get_ts = new_get_event_ts(self.ts_field) self.attach_related = self.rules.get('attach_related', False) def add_count_data(self, data): """ Add count data to the rule. Data should be of the form {ts: count}. """ if len(data) > 1: raise EAException('add_count_data can only accept one count at a time') (ts, count), = list(data.items()) event = ({self.ts_field: ts}, count) self.occurrences.setdefault('all', EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts)).append(event) self.check_for_match('all') def add_terms_data(self, terms): for timestamp, buckets in terms.items(): for bucket in buckets: event = ({self.ts_field: timestamp, self.rules['query_key']: bucket['key']}, bucket['doc_count']) self.occurrences.setdefault(bucket['key'], EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts)).append(event) self.check_for_match(bucket['key']) def add_data(self, data): if 'query_key' in self.rules: qk = self.rules['query_key'] else: qk = None for event in data: if qk: key = hashable(lookup_es_key(event, qk)) else: # If no query_key, we use the key 'all' for all events key = 'all' # Store the timestamps of recent occurrences, per key self.occurrences.setdefault(key, EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts)).append((event, 1)) self.check_for_match(key, end=False) # We call this multiple times with the 'end' parameter because subclasses # may or may not want to check while only partial data has been added if key in self.occurrences: # could have been emptied by previous check self.check_for_match(key, end=True) def check_for_match(self, key, end=False): # Match if, after removing old events, we hit num_events. 
# the 'end' parameter depends on whether this was called from the # middle or end of an add_data call and is used in subclasses if self.occurrences[key].count() >= self.rules['num_events']: event = self.occurrences[key].data[-1][0] if self.attach_related: event['related_events'] = [data[0] for data in self.occurrences[key].data[:-1]] self.add_match(event) self.occurrences.pop(key) def garbage_collect(self, timestamp): """ Remove all occurrence data that is beyond the timeframe away """ stale_keys = [] for key, window in self.occurrences.items(): if timestamp - lookup_es_key(window.data[-1][0], self.ts_field) > self.rules['timeframe']: stale_keys.append(key) list(map(self.occurrences.pop, stale_keys)) def get_match_str(self, match): lt = self.rules.get('use_local_time') match_ts = lookup_es_key(match, self.ts_field) starttime = pretty_ts(dt_to_ts(ts_to_dt(match_ts) - self.rules['timeframe']), lt) endtime = pretty_ts(match_ts, lt) message = 'At least %d events occurred between %s and %s\n\n' % (self.rules['num_events'], starttime, endtime) return message class AnyRule(RuleType): """ A rule that will match on any input data """ def add_data(self, data): for datum in data: self.add_match(datum) class EventWindow(object): """ A container for hold event counts for rules which need a chronological ordered event window. """ def __init__(self, timeframe, onRemoved=None, getTimestamp=new_get_event_ts('@timestamp')): self.timeframe = timeframe self.onRemoved = onRemoved self.get_ts = getTimestamp self.data = sortedlist(key=self.get_ts) self.running_count = 0 def clear(self): self.data = sortedlist(key=self.get_ts) self.running_count = 0 def append(self, event): """ Add an event to the window. Event should be of the form (dict, count). This will also pop the oldest events and call onRemoved on them until the window size is less than timeframe. """ self.data.add(event) self.running_count += event[1] while self.duration() >= self.timeframe: oldest = self.data[0] self.data.remove(oldest) self.running_count -= oldest[1] self.onRemoved and self.onRemoved(oldest) def duration(self): """ Get the size in timedelta of the window. """ if not self.data: return datetime.timedelta(0) return self.get_ts(self.data[-1]) - self.get_ts(self.data[0]) def count(self): """ Count the number of events in the window. """ return self.running_count def mean(self): """ Compute the mean of the value_field in the window. """ if len(self.data) > 0: datasum = 0 datalen = 0 for dat in self.data: if "placeholder" not in dat[0]: datasum += dat[1] datalen += 1 if datalen > 0: return datasum / float(datalen) return None else: return None def __iter__(self): return iter(self.data) def append_middle(self, event): """ Attempt to place the event in the correct location in our deque. Returns True if successful, otherwise False. """ rotation = 0 ts = self.get_ts(event) # Append left if ts is earlier than first event if self.get_ts(self.data[0]) > ts: self.data.appendleft(event) self.running_count += event[1] return # Rotate window until we can insert event while self.get_ts(self.data[-1]) > ts: self.data.rotate(1) rotation += 1 if rotation == len(self.data): # This should never happen return self.data.append(event) self.running_count += event[1] self.data.rotate(-rotation) class SpikeRule(RuleType): """ A rule that uses two sliding windows to compare relative event frequency. 
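# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): exercising EventWindow
# above on its own. Events are (document, count) tuples keyed on '@timestamp'
# (the default getTimestamp); the naive datetimes are purely for the example.
# Assumes elastalert and its blist dependency are importable.
import datetime
from elastalert.ruletypes import EventWindow

window = EventWindow(datetime.timedelta(minutes=10))
start = datetime.datetime(2019, 9, 1, 12, 0, 0)
for offset in (0, 3, 6, 14):
    window.append(({'@timestamp': start + datetime.timedelta(minutes=offset)}, 1))

# Appending the 12:14 event pushed the span past the 10 minute timeframe, so
# the 12:00 and 12:03 events were evicted from the head of the window.
print(window.count())     # 2
print(window.duration())  # 0:08:00
# --------------------------------------------------------------------------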
""" required_options = frozenset(['timeframe', 'spike_height', 'spike_type']) def __init__(self, *args): super(SpikeRule, self).__init__(*args) self.timeframe = self.rules['timeframe'] self.ref_windows = {} self.cur_windows = {} self.ts_field = self.rules.get('timestamp_field', '@timestamp') self.get_ts = new_get_event_ts(self.ts_field) self.first_event = {} self.skip_checks = {} self.field_value = self.rules.get('field_value') self.ref_window_filled_once = False def add_count_data(self, data): """ Add count data to the rule. Data should be of the form {ts: count}. """ if len(data) > 1: raise EAException('add_count_data can only accept one count at a time') for ts, count in data.items(): self.handle_event({self.ts_field: ts}, count, 'all') def add_terms_data(self, terms): for timestamp, buckets in terms.items(): for bucket in buckets: count = bucket['doc_count'] event = {self.ts_field: timestamp, self.rules['query_key']: bucket['key']} key = bucket['key'] self.handle_event(event, count, key) def add_data(self, data): for event in data: qk = self.rules.get('query_key', 'all') if qk != 'all': qk = hashable(lookup_es_key(event, qk)) if qk is None: qk = 'other' if self.field_value is not None: count = lookup_es_key(event, self.field_value) if count is not None: try: count = int(count) except ValueError: elastalert_logger.warn('{} is not a number: {}'.format(self.field_value, count)) else: self.handle_event(event, count, qk) else: self.handle_event(event, 1, qk) def clear_windows(self, qk, event): # Reset the state and prevent alerts until windows filled again self.ref_windows[qk].clear() self.first_event.pop(qk) self.skip_checks[qk] = lookup_es_key(event, self.ts_field) + self.rules['timeframe'] * 2 def handle_event(self, event, count, qk='all'): self.first_event.setdefault(qk, event) self.ref_windows.setdefault(qk, EventWindow(self.timeframe, getTimestamp=self.get_ts)) self.cur_windows.setdefault(qk, EventWindow(self.timeframe, self.ref_windows[qk].append, self.get_ts)) self.cur_windows[qk].append((event, count)) # Don't alert if ref window has not yet been filled for this key AND if lookup_es_key(event, self.ts_field) - self.first_event[qk][self.ts_field] < self.rules['timeframe'] * 2: # ElastAlert has not been running long enough for any alerts OR if not self.ref_window_filled_once: return # This rule is not using alert_on_new_data (with query_key) OR if not (self.rules.get('query_key') and self.rules.get('alert_on_new_data')): return # An alert for this qk has recently fired if qk in self.skip_checks and lookup_es_key(event, self.ts_field) < self.skip_checks[qk]: return else: self.ref_window_filled_once = True if self.field_value is not None: if self.find_matches(self.ref_windows[qk].mean(), self.cur_windows[qk].mean()): # skip over placeholder events for match, count in self.cur_windows[qk].data: if "placeholder" not in match: break self.add_match(match, qk) self.clear_windows(qk, match) else: if self.find_matches(self.ref_windows[qk].count(), self.cur_windows[qk].count()): # skip over placeholder events which have count=0 for match, count in self.cur_windows[qk].data: if count: break self.add_match(match, qk) self.clear_windows(qk, match) def add_match(self, match, qk): extra_info = {} if self.field_value is None: spike_count = self.cur_windows[qk].count() reference_count = self.ref_windows[qk].count() else: spike_count = self.cur_windows[qk].mean() reference_count = self.ref_windows[qk].mean() extra_info = {'spike_count': spike_count, 'reference_count': reference_count} match = 
dict(list(match.items()) + list(extra_info.items())) super(SpikeRule, self).add_match(match) def find_matches(self, ref, cur): """ Determines if an event spike or dip is happening. """ # Apply threshold limits if self.field_value is None: if (cur < self.rules.get('threshold_cur', 0) or ref < self.rules.get('threshold_ref', 0)): return False elif ref is None or ref == 0 or cur is None or cur == 0: return False spike_up, spike_down = False, False if cur <= ref / self.rules['spike_height']: spike_down = True if cur >= ref * self.rules['spike_height']: spike_up = True if (self.rules['spike_type'] in ['both', 'up'] and spike_up) or \ (self.rules['spike_type'] in ['both', 'down'] and spike_down): return True return False def get_match_str(self, match): if self.field_value is None: message = 'An abnormal number (%d) of events occurred around %s.\n' % ( match['spike_count'], pretty_ts(match[self.rules['timestamp_field']], self.rules.get('use_local_time')) ) message += 'Preceding that time, there were only %d events within %s\n\n' % (match['reference_count'], self.rules['timeframe']) else: message = 'An abnormal average value (%.2f) of field \'%s\' occurred around %s.\n' % ( match['spike_count'], self.field_value, pretty_ts(match[self.rules['timestamp_field']], self.rules.get('use_local_time')) ) message += 'Preceding that time, the field had an average value of (%.2f) within %s\n\n' % ( match['reference_count'], self.rules['timeframe']) return message def garbage_collect(self, ts): # Windows are sized according to their newest event # This is a placeholder to accurately size windows in the absence of events for qk in list(self.cur_windows.keys()): # If we haven't seen this key in a long time, forget it if qk != 'all' and self.ref_windows[qk].count() == 0 and self.cur_windows[qk].count() == 0: self.cur_windows.pop(qk) self.ref_windows.pop(qk) continue placeholder = {self.ts_field: ts, "placeholder": True} # The placeholder may trigger an alert, in which case, qk will be expected if qk != 'all': placeholder.update({self.rules['query_key']: qk}) self.handle_event(placeholder, 0, qk) class FlatlineRule(FrequencyRule): """ A rule that matches when there is a low number of events given a timeframe. 
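# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the core spike test from
# find_matches above, simplified to the count-based case. It ignores the
# threshold_ref / threshold_cur options and the field_value (average) mode.
def is_spike(ref_count, cur_count, spike_height, spike_type='both'):
    if not ref_count or not cur_count:    # guard added for the simplified sketch
        return False
    spike_up = cur_count >= ref_count * spike_height
    spike_down = cur_count <= ref_count / spike_height
    return ((spike_type in ('both', 'up') and spike_up) or
            (spike_type in ('both', 'down') and spike_down))


print(is_spike(10, 35, 3))          # True: 35 >= 10 * 3
print(is_spike(10, 20, 3))          # False: neither 20 >= 30 nor 20 <= 10 / 3
print(is_spike(30, 8, 3, 'down'))   # True: 8 <= 30 / 3
# --------------------------------------------------------------------------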
""" required_options = frozenset(['timeframe', 'threshold']) def __init__(self, *args): super(FlatlineRule, self).__init__(*args) self.threshold = self.rules['threshold'] # Dictionary mapping query keys to the first events self.first_event = {} def check_for_match(self, key, end=True): # This function gets called between every added document with end=True after the last # We ignore the calls before the end because it may trigger false positives if not end: return most_recent_ts = self.get_ts(self.occurrences[key].data[-1]) if self.first_event.get(key) is None: self.first_event[key] = most_recent_ts # Don't check for matches until timeframe has elapsed if most_recent_ts - self.first_event[key] < self.rules['timeframe']: return # Match if, after removing old events, we hit num_events count = self.occurrences[key].count() if count < self.rules['threshold']: # Do a deep-copy, otherwise we lose the datetime type in the timestamp field of the last event event = copy.deepcopy(self.occurrences[key].data[-1][0]) event.update(key=key, count=count) self.add_match(event) if not self.rules.get('forget_keys'): # After adding this match, leave the occurrences windows alone since it will # be pruned in the next add_data or garbage_collect, but reset the first_event # so that alerts continue to fire until the threshold is passed again. least_recent_ts = self.get_ts(self.occurrences[key].data[0]) timeframe_ago = most_recent_ts - self.rules['timeframe'] self.first_event[key] = min(least_recent_ts, timeframe_ago) else: # Forget about this key until we see it again self.first_event.pop(key) self.occurrences.pop(key) def get_match_str(self, match): ts = match[self.rules['timestamp_field']] lt = self.rules.get('use_local_time') message = 'An abnormally low number of events occurred around %s.\n' % (pretty_ts(ts, lt)) message += 'Between %s and %s, there were less than %s events.\n\n' % ( pretty_ts(dt_to_ts(ts_to_dt(ts) - self.rules['timeframe']), lt), pretty_ts(ts, lt), self.rules['threshold'] ) return message def garbage_collect(self, ts): # We add an event with a count of zero to the EventWindow for each key. This will cause the EventWindow # to remove events that occurred more than one `timeframe` ago, and call onRemoved on them. default = ['all'] if 'query_key' not in self.rules else [] for key in list(self.occurrences.keys()) or default: self.occurrences.setdefault( key, EventWindow(self.rules['timeframe'], getTimestamp=self.get_ts) ).append( ({self.ts_field: ts}, 0) ) self.first_event.setdefault(key, ts) self.check_for_match(key) class NewTermsRule(RuleType): """ Alerts on a new value in a list of fields. 
""" def __init__(self, rule, args=None): super(NewTermsRule, self).__init__(rule, args) self.seen_values = {} # Allow the use of query_key or fields if 'fields' not in self.rules: if 'query_key' not in self.rules: raise EAException("fields or query_key must be specified") self.fields = self.rules['query_key'] else: self.fields = self.rules['fields'] if not self.fields: raise EAException("fields must not be an empty list") if type(self.fields) != list: self.fields = [self.fields] if self.rules.get('use_terms_query') and \ (len(self.fields) != 1 or (len(self.fields) == 1 and type(self.fields[0]) == list)): raise EAException("use_terms_query can only be used with a single non-composite field") if self.rules.get('use_terms_query'): if [self.rules['query_key']] != self.fields: raise EAException('If use_terms_query is specified, you cannot specify different query_key and fields') if not self.rules.get('query_key').endswith('.keyword') and not self.rules.get('query_key').endswith('.raw'): if self.rules.get('use_keyword_postfix', True): elastalert_logger.warn('Warning: If query_key is a non-keyword field, you must set ' 'use_keyword_postfix to false, or add .keyword/.raw to your query_key.') try: self.get_all_terms(args) except Exception as e: # Refuse to start if we cannot get existing terms raise EAException('Error searching for existing terms: %s' % (repr(e))).with_traceback(sys.exc_info()[2]) def get_all_terms(self, args): """ Performs a terms aggregation for each field to get every existing term. """ self.es = elasticsearch_client(self.rules) window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30})) field_name = {"field": "", "size": 2147483647} # Integer.MAX_VALUE query_template = {"aggs": {"values": {"terms": field_name}}} if args and hasattr(args, 'start') and args.start: end = ts_to_dt(args.start) elif 'start_date' in self.rules: end = ts_to_dt(self.rules['start_date']) else: end = ts_now() start = end - window_size step = datetime.timedelta(**self.rules.get('window_step_size', {'days': 1})) for field in self.fields: tmp_start = start tmp_end = min(start + step, end) time_filter = {self.rules['timestamp_field']: {'lt': self.rules['dt_to_ts'](tmp_end), 'gte': self.rules['dt_to_ts'](tmp_start)}} query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}} query = {'aggs': {'filtered': query_template}} if 'filter' in self.rules: for item in self.rules['filter']: query_template['filter']['bool']['must'].append(item) # For composite keys, we will need to perform sub-aggregations if type(field) == list: self.seen_values.setdefault(tuple(field), []) level = query_template['aggs'] # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query for i, sub_field in enumerate(field): if self.rules.get('use_keyword_postfix', True): level['values']['terms']['field'] = add_raw_postfix(sub_field, self.is_five_or_above()) else: level['values']['terms']['field'] = sub_field if i < len(field) - 1: # If we have more fields after the current one, then set up the next nested structure level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}} level = level['values']['aggs'] else: self.seen_values.setdefault(field, []) # For non-composite keys, only a single agg is needed if self.rules.get('use_keyword_postfix', True): field_name['field'] = add_raw_postfix(field, self.is_five_or_above()) else: field_name['field'] = field # Query the entire time range in small chunks while tmp_start < end: if 
self.rules.get('use_strftime_index'): index = format_index(self.rules['index'], tmp_start, tmp_end) else: index = self.rules['index'] res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s') if 'aggregations' in res: buckets = res['aggregations']['filtered']['values']['buckets'] if type(field) == list: # For composite keys, make the lookup based on all fields # Make it a tuple since it can be hashed and used in dictionary lookups for bucket in buckets: # We need to walk down the hierarchy and obtain the value at each level self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket) else: keys = [bucket['key'] for bucket in buckets] self.seen_values[field] += keys else: if type(field) == list: self.seen_values.setdefault(tuple(field), []) else: self.seen_values.setdefault(field, []) if tmp_start == tmp_end: break tmp_start = tmp_end tmp_end = min(tmp_start + step, end) time_filter[self.rules['timestamp_field']] = {'lt': self.rules['dt_to_ts'](tmp_end), 'gte': self.rules['dt_to_ts'](tmp_start)} for key, values in self.seen_values.items(): if not values: if type(key) == tuple: # If we don't have any results, it could either be because of the absence of any baseline data # OR it may be because the composite key contained a non-primitive type. Either way, give the # end-users a heads up to help them debug what might be going on. elastalert_logger.warning(( 'No results were found from all sub-aggregations. This can either indicate that there is ' 'no baseline data OR that a non-primitive field was used in a composite key.' )) else: elastalert_logger.info('Found no values for %s' % (field)) continue self.seen_values[key] = list(set(values)) elastalert_logger.info('Found %s unique values for %s' % (len(set(values)), key)) def flatten_aggregation_hierarchy(self, root, hierarchy_tuple=()): """ For nested aggregations, the results come back in the following format: { "aggregations" : { "filtered" : { "doc_count" : 37, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "1.1.1.1", # IP address (root) "doc_count" : 13, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "80", # Port (sub-aggregation) "doc_count" : 3, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "ack", # Reason (sub-aggregation, leaf-node) "doc_count" : 3 }, { "key" : "syn", # Reason (sub-aggregation, leaf-node) "doc_count" : 1 } ] } }, { "key" : "82", # Port (sub-aggregation) "doc_count" : 3, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "ack", # Reason (sub-aggregation, leaf-node) "doc_count" : 3 }, { "key" : "syn", # Reason (sub-aggregation, leaf-node) "doc_count" : 3 } ] } } ] } }, { "key" : "2.2.2.2", # IP address (root) "doc_count" : 4, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "443", # Port (sub-aggregation) "doc_count" : 3, "values" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "ack", # Reason (sub-aggregation, leaf-node) "doc_count" : 3 }, { "key" : "syn", # Reason (sub-aggregation, leaf-node) "doc_count" : 3 } ] } } ] } } ] } } } } Each level will either have more values and buckets, or it will be a leaf node We'll ultimately return a flattened list with the hierarchies appended as strings, e.g the above snippet would yield a list with: [ ('1.1.1.1', '80', 'ack'), ('1.1.1.1', '80', 
'syn'), ('1.1.1.1', '82', 'ack'), ('1.1.1.1', '82', 'syn'), ('2.2.2.2', '443', 'ack'), ('2.2.2.2', '443', 'syn') ] A similar formatting will be performed in the add_data method and used as the basis for comparison """ results = [] # There are more aggregation hierarchies left. Traverse them. if 'values' in root: results += self.flatten_aggregation_hierarchy(root['values']['buckets'], hierarchy_tuple + (root['key'],)) else: # We've gotten to a sub-aggregation, which may have further sub-aggregations # See if we need to traverse further for node in root: if 'values' in node: results += self.flatten_aggregation_hierarchy(node, hierarchy_tuple) else: results.append(hierarchy_tuple + (node['key'],)) return results def add_data(self, data): for document in data: for field in self.fields: value = () lookup_field = field if type(field) == list: # For composite keys, make the lookup based on all fields # Make it a tuple since it can be hashed and used in dictionary lookups lookup_field = tuple(field) for sub_field in field: lookup_result = lookup_es_key(document, sub_field) if not lookup_result: value = None break value += (lookup_result,) else: value = lookup_es_key(document, field) if not value and self.rules.get('alert_on_missing_field'): document['missing_field'] = lookup_field self.add_match(copy.deepcopy(document)) elif value: if value not in self.seen_values[lookup_field]: document['new_field'] = lookup_field self.add_match(copy.deepcopy(document)) self.seen_values[lookup_field].append(value) def add_terms_data(self, terms): # With terms query, len(self.fields) is always 1 and the 0'th entry is always a string field = self.fields[0] for timestamp, buckets in terms.items(): for bucket in buckets: if bucket['doc_count']: if bucket['key'] not in self.seen_values[field]: match = {field: bucket['key'], self.rules['timestamp_field']: timestamp, 'new_field': field} self.add_match(match) self.seen_values[field].append(bucket['key']) def is_five_or_above(self): version = self.es.info()['version']['number'] return int(version[0]) >= 5 class CardinalityRule(RuleType): """ A rule that matches if cardinality of a field is above or below a threshold within a timeframe """ required_options = frozenset(['timeframe', 'cardinality_field']) def __init__(self, *args): super(CardinalityRule, self).__init__(*args) if 'max_cardinality' not in self.rules and 'min_cardinality' not in self.rules: raise EAException("CardinalityRule must have one of either max_cardinality or min_cardinality") self.ts_field = self.rules.get('timestamp_field', '@timestamp') self.cardinality_field = self.rules['cardinality_field'] self.cardinality_cache = {} self.first_event = {} self.timeframe = self.rules['timeframe'] def add_data(self, data): qk = self.rules.get('query_key') for event in data: if qk: key = hashable(lookup_es_key(event, qk)) else: # If no query_key, we use the key 'all' for all events key = 'all' self.cardinality_cache.setdefault(key, {}) self.first_event.setdefault(key, lookup_es_key(event, self.ts_field)) value = hashable(lookup_es_key(event, self.cardinality_field)) if value is not None: # Store this timestamp as most recent occurence of the term self.cardinality_cache[key][value] = lookup_es_key(event, self.ts_field) self.check_for_match(key, event) def check_for_match(self, key, event, gc=True): # Check to see if we are past max/min_cardinality for a given key time_elapsed = lookup_es_key(event, self.ts_field) - self.first_event.get(key, lookup_es_key(event, self.ts_field)) timeframe_elapsed = time_elapsed > 
self.timeframe if (len(self.cardinality_cache[key]) > self.rules.get('max_cardinality', float('inf')) or (len(self.cardinality_cache[key]) < self.rules.get('min_cardinality', float('-inf')) and timeframe_elapsed)): # If there might be a match, run garbage collect first, as outdated terms are only removed in GC # Only run it if there might be a match so it doesn't impact performance if gc: self.garbage_collect(lookup_es_key(event, self.ts_field)) self.check_for_match(key, event, False) else: self.first_event.pop(key, None) self.add_match(event) def garbage_collect(self, timestamp): """ Remove all occurrence data that is beyond the timeframe away """ for qk, terms in list(self.cardinality_cache.items()): for term, last_occurence in list(terms.items()): if timestamp - last_occurence > self.rules['timeframe']: self.cardinality_cache[qk].pop(term) # Create a placeholder event for if a min_cardinality match occured if 'min_cardinality' in self.rules: event = {self.ts_field: timestamp} if 'query_key' in self.rules: event.update({self.rules['query_key']: qk}) self.check_for_match(qk, event, False) def get_match_str(self, match): lt = self.rules.get('use_local_time') starttime = pretty_ts(dt_to_ts(ts_to_dt(lookup_es_key(match, self.ts_field)) - self.rules['timeframe']), lt) endtime = pretty_ts(lookup_es_key(match, self.ts_field), lt) if 'max_cardinality' in self.rules: message = ('A maximum of %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (self.rules['max_cardinality'], self.rules['cardinality_field'], starttime, endtime)) else: message = ('Less than %d unique %s(s) occurred since last alert or between %s and %s\n\n' % (self.rules['min_cardinality'], self.rules['cardinality_field'], starttime, endtime)) return message class BaseAggregationRule(RuleType): def __init__(self, *args): super(BaseAggregationRule, self).__init__(*args) bucket_interval = self.rules.get('bucket_interval') if bucket_interval: if 'seconds' in bucket_interval: self.rules['bucket_interval_period'] = str(bucket_interval['seconds']) + 's' elif 'minutes' in bucket_interval: self.rules['bucket_interval_period'] = str(bucket_interval['minutes']) + 'm' elif 'hours' in bucket_interval: self.rules['bucket_interval_period'] = str(bucket_interval['hours']) + 'h' elif 'days' in bucket_interval: self.rules['bucket_interval_period'] = str(bucket_interval['days']) + 'd' elif 'weeks' in bucket_interval: self.rules['bucket_interval_period'] = str(bucket_interval['weeks']) + 'w' else: raise EAException("Unsupported window size") if self.rules.get('use_run_every_query_size'): if total_seconds(self.rules['run_every']) % total_seconds(self.rules['bucket_interval_timedelta']) != 0: raise EAException("run_every must be evenly divisible by bucket_interval if specified") else: if total_seconds(self.rules['buffer_time']) % total_seconds(self.rules['bucket_interval_timedelta']) != 0: raise EAException("Buffer_time must be evenly divisible by bucket_interval if specified") def generate_aggregation_query(self): raise NotImplementedError() def add_aggregation_data(self, payload): for timestamp, payload_data in payload.items(): if 'interval_aggs' in payload_data: self.unwrap_interval_buckets(timestamp, None, payload_data['interval_aggs']['buckets']) elif 'bucket_aggs' in payload_data: self.unwrap_term_buckets(timestamp, payload_data['bucket_aggs']['buckets']) else: self.check_matches(timestamp, None, payload_data) def unwrap_interval_buckets(self, timestamp, query_key, interval_buckets): for interval_data in interval_buckets: # Use 
bucket key here instead of start_time for more accurate match timestamp self.check_matches(ts_to_dt(interval_data['key_as_string']), query_key, interval_data) def unwrap_term_buckets(self, timestamp, term_buckets): for term_data in term_buckets: if 'interval_aggs' in term_data: self.unwrap_interval_buckets(timestamp, term_data['key'], term_data['interval_aggs']['buckets']) else: self.check_matches(timestamp, term_data['key'], term_data) def check_matches(self, timestamp, query_key, aggregation_data): raise NotImplementedError() class MetricAggregationRule(BaseAggregationRule): """ A rule that matches when there is a low number of events given a timeframe. """ required_options = frozenset(['metric_agg_key', 'metric_agg_type']) allowed_aggregations = frozenset(['min', 'max', 'avg', 'sum', 'cardinality', 'value_count']) def __init__(self, *args): super(MetricAggregationRule, self).__init__(*args) self.ts_field = self.rules.get('timestamp_field', '@timestamp') if 'max_threshold' not in self.rules and 'min_threshold' not in self.rules: raise EAException("MetricAggregationRule must have at least one of either max_threshold or min_threshold") self.metric_key = 'metric_' + self.rules['metric_agg_key'] + '_' + self.rules['metric_agg_type'] if not self.rules['metric_agg_type'] in self.allowed_aggregations: raise EAException("metric_agg_type must be one of %s" % (str(self.allowed_aggregations))) self.rules['aggregation_query_element'] = self.generate_aggregation_query() def get_match_str(self, match): message = 'Threshold violation, %s:%s %s (min: %s max : %s) \n\n' % ( self.rules['metric_agg_type'], self.rules['metric_agg_key'], match[self.metric_key], self.rules.get('min_threshold'), self.rules.get('max_threshold') ) return message def generate_aggregation_query(self): return {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} def check_matches(self, timestamp, query_key, aggregation_data): if "compound_query_key" in self.rules: self.check_matches_recursive(timestamp, query_key, aggregation_data, self.rules['compound_query_key'], dict()) else: metric_val = aggregation_data[self.metric_key]['value'] if self.crossed_thresholds(metric_val): match = {self.rules['timestamp_field']: timestamp, self.metric_key: metric_val} if query_key is not None: match[self.rules['query_key']] = query_key self.add_match(match) def check_matches_recursive(self, timestamp, query_key, aggregation_data, compound_keys, match_data): if len(compound_keys) < 1: # shouldn't get to this point, but checking for safety return match_data[compound_keys[0]] = aggregation_data['key'] if 'bucket_aggs' in aggregation_data: for result in aggregation_data['bucket_aggs']['buckets']: self.check_matches_recursive(timestamp, query_key, result, compound_keys[1:], match_data) else: metric_val = aggregation_data[self.metric_key]['value'] if self.crossed_thresholds(metric_val): match_data[self.rules['timestamp_field']] = timestamp match_data[self.metric_key] = metric_val # add compound key to payload to allow alerts to trigger for every unique occurence compound_value = [match_data[key] for key in self.rules['compound_query_key']] match_data[self.rules['query_key']] = ",".join([str(value) for value in compound_value]) self.add_match(match_data) def crossed_thresholds(self, metric_value): if metric_value is None: return False if 'max_threshold' in self.rules and metric_value > self.rules['max_threshold']: return True if 'min_threshold' in self.rules and metric_value < self.rules['min_threshold']: return True 
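        # Neither threshold was crossed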
return False class SpikeMetricAggregationRule(BaseAggregationRule, SpikeRule): """ A rule that matches when there is a spike in an aggregated event compared to its reference point """ required_options = frozenset(['metric_agg_key', 'metric_agg_type', 'spike_height', 'spike_type']) allowed_aggregations = frozenset(['min', 'max', 'avg', 'sum', 'cardinality', 'value_count']) def __init__(self, *args): # We inherit everything from BaseAggregation and Spike, overwrite only what we need in functions below super(SpikeMetricAggregationRule, self).__init__(*args) # MetricAgg alert things self.metric_key = 'metric_' + self.rules['metric_agg_key'] + '_' + self.rules['metric_agg_type'] if not self.rules['metric_agg_type'] in self.allowed_aggregations: raise EAException("metric_agg_type must be one of %s" % (str(self.allowed_aggregations))) # Disabling bucket intervals (doesn't make sense in context of spike to split up your time period) if self.rules.get('bucket_interval'): raise EAException("bucket intervals are not supported for spike aggregation alerts") self.rules['aggregation_query_element'] = self.generate_aggregation_query() def generate_aggregation_query(self): """Lifted from MetricAggregationRule, added support for scripted fields""" if self.rules.get('metric_agg_script'): return {self.metric_key: {self.rules['metric_agg_type']: self.rules['metric_agg_script']}} return {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} def add_aggregation_data(self, payload): """ BaseAggregationRule.add_aggregation_data unpacks our results and runs checks directly against hardcoded cutoffs. We instead want to use all of our SpikeRule.handle_event inherited logic (current/reference) from the aggregation's "value" key to determine spikes from aggregations """ for timestamp, payload_data in payload.items(): if 'bucket_aggs' in payload_data: self.unwrap_term_buckets(timestamp, payload_data['bucket_aggs']) else: # no time / term split, just focus on the agg event = {self.ts_field: timestamp} agg_value = payload_data[self.metric_key]['value'] self.handle_event(event, agg_value, 'all') return def unwrap_term_buckets(self, timestamp, term_buckets, qk=[]): """ create separate spike event trackers for each term, handle compound query keys """ for term_data in term_buckets['buckets']: qk.append(term_data['key']) # handle compound query keys (nested aggregations) if term_data.get('bucket_aggs'): self.unwrap_term_buckets(timestamp, term_data['bucket_aggs'], qk) # reset the query key to consider the proper depth for N > 2 del qk[-1] continue qk_str = ','.join(qk) agg_value = term_data[self.metric_key]['value'] event = {self.ts_field: timestamp, self.rules['query_key']: qk_str} # pass to SpikeRule's tracker self.handle_event(event, agg_value, qk_str) # handle unpack of lowest level del qk[-1] return def get_match_str(self, match): """ Overwrite SpikeRule's message to relate to the aggregation type & field instead of count """ message = 'An abnormal {0} of {1} ({2}) occurred around {3}.\n'.format( self.rules['metric_agg_type'], self.rules['metric_agg_key'], round(match['spike_count'], 2), pretty_ts(match[self.rules['timestamp_field']], self.rules.get('use_local_time')) ) message += 'Preceding that time, there was a {0} of {1} of ({2}) within {3}\n\n'.format( self.rules['metric_agg_type'], self.rules['metric_agg_key'], round(match['reference_count'], 2), self.rules['timeframe']) return message class PercentageMatchRule(BaseAggregationRule): required_options = 
frozenset(['match_bucket_filter']) def __init__(self, *args): super(PercentageMatchRule, self).__init__(*args) self.ts_field = self.rules.get('timestamp_field', '@timestamp') if 'max_percentage' not in self.rules and 'min_percentage' not in self.rules: raise EAException("PercentageMatchRule must have at least one of either min_percentage or max_percentage") self.min_denominator = self.rules.get('min_denominator', 0) self.match_bucket_filter = self.rules['match_bucket_filter'] self.rules['aggregation_query_element'] = self.generate_aggregation_query() def get_match_str(self, match): percentage_format_string = self.rules.get('percentage_format_string', None) message = 'Percentage violation, value: %s (min: %s max : %s) of %s items\n\n' % ( percentage_format_string % (match['percentage']) if percentage_format_string else match['percentage'], self.rules.get('min_percentage'), self.rules.get('max_percentage'), match['denominator'] ) return message def generate_aggregation_query(self): return { 'percentage_match_aggs': { 'filters': { 'other_bucket': True, 'filters': { 'match_bucket': { 'bool': { 'must': self.match_bucket_filter } } } } } } def check_matches(self, timestamp, query_key, aggregation_data): match_bucket_count = aggregation_data['percentage_match_aggs']['buckets']['match_bucket']['doc_count'] other_bucket_count = aggregation_data['percentage_match_aggs']['buckets']['_other_']['doc_count'] if match_bucket_count is None or other_bucket_count is None: return else: total_count = other_bucket_count + match_bucket_count if total_count == 0 or total_count < self.min_denominator: return else: match_percentage = (match_bucket_count * 1.0) / (total_count * 1.0) * 100 if self.percentage_violation(match_percentage): match = {self.rules['timestamp_field']: timestamp, 'percentage': match_percentage, 'denominator': total_count} if query_key is not None: match[self.rules['query_key']] = query_key self.add_match(match) def percentage_violation(self, match_percentage): if 'max_percentage' in self.rules and match_percentage > self.rules['max_percentage']: return True if 'min_percentage' in self.rules and match_percentage < self.rules['min_percentage']: return True return False elastalert-0.2.4/elastalert/schema.yaml000066400000000000000000000274771364615736500202020ustar00rootroot00000000000000$schema: http://json-schema.org/draft-07/schema# definitions: # Either a single string OR an array of strings arrayOfStrings: &arrayOfString type: [string, array] items: {type: string} # Either a single string OR an array of strings OR an array of ararys arrayOfStringsOrOtherArrays: &arrayOfStringsOrOtherArray type: [string, array] items: {type: [string, array]} timedelta: &timedelta type: object additionalProperties: false properties: days: {type: number} weeks: {type: number} hours: {type: number} minutes: {type: number} seconds: {type: number} milliseconds: {type: number} timeFrame: &timeframe type: object additionalProperties: false properties: days: {type: number} weeks: {type: number} hours: {type: number} minutes: {type: number} seconds: {type: number} milliseconds: {type: number} schedule: {type: string} filter: &filter {} mattermostField: &mattermostField type: object additionalProperties: false properties: title: {type: string} value: {type: string} args: *arrayOfString short: {type: boolean} required: [type, index, alert] type: object ### Rule Types section oneOf: - title: Any properties: type: {enum: [any]} - title: Blacklist required: [blacklist, compare_key] properties: type: {enum: [blacklist]} 
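      # compare_key accepts either a single field name or a list of field names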
compare_key: {'items': {'type': 'string'},'type': ['string', 'array']} blacklist: {type: array, items: {type: string}} - title: Whitelist required: [whitelist, compare_key, ignore_null] properties: type: {enum: [whitelist]} compare_key: {'items': {'type': 'string'},'type': ['string', 'array']} whitelist: {type: array, items: {type: string}} ignore_null: {type: boolean} - title: Change required: [query_key, compare_key, ignore_null] properties: type: {enum: [change]} compare_key: {'items': {'type': 'string'},'type': ['string', 'array']} ignore_null: {type: boolean} timeframe: *timeframe - title: Frequency required: [num_events, timeframe] properties: type: {enum: [frequency]} num_events: {type: integer} timeframe: *timeframe use_count_query: {type: boolean} doc_type: {type: string} use_terms_query: {type: boolean} terms_size: {type: integer} attach_related: {type: boolean} - title: Spike required: [spike_height, spike_type, timeframe] properties: type: {enum: [spike]} spike_height: {type: number} spike_type: {enum: ["up", "down", "both"]} timeframe: *timeframe use_count_query: {type: boolean} doc_type: {type: string} use_terms_query: {type: boolean} terms_size: {type: integer} alert_on_new_data: {type: boolean} threshold_ref: {type: integer} threshold_cur: {type: integer} - title: Spike Aggregation required: [spike_height, spike_type, timeframe] properties: type: {enum: [spike_aggregation]} spike_height: {type: number} spike_type: {enum: ["up", "down", "both"]} metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count"]} timeframe: *timeframe use_count_query: {type: boolean} doc_type: {type: string} use_terms_query: {type: boolean} terms_size: {type: integer} alert_on_new_data: {type: boolean} threshold_ref: {type: number} threshold_cur: {type: number} min_doc_count: {type: integer} - title: Flatline required: [threshold, timeframe] properties: type: {enum: [flatline]} timeframe: *timeframe threshold: {type: integer} use_count_query: {type: boolean} doc_type: {type: string} - title: New Term required: [] properties: type: {enum: [new_term]} fields: *arrayOfStringsOrOtherArray terms_window_size: *timeframe alert_on_missing_field: {type: boolean} use_terms_query: {type: boolean} terms_size: {type: integer} - title: Cardinality required: [cardinality_field, timeframe] properties: type: {enum: [cardinality]} max_cardinality: {type: integer} min_cardinality: {type: integer} cardinality_field: {type: string} timeframe: *timeframe - title: Metric Aggregation required: [metric_agg_key,metric_agg_type] properties: type: {enum: [metric_aggregation]} metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count"]} #timeframe: *timeframe - title: Percentage Match required: [match_bucket_filter] properties: type: {enum: [percentage_match]} - title: Custom Rule from Module properties: # custom rules include a period in the rule type type: {pattern: "[.]"} properties: # Common Settings es_host: {type: string} es_port: {type: integer} index: {type: string} name: {type: string} use_ssl: {type: boolean} verify_certs: {type: boolean} es_username: {type: string} es_password: {type: string} use_strftime_index: {type: boolean} # Optional Settings import: {type: string} aggregation: *timeframe realert: *timeframe exponential_realert: *timeframe buffer_time: *timeframe query_delay: *timeframe max_query_size: {type: integer} max_scrolling: {type: integer} owner: {type: string} priority: {type: integer} filter : type: [array, object] items: *filter additionalProperties: 
false properties: download_dashboard: {type: string} include: {type: array, items: {type: string}} top_count_keys: {type: array, items: {type: string}} top_count_number: {type: integer} raw_count_keys: {type: boolean} generate_kibana_link: {type: boolean} kibana_dashboard: {type: string} use_kibana_dashboard: {type: string} use_local_time: {type: boolean} match_enhancements: {type: array, items: {type: string}} query_key: *arrayOfString replace_dots_in_field_names: {type: boolean} scan_entire_timeframe: {type: boolean} ### Kibana Discover App Link generate_kibana_discover_url: {type: boolean} kibana_discover_app_url: {type: string, format: uri} kibana_discover_version: {type: string, enum: ['7.3', '7.2', '7.1', '7.0', '6.8', '6.7', '6.6', '6.5', '6.4', '6.3', '6.2', '6.1', '6.0', '5.6']} kibana_discover_index_pattern_id: {type: string, minLength: 1} kibana_discover_columns: {type: array, items: {type: string, minLength: 1}, minItems: 1} kibana_discover_from_timedelta: *timedelta kibana_discover_to_timedelta: *timedelta # Alert Content alert_text: {type: string} # Python format string alert_text_args: {type: array, items: {type: string}} alert_text_kw: {type: object} alert_text_type: {enum: [alert_text_only, exclude_fields, aggregation_summary_only]} alert_missing_value: {type: string} timestamp_field: {type: string} field: {} ### Commands command: *arrayOfString pipe_match_json: {type: boolean} fail_on_non_zero_exit: {type: boolean} ### Email email: *arrayOfString email_reply_to: {type: string} notify_email: *arrayOfString # if rule is slow or erroring, send to this email smtp_host: {type: string} from_addr: {type: string} ### JIRA jira_server: {type: string} jira_project: {type: string} jira_issuetype: {type: string} jira_account_file: {type: string} # a Yaml file that includes the keys {user:, password:} jira_assignee: {type: string} jira_component: *arrayOfString jira_components: *arrayOfString jira_label: *arrayOfString jira_labels: *arrayOfString jira_bump_tickets: {type: boolean} jira_bump_in_statuses: *arrayOfString jira_bump_not_in_statuses: *arrayOfString jira_max_age: {type: number} jira_watchers: *arrayOfString ### HipChat hipchat_auth_token: {type: string} hipchat_room_id: {type: [string, integer]} hipchat_domain: {type: string} hipchat_ignore_ssl_errors: {type: boolean} hipchat_notify: {type: boolean} hipchat_from: {type: string} hipchat_mentions: {type: array, items: {type: string}} ### Stride stride_access_token: {type: string} stride_cloud_id: {type: string} stride_conversation_id: {type: string} stride_ignore_ssl_errors: {type: boolean} ### Slack slack_webhook_url: *arrayOfString slack_username_override: {type: string} slack_emoji_override: {type: string} slack_icon_url_override: {type: string} slack_msg_color: {enum: [good, warning, danger]} slack_parse_override: {enum: [none, full]} slack_text_string: {type: string} slack_ignore_ssl_errors: {type: boolean} slack_ca_certs: {type: string} slack_attach_kibana_discover_url {type: boolean} slack_kibana_discover_color {type: string} slack_kibana_discover_title {type: string} ### Mattermost mattermost_webhook_url: *arrayOfString mattermost_proxy: {type: string} mattermost_ignore_ssl_errors: {type: boolean} mattermost_username_override: {type: string} mattermost_icon_url_override: {type: string} mattermost_channel_override: {type: string} mattermost_msg_color: {enum: [good, warning, danger]} mattermost_msg_pretext: {type: string} mattermost_msg_fields: *mattermostField ## Opsgenie opsgenie_details: type: object minProperties: 1 
patternProperties: "^.+$": oneOf: - type: string - type: object additionalProperties: false required: [field] properties: field: {type: string, minLength: 1} ### PagerDuty pagerduty_service_key: {type: string} pagerduty_client_name: {type: string} pagerduty_event_type: {enum: [none, trigger, resolve, acknowledge]} ### PagerTree pagertree_integration_url: {type: string} ### Exotel exotel_account_sid: {type: string} exotel_auth_token: {type: string} exotel_to_number: {type: string} exotel_from_number: {type: string} ### Twilio twilio_account_sid: {type: string} twilio_auth_token: {type: string} twilio_to_number: {type: string} twilio_from_number: {type: string} ### VictorOps victorops_api_key: {type: string} victorops_routing_key: {type: string} victorops_message_type: {enum: [INFO, WARNING, ACKNOWLEDGEMENT, CRITICAL, RECOVERY]} victorops_entity_id: {type: string} victorops_entity_display_name: {type: string} ### Telegram telegram_bot_token: {type: string} telegram_room_id: {type: string} telegram_api_url: {type: string} ### Gitter gitter_webhook_url: {type: string} gitter_proxy: {type: string} gitter_msg_level: {enum: [info, error]} ### Alerta alerta_api_url: {type: string} alerta_api_key: {type: string} alerta_severity: {enum: [unknown, security, debug, informational, ok, normal, cleared, indeterminate, warning, minor, major, critical]} alerta_resource: {type: string} # Python format string alerta_environment: {type: string} # Python format string alerta_origin: {type: string} # Python format string alerta_group: {type: string} # Python format string alerta_service: {type: array, items: {type: string}} # Python format string alerta_service: {type: array, items: {type: string}} # Python format string alerta_correlate: {type: array, items: {type: string}} # Python format string alerta_tags: {type: array, items: {type: string}} # Python format string alerta_event: {type: string} # Python format string alerta_customer: {type: string} alerta_text: {type: string} # Python format string alerta_type: {type: string} alerta_value: {type: string} # Python format string alerta_attributes_keys: {type: array, items: {type: string}} alerta_attributes_values: {type: array, items: {type: string}} # Python format string alerta_new_style_string_format: {type: boolean} ### Simple simple_webhook_url: *arrayOfString simple_proxy: {type: string} ### LineNotify linenotify_access_token: {type: string} ### Zabbix zbx_sender_host: {type: string} zbx_sender_port: {type: integer} zbx_host: {type: string} zbx_item: {type: string} elastalert-0.2.4/elastalert/test_rule.py000066400000000000000000000435411364615736500204240ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import argparse import copy import datetime import json import logging import random import re import string import sys import mock from elastalert.config import load_conf from elastalert.elastalert import ElastAlerter from elastalert.util import EAException from elastalert.util import elasticsearch_client from elastalert.util import lookup_es_key from elastalert.util import ts_now from elastalert.util import ts_to_dt logging.getLogger().setLevel(logging.INFO) logging.getLogger('elasticsearch').setLevel(logging.WARNING) """ Error Codes: 1: Error connecting to ElasticSearch 2: Error querying ElasticSearch 3: Invalid Rule 4: Missing/invalid timestamp """ def print_terms(terms, parent): """ Prints a list of flattened dictionary keys """ for term in terms: if type(terms[term]) != dict: print('\t' + parent + term) else: 
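            # Recurse into nested dictionaries, extending the dotted key prefix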
print_terms(terms[term], parent + term + '.') class MockElastAlerter(object): def __init__(self): self.data = [] self.formatted_output = {} def test_file(self, conf, args): """ Loads a rule config file, performs a query over the last day (args.days), lists available keys and prints the number of results. """ if args.schema_only: return [] # Set up Elasticsearch client and query es_client = elasticsearch_client(conf) try: ElastAlerter.modify_rule_for_ES5(conf) except EAException as ea: print('Invalid filter provided:', str(ea), file=sys.stderr) if args.stop_error: exit(3) return None except Exception as e: print("Error connecting to ElasticSearch:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) if args.stop_error: exit(1) return None start_time = ts_now() - datetime.timedelta(days=args.days) end_time = ts_now() ts = conf.get('timestamp_field', '@timestamp') query = ElastAlerter.get_query( conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, to_ts_func=conf['dt_to_ts'], five=conf['five'] ) index = ElastAlerter.get_index(conf, start_time, end_time) # Get one document for schema try: res = es_client.search(index, size=1, body=query, ignore_unavailable=True) except Exception as e: print("Error running your filter:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) if args.stop_error: exit(3) return None num_hits = len(res['hits']['hits']) if not num_hits: print("Didn't get any results.") return [] terms = res['hits']['hits'][0]['_source'] doc_type = res['hits']['hits'][0]['_type'] # Get a count of all docs count_query = ElastAlerter.get_query( conf['filter'], starttime=start_time, endtime=end_time, timestamp_field=ts, to_ts_func=conf['dt_to_ts'], sort=False, five=conf['five'] ) try: res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True) except Exception as e: print("Error querying Elasticsearch:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) if args.stop_error: exit(2) return None num_hits = res['count'] if args.formatted_output: self.formatted_output['hits'] = num_hits self.formatted_output['days'] = args.days self.formatted_output['terms'] = list(terms.keys()) self.formatted_output['result'] = terms else: print("Got %s hits from the last %s day%s" % (num_hits, args.days, 's' if args.days > 1 else '')) print("\nAvailable terms in first hit:") print_terms(terms, '') # Check for missing keys pk = conf.get('primary_key') ck = conf.get('compare_key') if pk and not lookup_es_key(terms, pk): print("Warning: primary key %s is either missing or null!", file=sys.stderr) if ck and not lookup_es_key(terms, ck): print("Warning: compare key %s is either missing or null!", file=sys.stderr) include = conf.get('include') if include: for term in include: if not lookup_es_key(terms, term) and '*' not in term: print("Included term %s may be missing or null" % (term), file=sys.stderr) for term in conf.get('top_count_keys', []): # If the index starts with 'logstash', fields with .raw will be available but won't in _source if term not in terms and not (term.endswith('.raw') and term[:-4] in terms and index.startswith('logstash')): print("top_count_key %s may be missing" % (term), file=sys.stderr) if not args.formatted_output: print('') # Newline # Download up to max_query_size (defaults to 10,000) documents to save if (args.save or args.formatted_output) and not args.count: try: res = es_client.search(index, size=args.max_query_size, body=query, ignore_unavailable=True) except Exception as e: print("Error running your filter:", 
file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) if args.stop_error: exit(2) return None num_hits = len(res['hits']['hits']) if args.save: print("Downloaded %s documents to save" % (num_hits)) return res['hits']['hits'] def mock_count(self, rule, start, end, index): """ Mocks the effects of get_hits_count using global data instead of Elasticsearch """ count = 0 for doc in self.data: if start <= ts_to_dt(doc[rule['timestamp_field']]) < end: count += 1 return {end: count} def mock_hits(self, rule, start, end, index, scroll=False): """ Mocks the effects of get_hits using global data instead of Elasticsearch. """ docs = [] for doc in self.data: if start <= ts_to_dt(doc[rule['timestamp_field']]) < end: docs.append(doc) # Remove all fields which don't match 'include' for doc in docs: fields_to_remove = [] for field in doc: if field != '_id': if not any([re.match(incl.replace('*', '.*'), field) for incl in rule['include']]): fields_to_remove.append(field) list(map(doc.pop, fields_to_remove)) # Separate _source and _id, convert timestamps resp = [{'_source': doc, '_id': doc['_id']} for doc in docs] for doc in resp: doc['_source'].pop('_id') return ElastAlerter.process_hits(rule, resp) def mock_terms(self, rule, start, end, index, key, qk=None, size=None): """ Mocks the effects of get_hits_terms using global data instead of Elasticsearch. """ if key.endswith('.raw'): key = key[:-4] buckets = {} for doc in self.data: if key not in doc: continue if start <= ts_to_dt(doc[rule['timestamp_field']]) < end: if qk is None or doc[rule['query_key']] == qk: buckets.setdefault(doc[key], 0) buckets[doc[key]] += 1 counts = list(buckets.items()) counts.sort(key=lambda x: x[1], reverse=True) if size: counts = counts[:size] buckets = [{'key': value, 'doc_count': count} for value, count in counts] return {end: buckets} def mock_elastalert(self, elastalert): """ Replaces elastalert's get_hits functions with mocks. """ elastalert.get_hits_count = self.mock_count elastalert.get_hits_terms = self.mock_terms elastalert.get_hits = self.mock_hits elastalert.elasticsearch_client = mock.Mock() def run_elastalert(self, rule, conf, args): """ Creates an ElastAlert instance and run's over for a specific rule using either real or mock data. 
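        When mock data is supplied via --data, the documents are sorted by their
        timestamp field and the query window spans the first through the last
        document; otherwise the window comes from --start/--end/--days (falling
        back to the rule's timeframe, or one day).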
""" # Load and instantiate rule # Pass an args containing the context of whether we're alerting or not # It is needed to prevent unnecessary initialization of unused alerters load_modules_args = argparse.Namespace() load_modules_args.debug = not args.alert conf['rules_loader'].load_modules(rule, load_modules_args) # If using mock data, make sure it's sorted and find appropriate time range timestamp_field = rule.get('timestamp_field', '@timestamp') if args.json: if not self.data: return None try: self.data.sort(key=lambda x: x[timestamp_field]) starttime = ts_to_dt(self.data[0][timestamp_field]) endtime = self.data[-1][timestamp_field] endtime = ts_to_dt(endtime) + datetime.timedelta(seconds=1) except KeyError as e: print("All documents must have a timestamp and _id: %s" % (e), file=sys.stderr) if args.stop_error: exit(4) return None # Create mock _id for documents if it's missing used_ids = [] def get_id(): _id = ''.join([random.choice(string.letters) for i in range(16)]) if _id in used_ids: return get_id() used_ids.append(_id) return _id for doc in self.data: doc.update({'_id': doc.get('_id', get_id())}) else: if args.end: if args.end == 'NOW': endtime = ts_now() else: try: endtime = ts_to_dt(args.end) except (TypeError, ValueError): self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.end)) exit(4) else: endtime = ts_now() if args.start: try: starttime = ts_to_dt(args.start) except (TypeError, ValueError): self.handle_error("%s is not a valid ISO8601 timestamp (YYYY-MM-DDTHH:MM:SS+XX:00)" % (args.start)) exit(4) else: # if days given as command line argument if args.days > 0: starttime = endtime - datetime.timedelta(days=args.days) else: # if timeframe is given in rule if 'timeframe' in rule: starttime = endtime - datetime.timedelta(seconds=rule['timeframe'].total_seconds() * 1.01) # default is 1 days / 24 hours else: starttime = endtime - datetime.timedelta(days=1) # Set run_every to cover the entire time range unless count query, terms query or agg query used # This is to prevent query segmenting which unnecessarily slows down tests if not rule.get('use_terms_query') and not rule.get('use_count_query') and not rule.get('aggregation_query_element'): conf['run_every'] = endtime - starttime # Instantiate ElastAlert to use mock config and special rule with mock.patch.object(conf['rules_loader'], 'get_hashes'): with mock.patch.object(conf['rules_loader'], 'load') as load_rules: load_rules.return_value = [rule] with mock.patch('elastalert.elastalert.load_conf') as load_conf: load_conf.return_value = conf if args.alert: client = ElastAlerter(['--verbose']) else: client = ElastAlerter(['--debug']) # Replace get_hits_* functions to use mock data if args.json: self.mock_elastalert(client) # Mock writeback to return empty results client.writeback_es = mock.MagicMock() client.writeback_es.search.return_value = {"hits": {"hits": []}} with mock.patch.object(client, 'writeback') as mock_writeback: client.run_rule(rule, endtime, starttime) if mock_writeback.call_count: if args.formatted_output: self.formatted_output['writeback'] = {} else: print("\nWould have written the following documents to writeback index (default is elastalert_status):\n") errors = False for call in mock_writeback.call_args_list: if args.formatted_output: self.formatted_output['writeback'][call[0][0]] = json.loads(json.dumps(call[0][1], default=str)) else: print("%s - %s\n" % (call[0][0], call[0][1])) if call[0][0] == 'elastalert_error': errors = True if errors and args.stop_error: exit(2) 
def run_rule_test(self): """ Uses args to run the various components of MockElastAlerter such as loading the file, saving data, loading data, and running. """ parser = argparse.ArgumentParser(description='Validate a rule configuration') parser.add_argument('file', metavar='rule', type=str, help='rule configuration filename') parser.add_argument('--schema-only', action='store_true', help='Show only schema errors; do not run query') parser.add_argument('--days', type=int, default=0, action='store', help='Query the previous N days with this rule') parser.add_argument('--start', dest='start', help='YYYY-MM-DDTHH:MM:SS Start querying from this timestamp.') parser.add_argument('--end', dest='end', help='YYYY-MM-DDTHH:MM:SS Query to this timestamp. (Default: present) ' 'Use "NOW" to start from current time. (Default: present)') parser.add_argument('--stop-error', action='store_true', help='Stop the entire test right after the first error') parser.add_argument('--formatted-output', action='store_true', help='Output results in formatted JSON') parser.add_argument( '--data', type=str, metavar='FILENAME', action='store', dest='json', help='A JSON file containing data to run the rule against') parser.add_argument('--alert', action='store_true', help='Use actual alerts instead of debug output') parser.add_argument( '--save-json', type=str, metavar='FILENAME', action='store', dest='save', help='A file to which documents from the last day or --days will be saved') parser.add_argument( '--use-downloaded', action='store_true', dest='use_downloaded', help='Use the downloaded ' ) parser.add_argument( '--max-query-size', type=int, default=10000, action='store', dest='max_query_size', help='Maximum size of any query') parser.add_argument( '--count-only', action='store_true', dest='count', help='Only display the number of documents matching the filter') parser.add_argument('--config', action='store', dest='config', help='Global config file.') args = parser.parse_args() defaults = { 'rules_folder': 'rules', 'es_host': 'localhost', 'es_port': 14900, 'writeback_index': 'wb', 'writeback_alias': 'wb_a', 'max_query_size': 10000, 'alert_time_limit': {'hours': 24}, 'old_query_limit': {'weeks': 1}, 'run_every': {'minutes': 5}, 'disable_rules_on_error': False, 'buffer_time': {'minutes': 45}, 'scroll_keepalive': '30s' } overwrites = { 'rules_loader': 'file', } # Set arguments that ElastAlerter needs args.verbose = args.alert args.debug = not args.alert args.es_debug = False args.es_debug_trace = False conf = load_conf(args, defaults, overwrites) rule_yaml = conf['rules_loader'].load_yaml(args.file) conf['rules_loader'].load_options(rule_yaml, conf, args.file) if args.json: with open(args.json, 'r') as data_file: self.data = json.loads(data_file.read()) else: hits = self.test_file(copy.deepcopy(rule_yaml), args) if hits and args.formatted_output: self.formatted_output['results'] = json.loads(json.dumps(hits)) if hits and args.save: with open(args.save, 'wb') as data_file: # Add _id to _source for dump [doc['_source'].update({'_id': doc['_id']}) for doc in hits] data_file.write(json.dumps([doc['_source'] for doc in hits], indent=4)) if args.use_downloaded: if hits: args.json = args.save with open(args.json, 'r') as data_file: self.data = json.loads(data_file.read()) else: self.data = [] if not args.schema_only and not args.count: self.run_elastalert(rule_yaml, conf, args) if args.formatted_output: print(json.dumps(self.formatted_output)) def main(): test_instance = MockElastAlerter() test_instance.run_rule_test() if 
__name__ == '__main__': main() elastalert-0.2.4/elastalert/util.py000066400000000000000000000360741364615736500173760ustar00rootroot00000000000000# -*- coding: utf-8 -*- import collections import datetime import logging import os import re import sys import dateutil.parser import pytz from six import string_types from . import ElasticSearchClient from .auth import Auth logging.basicConfig() elastalert_logger = logging.getLogger('elastalert') def get_module(module_name): """ Loads a module and returns a specific object. module_name should 'module.file.object'. Returns object or raises EAException on error. """ sys.path.append(os.getcwd()) try: module_path, module_class = module_name.rsplit('.', 1) base_module = __import__(module_path, globals(), locals(), [module_class]) module = getattr(base_module, module_class) except (ImportError, AttributeError, ValueError) as e: raise EAException("Could not import module %s: %s" % (module_name, e)).with_traceback(sys.exc_info()[2]) return module def new_get_event_ts(ts_field): """ Constructs a lambda that may be called to extract the timestamp field from a given event. :returns: A callable function that takes an event and outputs that event's timestamp field. """ return lambda event: lookup_es_key(event[0], ts_field) def _find_es_dict_by_key(lookup_dict, term): """ Performs iterative dictionary search based upon the following conditions: 1. Subkeys may either appear behind a full stop (.) or at one lookup_dict level lower in the tree. 2. No wildcards exist within the provided ES search terms (these are treated as string literals) This is necessary to get around inconsistencies in ES data. For example: {'ad.account_name': 'bob'} Or: {'csp_report': {'blocked_uri': 'bob.com'}} And even: {'juniper_duo.geoip': {'country_name': 'Democratic People's Republic of Korea'}} We want a search term of form "key.subkey.subsubkey" to match in all cases. :returns: A tuple with the first element being the dict that contains the key and the second element which is the last subkey used to access the target specified by the term. None is returned for both if the key can not be found. """ if term in lookup_dict: return lookup_dict, term # If the term does not match immediately, perform iterative lookup: # 1. Split the search term into tokens # 2. Recurrently concatenate these together to traverse deeper into the dictionary, # clearing the subkey at every successful lookup. # # This greedy approach is correct because subkeys must always appear in order, # preferring full stops and traversal interchangeably. # # Subkeys will NEVER be duplicated between an alias and a traversal. # # For example: # {'foo.bar': {'bar': 'ray'}} to look up foo.bar will return {'bar': 'ray'}, not 'ray' dict_cursor = lookup_dict while term: split_results = re.split(r'\[(\d)\]', term, maxsplit=1) if len(split_results) == 3: sub_term, index, term = split_results index = int(index) else: sub_term, index, term = split_results + [None, ''] subkeys = sub_term.split('.') subkey = '' while len(subkeys) > 0: if not dict_cursor: return {}, None subkey += subkeys.pop(0) if subkey in dict_cursor: if len(subkeys) == 0: break dict_cursor = dict_cursor[subkey] subkey = '' elif len(subkeys) == 0: # If there are no keys left to match, return None values dict_cursor = None subkey = None else: subkey += '.' 
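        # A [N] suffix in the search term selects the N'th element of a list
        # field; descend into it and keep resolving any remaining part of the term.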
if index is not None and subkey: dict_cursor = dict_cursor[subkey] if type(dict_cursor) == list and len(dict_cursor) > index: subkey = index if term: dict_cursor = dict_cursor[subkey] else: return {}, None return dict_cursor, subkey def set_es_key(lookup_dict, term, value): """ Looks up the location that the term maps to and sets it to the given value. :returns: True if the value was set successfully, False otherwise. """ value_dict, value_key = _find_es_dict_by_key(lookup_dict, term) if value_dict is not None: value_dict[value_key] = value return True return False def lookup_es_key(lookup_dict, term): """ Performs iterative dictionary search for the given term. :returns: The value identified by term or None if it cannot be found. """ value_dict, value_key = _find_es_dict_by_key(lookup_dict, term) return None if value_key is None else value_dict[value_key] def ts_to_dt(timestamp): if isinstance(timestamp, datetime.datetime): return timestamp dt = dateutil.parser.parse(timestamp) # Implicitly convert local timestamps to UTC if dt.tzinfo is None: dt = dt.replace(tzinfo=pytz.utc) return dt def dt_to_ts(dt): if not isinstance(dt, datetime.datetime): logging.warning('Expected datetime, got %s' % (type(dt))) return dt ts = dt.isoformat() # Round microseconds to milliseconds if dt.tzinfo is None: # Implicitly convert local times to UTC return ts + 'Z' # isoformat() uses microsecond accuracy and timezone offsets # but we should try to use millisecond accuracy and Z to indicate UTC return ts.replace('000+00:00', 'Z').replace('+00:00', 'Z') def ts_to_dt_with_format(timestamp, ts_format): if isinstance(timestamp, datetime.datetime): return timestamp dt = datetime.datetime.strptime(timestamp, ts_format) # Implicitly convert local timestamps to UTC if dt.tzinfo is None: dt = dt.replace(tzinfo=dateutil.tz.tzutc()) return dt def dt_to_ts_with_format(dt, ts_format): if not isinstance(dt, datetime.datetime): logging.warning('Expected datetime, got %s' % (type(dt))) return dt ts = dt.strftime(ts_format) return ts def ts_now(): return datetime.datetime.utcnow().replace(tzinfo=dateutil.tz.tzutc()) def inc_ts(timestamp, milliseconds=1): """Increment a timestamp by milliseconds.""" dt = ts_to_dt(timestamp) dt += datetime.timedelta(milliseconds=milliseconds) return dt_to_ts(dt) def pretty_ts(timestamp, tz=True): """Pretty-format the given timestamp (to be printed or logged hereafter). If tz, the timestamp will be converted to local time. Format: YYYY-MM-DD HH:MM TZ""" dt = timestamp if not isinstance(timestamp, datetime.datetime): dt = ts_to_dt(timestamp) if tz: dt = dt.astimezone(dateutil.tz.tzlocal()) return dt.strftime('%Y-%m-%d %H:%M %Z') def ts_add(ts, td): """ Allows a timedelta (td) add operation on a string timestamp (ts) """ return dt_to_ts(ts_to_dt(ts) + td) def hashable(obj): """ Convert obj to a hashable obj. We use the value of some fields from Elasticsearch as keys for dictionaries. This means that whatever Elasticsearch returns must be hashable, and it sometimes returns a list or dict.""" if not obj.__hash__: return str(obj) return obj def format_index(index, start, end, add_extra=False): """ Takes an index, specified using strftime format, start and end time timestamps, and outputs a wildcard based index string to match all possible timestamps. 
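    Illustrative example: with index='logstash-%Y.%m.%d' and timezone-aware
    start/end timestamps covering 2019-07-04 through 2019-07-05, this returns
    'logstash-2019.07.04,logstash-2019.07.05' (the order of the joined index
    names is not guaranteed).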
""" # Convert to UTC start -= start.utcoffset() end -= end.utcoffset() original_start = start indices = set() while start.date() <= end.date(): indices.add(start.strftime(index)) start += datetime.timedelta(days=1) num = len(indices) if add_extra: while len(indices) == num: original_start -= datetime.timedelta(days=1) new_index = original_start.strftime(index) assert new_index != index, "You cannot use a static index with search_extra_index" indices.add(new_index) return ','.join(indices) class EAException(Exception): pass def seconds(td): return td.seconds + td.days * 24 * 3600 def total_seconds(dt): # For python 2.6 compatability if dt is None: return 0 elif hasattr(dt, 'total_seconds'): return dt.total_seconds() else: return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10**6) / 10**6 def dt_to_int(dt): dt = dt.replace(tzinfo=None) return int(total_seconds((dt - datetime.datetime.utcfromtimestamp(0))) * 1000) def unixms_to_dt(ts): return unix_to_dt(float(ts) / 1000) def unix_to_dt(ts): dt = datetime.datetime.utcfromtimestamp(float(ts)) dt = dt.replace(tzinfo=dateutil.tz.tzutc()) return dt def dt_to_unix(dt): return int(total_seconds(dt - datetime.datetime(1970, 1, 1, tzinfo=dateutil.tz.tzutc()))) def dt_to_unixms(dt): return int(dt_to_unix(dt) * 1000) def cronite_datetime_to_timestamp(self, d): """ Converts a `datetime` object `d` into a UNIX timestamp. """ if d.tzinfo is not None: d = d.replace(tzinfo=None) - d.utcoffset() return total_seconds((d - datetime.datetime(1970, 1, 1))) def add_raw_postfix(field, is_five_or_above): if is_five_or_above: end = '.keyword' else: end = '.raw' if not field.endswith(end): field += end return field def replace_dots_in_field_names(document): """ This method destructively modifies document by replacing any dots in field names with an underscore. 
""" for key, value in list(document.items()): if isinstance(value, dict): value = replace_dots_in_field_names(value) if isinstance(key, string_types) and key.find('.') != -1: del document[key] document[key.replace('.', '_')] = value return document def elasticsearch_client(conf): """ returns an :class:`ElasticSearchClient` instance configured using an es_conn_config """ es_conn_conf = build_es_conn_config(conf) auth = Auth() es_conn_conf['http_auth'] = auth(host=es_conn_conf['es_host'], username=es_conn_conf['es_username'], password=es_conn_conf['es_password'], aws_region=es_conn_conf['aws_region'], profile_name=es_conn_conf['profile']) return ElasticSearchClient(es_conn_conf) def build_es_conn_config(conf): """ Given a conf dictionary w/ raw config properties 'use_ssl', 'es_host', 'es_port' 'es_username' and 'es_password', this will return a new dictionary with properly initialized values for 'es_host', 'es_port', 'use_ssl' and 'http_auth' which will be a basicauth username:password formatted string """ parsed_conf = {} parsed_conf['use_ssl'] = os.environ.get('ES_USE_SSL', False) parsed_conf['verify_certs'] = True parsed_conf['ca_certs'] = None parsed_conf['client_cert'] = None parsed_conf['client_key'] = None parsed_conf['http_auth'] = None parsed_conf['es_username'] = None parsed_conf['es_password'] = None parsed_conf['aws_region'] = None parsed_conf['profile'] = None parsed_conf['es_host'] = os.environ.get('ES_HOST', conf['es_host']) parsed_conf['es_port'] = int(os.environ.get('ES_PORT', conf['es_port'])) parsed_conf['es_url_prefix'] = '' parsed_conf['es_conn_timeout'] = conf.get('es_conn_timeout', 20) parsed_conf['send_get_body_as'] = conf.get('es_send_get_body_as', 'GET') if os.environ.get('ES_USERNAME'): parsed_conf['es_username'] = os.environ.get('ES_USERNAME') parsed_conf['es_password'] = os.environ.get('ES_PASSWORD') elif 'es_username' in conf: parsed_conf['es_username'] = conf['es_username'] parsed_conf['es_password'] = conf['es_password'] if 'aws_region' in conf: parsed_conf['aws_region'] = conf['aws_region'] # Deprecated if 'boto_profile' in conf: logging.warning('Found deprecated "boto_profile", use "profile" instead!') parsed_conf['profile'] = conf['boto_profile'] if 'profile' in conf: parsed_conf['profile'] = conf['profile'] if 'use_ssl' in conf: parsed_conf['use_ssl'] = conf['use_ssl'] if 'verify_certs' in conf: parsed_conf['verify_certs'] = conf['verify_certs'] if 'ca_certs' in conf: parsed_conf['ca_certs'] = conf['ca_certs'] if 'client_cert' in conf: parsed_conf['client_cert'] = conf['client_cert'] if 'client_key' in conf: parsed_conf['client_key'] = conf['client_key'] if 'es_url_prefix' in conf: parsed_conf['es_url_prefix'] = conf['es_url_prefix'] return parsed_conf def pytzfy(dt): # apscheduler requires pytz timezone objects # This function will replace a dateutil.tz one with a pytz one if dt.tzinfo is not None: new_tz = pytz.timezone(dt.tzinfo.tzname('Y is this even required??')) return dt.replace(tzinfo=new_tz) return dt def parse_duration(value): """Convert ``unit=num`` spec into a ``timedelta`` object.""" unit, num = value.split('=') return datetime.timedelta(**{unit: int(num)}) def parse_deadline(value): """Convert ``unit=num`` spec into a ``datetime`` object.""" duration = parse_duration(value) return ts_now() + duration def flatten_dict(dct, delim='.', prefix=''): ret = {} for key, val in list(dct.items()): if type(val) == dict: ret.update(flatten_dict(val, prefix=prefix + key + delim)) else: ret[prefix + key] = val return ret def resolve_string(string, match, 
missing_text=''): """ Given a python string that may contain references to fields on the match dictionary, the strings are replaced using the corresponding values. However, if the referenced field is not found on the dictionary, it is replaced by a default string. Strings can be formatted using the old-style format ('%(field)s') or the new-style format ('{match[field]}'). :param string: A string that may contain references to values of the 'match' dictionary. :param match: A dictionary with the values to replace where referenced by keys in the string. :param missing_text: The default text to replace a formatter with if the field doesnt exist. """ flat_match = flatten_dict(match) flat_match.update(match) dd_match = collections.defaultdict(lambda: missing_text, flat_match) dd_match['_missing_value'] = missing_text while True: try: string = string % dd_match string = string.format(**dd_match) break except KeyError as e: if '{%s}' % str(e).strip("'") not in string: break string = string.replace('{%s}' % str(e).strip("'"), '{_missing_value}') return string def should_scrolling_continue(rule_conf): """ Tells about a rule config if it can scroll still or should stop the scrolling. :param: rule_conf as dict :rtype: bool """ max_scrolling = rule_conf.get('max_scrolling_count') stop_the_scroll = 0 < max_scrolling <= rule_conf.get('scrolling_cycle') return not stop_the_scroll elastalert-0.2.4/elastalert/zabbix.py000066400000000000000000000061531364615736500176730ustar00rootroot00000000000000from alerts import Alerter # , BasicMatchString import logging from pyzabbix.api import ZabbixAPI from pyzabbix import ZabbixSender, ZabbixMetric from datetime import datetime class ZabbixClient(ZabbixAPI): def __init__(self, url='http://localhost', use_authenticate=False, user='Admin', password='zabbix', sender_host='localhost', sender_port=10051): self.url = url self.use_authenticate = use_authenticate self.sender_host = sender_host self.sender_port = sender_port self.metrics_chunk_size = 200 self.aggregated_metrics = [] self.logger = logging.getLogger(self.__class__.__name__) super(ZabbixClient, self).__init__(url=self.url, use_authenticate=self.use_authenticate, user=user, password=password) def send_metric(self, hostname, key, data): zm = ZabbixMetric(hostname, key, data) if self.send_aggregated_metrics: self.aggregated_metrics.append(zm) if len(self.aggregated_metrics) > self.metrics_chunk_size: self.logger.info("Sending: %s metrics" % (len(self.aggregated_metrics))) try: ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port).send(self.aggregated_metrics) self.aggregated_metrics = [] except Exception as e: self.logger.exception(e) pass else: try: ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port).send(zm) except Exception as e: self.logger.exception(e) pass class ZabbixAlerter(Alerter): # By setting required_options to a set of strings # You can ensure that the rule config file specifies all # of the options. Otherwise, ElastAlert will throw an exception # when trying to load the rule. required_options = frozenset(['zbx_sender_host', 'zbx_sender_port', 'zbx_host', 'zbx_key']) def __init__(self, *args): super(ZabbixAlerter, self).__init__(*args) self.zbx_sender_host = self.rule.get('zbx_sender_host', 'localhost') self.zbx_sender_port = self.rule.get('zbx_sender_port', 10051) self.zbx_host = self.rule.get('zbx_host') self.zbx_key = self.rule.get('zbx_key') # Alert is called def alert(self, matches): # Matches is a list of match dictionaries. 
# It contains more than one match when the alert has # the aggregation option set zm = [] for match in matches: ts_epoch = int(datetime.strptime(match['@timestamp'], "%Y-%m-%dT%H:%M:%S.%fZ").strftime('%s')) zm.append(ZabbixMetric(host=self.zbx_host, key=self.zbx_key, value=1, clock=ts_epoch)) ZabbixSender(zabbix_server=self.zbx_sender_host, zabbix_port=self.zbx_sender_port).send(zm) # get_info is called after an alert is sent to get data that is written back # to Elasticsearch in the field "alert_info" # It should return a dict of information relevant to what the alert does def get_info(self): return {'type': 'zabbix Alerter'} elastalert-0.2.4/example_rules/000077500000000000000000000000001364615736500165425ustar00rootroot00000000000000elastalert-0.2.4/example_rules/example_cardinality.yaml000077500000000000000000000026551364615736500234570ustar00rootroot00000000000000# Alert when the rate of events exceeds a threshold # (Optional) # Elasticsearch host # es_host: elasticsearch.example.com # (Optional) # Elasticsearch port # es_port: 14900 # (Required) # Index to search, wildcard supported index: logstash-* # (OptionaL) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: Example cardinality rule # (Required) # Type of alert. # the frequency rule type alerts when num_events events occur with timeframe time type: cardinality # (Required, cardinality specific) # Count the number of unique values for this field cardinality_field: "Hostname" # (Required, frequency specific) # Alert when there less than 15 unique hostnames min_cardinality: 15 # (Required, frequency specific) # The cardinality is defined as the number of unique values for the most recent 4 hours timeframe: hours: 4 # (Required) # A list of Elasticsearch filters used for find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - term: status: "active" # (Required) # The alert is use when a match is found alert: - "email" # (required, email specific) # a list of email addresses to send alerts to email: - "elastalert@example.com" elastalert-0.2.4/example_rules/example_change.yaml000077500000000000000000000036301364615736500223730ustar00rootroot00000000000000# Alert when some field changes between documents # This rule would alert on documents similar to the following: # {'username': 'bob', 'country_name': 'USA', '@timestamp': '2014-10-15T00:00:00'} # {'username': 'bob', 'country_name': 'Russia', '@timestamp': '2014-10-15T05:00:00'} # Because the user (query_key) bob logged in from different countries (compare_key) in the same day (timeframe) # (Optional) # Elasticsearch host # es_host: elasticsearch.example.com # (Optional) # Elasticsearch port # es_port: 14900 # (Optional) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: New country login # (Required) # Type of alert. 
# the change rule will alert when a certain field changes in two documents within a timeframe type: change # (Required) # Index to search, wildcard supported index: logstash-* # (Required, change specific) # The field to look for changes in compare_key: country_name # (Required, change specific) # Ignore documents without the compare_key (country_name) field ignore_null: true # (Required, change specific) # The change must occur in two documents with the same query_key query_key: username # (Required, change specific) # The value of compare_key must change in two events that are less than timeframe apart to trigger an alert timeframe: days: 1 # (Required) # A list of Elasticsearch filters used for find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - query: query_string: query: "document_type: login" # (Required) # The alert is use when a match is found alert: - "email" # (required, email specific) # a list of email addresses to send alerts to email: - "elastalert@example.com" elastalert-0.2.4/example_rules/example_frequency.yaml000077500000000000000000000025021364615736500231440ustar00rootroot00000000000000# Alert when the rate of events exceeds a threshold # (Optional) # Elasticsearch host # es_host: elasticsearch.example.com # (Optional) # Elasticsearch port # es_port: 14900 # (OptionaL) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: Example frequency rule # (Required) # Type of alert. # the frequency rule type alerts when num_events events occur with timeframe time type: frequency # (Required) # Index to search, wildcard supported index: logstash-* # (Required, frequency specific) # Alert when this many documents matching the query occur within a timeframe num_events: 50 # (Required, frequency specific) # num_events must occur within this amount of time to trigger an alert timeframe: hours: 4 # (Required) # A list of Elasticsearch filters used for find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - term: some_field: "some_value" # (Required) # The alert is use when a match is found alert: - "email" # (required, email specific) # a list of email addresses to send alerts to email: - "elastalert@example.com" elastalert-0.2.4/example_rules/example_new_term.yaml000077500000000000000000000032031364615736500227620ustar00rootroot00000000000000# Alert when a login event is detected for user "admin" never before seen IP # In this example, "login" logs contain which user has logged in from what IP # (Optional) # Elasticsearch host # es_host: elasticsearch.example.com # (Optional) # Elasticsearch port # es_port: 14900 # (OptionaL) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: Example new term rule # (Required) # Type of alert. 
# the new_term rule type alerts when a never-before-seen value appears in the fields being monitored type: new_term # (Required) # Index to search, wildcard supported index: logstash-* # (Required, new_term specific) # Monitor the field ip_address fields: - "ip_address" # (Optional, new_term specific) # This means that we will query 90 days' worth of data when ElastAlert starts to find which values of ip_address already exist # If they existed in the last 90 days, no alerts will be triggered for them when they appear terms_window_size: days: 90 # (Required) # A list of Elasticsearch filters used to find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html # We are filtering for only "login_event" type documents with username "admin" filter: - term: _type: "login_event" - term: username: admin # (Required) # The alert is used when a match is found alert: - "email" # (required, email specific) # a list of email addresses to send alerts to email: - "elastalert@example.com" elastalert-0.2.4/example_rules/example_opsgenie_frequency.yaml000077500000000000000000000036001364615736500250350ustar00rootroot00000000000000# Alert when the rate of events exceeds a threshold # (Optional) # Elasticsearch host #es_host: localhost # (Optional) # Elasticsearch port #es_port: 9200 # (Required) # OpsGenie credentials opsgenie_key: ogkey # (Optional) # OpsGenie user account that the alert will show as created by #opsgenie_account: neh # (Optional) # OpsGenie recipients of the alert #opsgenie_recipients: # - "neh" # (Optional) # OpsGenie recipients with args # opsgenie_recipients: # - {recipient} # opsgenie_recipients_args: # team_prefix:'user.email' # (Optional) # OpsGenie teams to notify #opsgenie_teams: # - "Infrastructure" # (Optional) # OpsGenie teams with args # opsgenie_teams: # - {team_prefix}-Team # opsgenie_teams_args: # team_prefix:'team' # (Optional) # OpsGenie alert tags opsgenie_tags: - "Production" # (Optional) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: opsgenie_rule # (Required) # Type of alert.
# the frequency rule type alerts when num_events events occur within the timeframe type: frequency # (Required) # Index to search, wildcard supported index: logstash-* #doc_type: "golog" # (Required, frequency specific) # Alert when this many documents matching the query occur within a timeframe num_events: 50 # (Required, frequency specific) # num_events must occur within this amount of time to trigger an alert timeframe: hours: 2 # (Required) # A list of Elasticsearch filters used to find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - query: query_string: query: "@message: *hihi*" # (Required) # The alert is used when a match is found alert: - "opsgenie" elastalert-0.2.4/example_rules/example_percentage_match.yaml000066400000000000000000000011321364615736500244270ustar00rootroot00000000000000name: Example Percentage Match type: percentage_match #es_host: localhost #es_port: 9200 index: logstash-http-request-* description: "95% of all http requests should be successful" filter: - term: _type: http_request buffer_time: minutes: 5 query_key: Hostname.keyword doc_type: http_request match_bucket_filter: - terms: ResponseStatus: [200] min_percentage: 95 #max_percentage: 60 #bucket_interval: # minutes: 1 #sync_bucket_interval: true #allow_buffer_time_overlap: true #use_run_every_query_size: true # (Required) # The alert is used when a match is found alert: - "debug" elastalert-0.2.4/example_rules/example_single_metric_agg.yaml000066400000000000000000000010141364615736500245770ustar00rootroot00000000000000name: Metricbeat CPU Spike Rule type: metric_aggregation #es_host: localhost #es_port: 9200 index: metricbeat-* buffer_time: hours: 1 metric_agg_key: system.cpu.user.pct metric_agg_type: avg query_key: beat.hostname doc_type: metricsets bucket_interval: minutes: 5 sync_bucket_interval: true #allow_buffer_time_overlap: true #use_run_every_query_size: true min_threshold: 0.1 max_threshold: 0.8 filter: - term: metricset.name: cpu # (Required) # The alert is used when a match is found alert: - "debug" elastalert-0.2.4/example_rules/example_spike.yaml000077500000000000000000000040261364615736500222610ustar00rootroot00000000000000# Alert when there is a sudden spike in the volume of events # (Optional) # Elasticsearch host # es_host: elasticsearch.example.com # (Optional) # Elasticsearch port # es_port: 14900 # (Optional) Connect with SSL to Elasticsearch #use_ssl: True # (Optional) basic-auth username and password for Elasticsearch #es_username: someusername #es_password: somepassword # (Required) # Rule name, must be unique name: Event spike # (Required) # Type of alert.
# the spike rule type compares the number of events within two sliding windows to each other type: spike # (Required) # Index to search, wildcard supported index: logstash-* # (Required one of _cur or _ref, spike specific) # The minimum number of events that will trigger an alert # For example, if there are only 2 events between 12:00 and 2:00, and 20 between 2:00 and 4:00 # _ref is 2 and _cur is 20, and the alert WILL fire because 20 is greater than threshold_cur and (_ref * spike_height) threshold_cur: 5 #threshold_ref: 5 # (Required, spike specific) # The size of the window used to determine average event frequency # We use two sliding windows each of size timeframe # To measure the 'reference' rate and the current rate timeframe: hours: 2 # (Required, spike specific) # The spike rule matches when the current window contains spike_height times more # events than the reference window spike_height: 3 # (Required, spike specific) # The direction of the spike # 'up' matches only spikes, 'down' matches only troughs # 'both' matches both spikes and troughs spike_type: "up" # (Required) # A list of Elasticsearch filters used to find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - query: query_string: query: "field: value" - type: value: "some_doc_type" # (Required) # The alert is used when a match is found alert: - "email" # (required, email specific) # a list of email addresses to send alerts to email: - "elastalert@example.com" elastalert-0.2.4/example_rules/example_spike_single_metric_agg.yaml000066400000000000000000000027061364615736500260030ustar00rootroot00000000000000name: Metricbeat Average CPU Spike Rule type: spike_aggregation #es_host: localhost #es_port: 9200 index: metricbeat-* timeframe: hours: 4 buffer_time: hours: 1 metric_agg_key: system.cpu.user.pct metric_agg_type: avg query_key: beat.hostname doc_type: metricsets #allow_buffer_time_overlap: true #use_run_every_query_size: true # (Required one of _cur or _ref, spike specific) # The minimum value of the aggregation that will trigger the alert # For example, if we're tracking the average for a metric whose average is 0.4 between 12:00 and 2:00 # and 0.95 between 2:00 and 4:00 with spike_height set to 2 and threshold_cur set to 0.9: # _ref is 0.4 and _cur is 0.95, and the alert WILL fire # because 0.95 is greater than threshold_cur (0.9) and (_ref * spike_height (.4 * 2)) threshold_cur: 0.9 # (Optional, min_doc_count) # for rules using a per-term aggregation via query_key, the minimum number of events # over the past buffer_time needed to update the spike tracker min_doc_count: 5 # (Required, spike specific) # The spike aggregation rule matches when the current window contains an aggregated value spike_height times higher # than the reference window spike_height: 2 # (Required, spike specific) # The direction of the spike # 'up' matches only spikes, 'down' matches only troughs # 'both' matches both spikes and troughs spike_type: "up" filter: - term: metricset.name: cpu # (Required) # The alert is used when a match is found alert: - "debug" elastalert-0.2.4/example_rules/jira_acct.txt000077500000000000000000000003421364615736500212240ustar00rootroot00000000000000# Example jira_account information file # You should make sure that this file is not globally readable or version controlled!
(Except for this example) # Jira username user: elastalert-jira # Jira password password: p455w0rd elastalert-0.2.4/example_rules/ssh-repeat-offender.yaml000066400000000000000000000030061364615736500232660ustar00rootroot00000000000000# Rule name, must be unique name: SSH abuse - repeat offender # Alert on x events in y seconds type: frequency # Alert when this many documents matching the query occur within a timeframe num_events: 2 # num_events must occur within this amount of time to trigger an alert timeframe: weeks: 1 # A list of elasticsearch filters used to find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - term: rule_name: "SSH abuse" index: elastalert # When the attacker continues, send a new alert after x weeks realert: weeks: 4 query_key: - match_body.source.ip include: - match_body.host.hostname - match_body.user.name - match_body.source.ip alert_subject: "SSH abuse (repeat offender) on <{}> | <{}|Show Dashboard>" alert_subject_args: - match_body.host.hostname - kibana_link alert_text: |- A repeat offender has been active on {}. IP: {} User: {} alert_text_args: - match_body.host.hostname - match_body.user.name - match_body.source.ip # The alert is used when a match is found alert: - slack slack_webhook_url: "https://hooks.slack.com/services/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" slack_username_override: "ElastAlert" # Alert body only contains a title and text alert_text_type: alert_text_only # Link to BitSensor Kibana Dashboard use_kibana4_dashboard: "https://dev.securely.ai/app/kibana#/dashboard/37739d80-a95c-11e9-b5ba-33a34ca252fb" elastalert-0.2.4/example_rules/ssh.yaml000066400000000000000000000027671364615736500202350ustar00rootroot00000000000000# Rule name, must be unique name: SSH abuse (ElastAlert 3.0.1) - 2 # Alert on x events in y seconds type: frequency # Alert when this many documents matching the query occur within a timeframe num_events: 20 # num_events must occur within this amount of time to trigger an alert timeframe: minutes: 60 # A list of elasticsearch filters used to find events # These filters are joined with AND and nested in a filtered query # For more info: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html filter: - query: query_string: query: "event.type:authentication_failure" index: auditbeat-* # When the attacker continues, send a new alert after x minutes realert: minutes: 1 query_key: - source.ip include: - host.hostname - user.name - source.ip include_match_in_root: true alert_subject: "SSH abuse on <{}> | <{}|Show Dashboard>" alert_subject_args: - host.hostname - kibana_link alert_text: |- An attack on {} is detected. The attacker looks like: User: {} IP: {} alert_text_args: - host.hostname - user.name - source.ip # The alert is used when a match is found alert: - debug slack_webhook_url: "https://hooks.slack.com/services/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" slack_username_override: "ElastAlert" # Alert body only contains a title and text alert_text_type: alert_text_only # Link to BitSensor Kibana Dashboard use_kibana4_dashboard: "https://dev.securely.ai/app/kibana#/dashboard/37739d80-a95c-11e9-b5ba-33a34ca252fb" elastalert-0.2.4/pytest.ini000066400000000000000000000001121364615736500157200ustar00rootroot00000000000000[pytest] markers = elasticsearch: mark a test as using elasticsearch.
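The pytest.ini above declares a single custom marker, elasticsearch, which separates integration tests that need a live cluster from the pure unit tests: they can be selected with pytest -m elasticsearch or excluded with pytest -m "not elasticsearch". A minimal sketch of how a test would opt into that marker (the test name and body here are hypothetical, not part of this repository):

import pytest


@pytest.mark.elasticsearch
def test_roundtrip_against_live_cluster():
    # Only meaningful when an Elasticsearch instance is reachable;
    # deselect with: pytest -m "not elasticsearch"
    assert True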
elastalert-0.2.4/requirements-dev.txt000066400000000000000000000001621364615736500177340ustar00rootroot00000000000000-r requirements.txt coverage==4.5.4 flake8 pre-commit pylint<1.4 pytest<3.3.0 setuptools sphinx_rtd_theme tox<2.0 elastalert-0.2.4/requirements.txt000066400000000000000000000006321364615736500171620ustar00rootroot00000000000000apscheduler>=3.3.0 aws-requests-auth>=0.3.0 blist>=1.3.6 boto3>=1.4.4 cffi>=1.11.5 configparser>=3.5.0 croniter>=0.3.16 elasticsearch>=7.0.0 envparse>=0.2.0 exotel>=0.1.3 jira>=1.0.10,<1.0.15 jsonschema>=3.0.2 mock>=2.0.0 prison>=0.1.2 py-zabbix==1.1.3 PyStaticConfiguration>=0.10.3 python-dateutil>=2.6.0,<2.7.0 python-magic>=0.4.15 PyYAML>=5.1 requests>=2.0.0 stomp.py>=4.1.17 texttable>=0.8.8 twilio==6.0.0 elastalert-0.2.4/setup.cfg000066400000000000000000000001441364615736500155150ustar00rootroot00000000000000[flake8] exclude = .git,__pycache__,.tox,docs,virtualenv_run,modules,venv,env max-line-length = 140 elastalert-0.2.4/setup.py000066400000000000000000000032331364615736500154100ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os from setuptools import find_packages from setuptools import setup base_dir = os.path.dirname(__file__) setup( name='elastalert', version='0.2.4', description='Runs custom filters on Elasticsearch and alerts on matches', author='Quentin Long', author_email='qlo@yelp.com', setup_requires='setuptools', license='Copyright 2014 Yelp', classifiers=[ 'Programming Language :: Python :: 3.6', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', ], entry_points={ 'console_scripts': ['elastalert-create-index=elastalert.create_index:main', 'elastalert-test-rule=elastalert.test_rule:main', 'elastalert-rule-from-kibana=elastalert.rule_from_kibana:main', 'elastalert=elastalert.elastalert:main']}, packages=find_packages(), package_data={'elastalert': ['schema.yaml', 'es_mappings/**/*.json']}, install_requires=[ 'apscheduler>=3.3.0', 'aws-requests-auth>=0.3.0', 'blist>=1.3.6', 'boto3>=1.4.4', 'configparser>=3.5.0', 'croniter>=0.3.16', 'elasticsearch==7.0.0', 'envparse>=0.2.0', 'exotel>=0.1.3', 'jira>=2.0.0', 'jsonschema>=3.0.2', 'mock>=2.0.0', 'prison>=0.1.2', 'PyStaticConfiguration>=0.10.3', 'python-dateutil>=2.6.0,<2.7.0', 'PyYAML>=3.12', 'requests>=2.10.0', 'stomp.py>=4.1.17', 'texttable>=0.8.8', 'twilio>=6.0.0,<6.1', 'python-magic>=0.4.15', 'cffi>=1.11.5' ] ) elastalert-0.2.4/supervisord.conf.example000066400000000000000000000014141364615736500205630ustar00rootroot00000000000000[unix_http_server] file=/var/run/elastalert_supervisor.sock [supervisord] logfile=/var/log/elastalert_supervisord.log logfile_maxbytes=1MB logfile_backups=2 loglevel=debug nodaemon=false directory=%(here)s [rpcinterface:supervisor] supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface [supervisorctl] serverurl=unix:///var/run/elastalert_supervisor.sock [program:elastalert] # running globally command = python elastalert.py --verbose # (alternative) using virtualenv # command=/path/to/venv/bin/elastalert --config /path/to/config.yaml --verbose process_name=elastalert autorestart=true startsecs=15 stopsignal=INT stopasgroup=true killasgroup=true stderr_logfile=/var/log/elastalert_stderr.log stderr_logfile_maxbytes=5MB 
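The setup.py above wires the command-line tools through setuptools console_scripts entry points, and the supervisord example's virtualenv variant invokes the generated elastalert wrapper. As a rough sketch under that assumption, the generated wrapper behaves roughly like the following (an illustration of how a console_scripts entry point resolves, not code shipped in this repository):

import sys

from elastalert.elastalert import main

if __name__ == '__main__':
    # 'elastalert=elastalert.elastalert:main' in setup.py maps the
    # 'elastalert' command to this call.
    sys.exit(main())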
elastalert-0.2.4/tests/000077500000000000000000000000001364615736500150375ustar00rootroot00000000000000elastalert-0.2.4/tests/__init__.py000066400000000000000000000000001364615736500171360ustar00rootroot00000000000000elastalert-0.2.4/tests/alerts_test.py000066400000000000000000002740641364615736500177570ustar00rootroot00000000000000# -*- coding: utf-8 -*- import base64 import datetime import json import subprocess import mock import pytest from jira.exceptions import JIRAError from elastalert.alerts import AlertaAlerter from elastalert.alerts import Alerter from elastalert.alerts import BasicMatchString from elastalert.alerts import CommandAlerter from elastalert.alerts import EmailAlerter from elastalert.alerts import HipChatAlerter from elastalert.alerts import HTTPPostAlerter from elastalert.alerts import JiraAlerter from elastalert.alerts import JiraFormattedMatchString from elastalert.alerts import MsTeamsAlerter from elastalert.alerts import PagerDutyAlerter from elastalert.alerts import SlackAlerter from elastalert.alerts import StrideAlerter from elastalert.loaders import FileRulesLoader from elastalert.opsgenie import OpsGenieAlerter from elastalert.util import ts_add from elastalert.util import ts_now class mock_rule: def get_match_str(self, event): return str(event) def test_basic_match_string(ea): ea.rules[0]['top_count_keys'] = ['username'] match = {'@timestamp': '1918-01-17', 'field': 'value', 'top_events_username': {'bob': 10, 'mallory': 5}} alert_text = str(BasicMatchString(ea.rules[0], match)) assert 'anytest' in alert_text assert 'some stuff happened' in alert_text assert 'username' in alert_text assert 'bob: 10' in alert_text assert 'field: value' in alert_text # Non serializable objects don't cause errors match['non-serializable'] = {open: 10} alert_text = str(BasicMatchString(ea.rules[0], match)) # unicode objects dont cause errors match['snowman'] = '☃' alert_text = str(BasicMatchString(ea.rules[0], match)) # Pretty printed objects match.pop('non-serializable') match['object'] = {'this': {'that': [1, 2, "3"]}} alert_text = str(BasicMatchString(ea.rules[0], match)) assert '"this": {\n "that": [\n 1,\n 2,\n "3"\n ]\n }' in alert_text ea.rules[0]['alert_text'] = 'custom text' alert_text = str(BasicMatchString(ea.rules[0], match)) assert 'custom text' in alert_text assert 'anytest' not in alert_text ea.rules[0]['alert_text_type'] = 'alert_text_only' alert_text = str(BasicMatchString(ea.rules[0], match)) assert 'custom text' in alert_text assert 'some stuff happened' not in alert_text assert 'username' not in alert_text assert 'field: value' not in alert_text ea.rules[0]['alert_text_type'] = 'exclude_fields' alert_text = str(BasicMatchString(ea.rules[0], match)) assert 'custom text' in alert_text assert 'some stuff happened' in alert_text assert 'username' in alert_text assert 'field: value' not in alert_text def test_jira_formatted_match_string(ea): match = {'foo': {'bar': ['one', 2, 'three']}, 'top_events_poof': 'phew'} alert_text = str(JiraFormattedMatchString(ea.rules[0], match)) tab = 4 * ' ' expected_alert_text_snippet = '{code}{\n' \ + tab + '"foo": {\n' \ + 2 * tab + '"bar": [\n' \ + 3 * tab + '"one",\n' \ + 3 * tab + '2,\n' \ + 3 * tab + '"three"\n' \ + 2 * tab + ']\n' \ + tab + '}\n' \ + '}{code}' assert expected_alert_text_snippet in alert_text def test_email(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 
'test@example.com', 'owner': 'owner_value', 'alert_subject': 'Test alert for {0}, owned by {1}', 'alert_subject_args': ['test_term', 'owner'], 'snowman': '☃'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'From: testfrom@test.test' in body assert 'Subject: Test alert for test_value, owned by owner_value' in body def test_email_from_field(): rule = {'name': 'test alert', 'email': ['testing@test.test'], 'email_add_domain': 'example.com', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_from_field': 'data.user', 'owner': 'owner_value'} # Found, without @ with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'data': {'user': 'qlo'}}]) assert mock_smtp.mock_calls[4][1][1] == ['qlo@example.com'] # Found, with @ rule['email_add_domain'] = '@example.com' with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'data': {'user': 'qlo'}}]) assert mock_smtp.mock_calls[4][1][1] == ['qlo@example.com'] # Found, list with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'data': {'user': ['qlo', 'foo']}}]) assert mock_smtp.mock_calls[4][1][1] == ['qlo@example.com', 'foo@example.com'] # Not found with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'data': {'foo': 'qlo'}}]) assert mock_smtp.mock_calls[4][1][1] == ['testing@test.test'] # Found, wrong type with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'data': {'user': 17}}]) assert mock_smtp.mock_calls[4][1][1] == ['testing@test.test'] def test_email_with_unicode_strings(): rule = {'name': 'test alert', 'email': 'testing@test.test', 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'owner': 'owner_value', 'alert_subject': 'Test alert for {0}, owned by {1}', 'alert_subject_args': ['test_term', 'owner'], 'snowman': '☃'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail(mock.ANY, ['testing@test.test'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'From: testfrom@test.test' in body assert 'Subject: Test alert for test_value, owned by owner_value' in body def test_email_with_auth(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': 
'@timestamp', 'email_reply_to': 'test@example.com', 'alert_subject': 'Test alert for {0}', 'alert_subject_args': ['test_term'], 'smtp_auth_file': 'file.txt', 'rule_file': '/tmp/foo.yaml'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: with mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'someone', 'password': 'hunter2'} mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().login('someone', 'hunter2'), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected def test_email_with_cert_key(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'alert_subject': 'Test alert for {0}', 'alert_subject_args': ['test_term'], 'smtp_auth_file': 'file.txt', 'smtp_cert_file': 'dummy/cert.crt', 'smtp_key_file': 'dummy/client.key', 'rule_file': '/tmp/foo.yaml'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: with mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'someone', 'password': 'hunter2'} mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile='dummy/cert.crt', keyfile='dummy/client.key'), mock.call().login('someone', 'hunter2'), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected def test_email_with_cc(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'cc': 'tester@testing.testing'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test', 'tester@testing.testing'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'CC: tester@testing.testing' in body assert 'From: testfrom@test.test' in body def test_email_with_bcc(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'bcc': 'tester@testing.testing'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test', 'tester@testing.testing'], mock.ANY), mock.call().quit()] assert 
mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'CC: tester@testing.testing' not in body assert 'From: testfrom@test.test' in body def test_email_with_cc_and_bcc(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'cc': ['test1@test.com', 'test2@test.com'], 'bcc': 'tester@testing.testing'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value'}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail( mock.ANY, [ 'testing@test.test', 'test@test.test', 'test1@test.com', 'test2@test.com', 'tester@testing.testing' ], mock.ANY ), mock.call().quit()] assert mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'CC: test1@test.com,test2@test.com' in body assert 'From: testfrom@test.test' in body def test_email_with_args(): rule = { 'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'from_addr': 'testfrom@test.test', 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'alert_subject': 'Test alert for {0} {1}', 'alert_subject_args': ['test_term', 'test.term'], 'alert_text': 'Test alert for {0} and {1} {2}', 'alert_text_args': ['test_arg1', 'test_arg2', 'test.arg3'], 'alert_missing_value': '' } with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value', 'test_arg1': 'testing', 'test': {'term': ':)', 'arg3': '☃'}}]) expected = [mock.call('localhost'), mock.call().ehlo(), mock.call().has_extn('STARTTLS'), mock.call().starttls(certfile=None, keyfile=None), mock.call().sendmail(mock.ANY, ['testing@test.test', 'test@test.test'], mock.ANY), mock.call().quit()] assert mock_smtp.mock_calls == expected body = mock_smtp.mock_calls[4][1][2] # Extract the MIME encoded message body body_text = base64.b64decode(body.split('\n\n')[-1][:-1]).decode('utf-8') assert 'testing' in body_text assert '' in body_text assert '☃' in body_text assert 'Reply-To: test@example.com' in body assert 'To: testing@test.test' in body assert 'From: testfrom@test.test' in body assert 'Subject: Test alert for test_value :)' in body def test_email_query_key_in_subject(): rule = {'name': 'test alert', 'email': ['testing@test.test', 'test@test.test'], 'type': mock_rule(), 'timestamp_field': '@timestamp', 'email_reply_to': 'test@example.com', 'query_key': 'username'} with mock.patch('elastalert.alerts.SMTP') as mock_smtp: mock_smtp.return_value = mock.Mock() alert = EmailAlerter(rule) alert.alert([{'test_term': 'test_value', 'username': 'werbenjagermanjensen'}]) body = mock_smtp.mock_calls[4][1][2] lines = body.split('\n') found_subject = False for line in lines: if line.startswith('Subject'): assert 'werbenjagermanjensen' in line found_subject = True assert found_subject def test_opsgenie_basic(): rule = {'name': 'testOGalert', 'opsgenie_key': 'ogkey', 'opsgenie_account': 'genies', 'opsgenie_addr': 'https://api.opsgenie.com/v2/alerts', 'opsgenie_recipients': ['lytics'], 'type': mock_rule()} with 
mock.patch('requests.post') as mock_post: alert = OpsGenieAlerter(rule) alert.alert([{'@timestamp': '2014-10-31T00:00:00'}]) print(("mock_post: {0}".format(mock_post._mock_call_args_list))) mcal = mock_post._mock_call_args_list print(('mcal: {0}'.format(mcal[0]))) assert mcal[0][0][0] == ('https://api.opsgenie.com/v2/alerts') assert mock_post.called assert mcal[0][1]['headers']['Authorization'] == 'GenieKey ogkey' assert mcal[0][1]['json']['source'] == 'ElastAlert' assert mcal[0][1]['json']['responders'] == [{'username': 'lytics', 'type': 'user'}] assert mcal[0][1]['json']['source'] == 'ElastAlert' def test_opsgenie_frequency(): rule = {'name': 'testOGalert', 'opsgenie_key': 'ogkey', 'opsgenie_account': 'genies', 'opsgenie_addr': 'https://api.opsgenie.com/v2/alerts', 'opsgenie_recipients': ['lytics'], 'type': mock_rule(), 'filter': [{'query': {'query_string': {'query': '*hihi*'}}}], 'alert': 'opsgenie'} with mock.patch('requests.post') as mock_post: alert = OpsGenieAlerter(rule) alert.alert([{'@timestamp': '2014-10-31T00:00:00'}]) assert alert.get_info()['recipients'] == rule['opsgenie_recipients'] print(("mock_post: {0}".format(mock_post._mock_call_args_list))) mcal = mock_post._mock_call_args_list print(('mcal: {0}'.format(mcal[0]))) assert mcal[0][0][0] == ('https://api.opsgenie.com/v2/alerts') assert mock_post.called assert mcal[0][1]['headers']['Authorization'] == 'GenieKey ogkey' assert mcal[0][1]['json']['source'] == 'ElastAlert' assert mcal[0][1]['json']['responders'] == [{'username': 'lytics', 'type': 'user'}] assert mcal[0][1]['json']['source'] == 'ElastAlert' assert mcal[0][1]['json']['source'] == 'ElastAlert' def test_opsgenie_alert_routing(): rule = {'name': 'testOGalert', 'opsgenie_key': 'ogkey', 'opsgenie_account': 'genies', 'opsgenie_addr': 'https://api.opsgenie.com/v2/alerts', 'opsgenie_recipients': ['{RECEIPIENT_PREFIX}'], 'opsgenie_recipients_args': {'RECEIPIENT_PREFIX': 'recipient'}, 'type': mock_rule(), 'filter': [{'query': {'query_string': {'query': '*hihi*'}}}], 'alert': 'opsgenie', 'opsgenie_teams': ['{TEAM_PREFIX}-Team'], 'opsgenie_teams_args': {'TEAM_PREFIX': 'team'}} with mock.patch('requests.post'): alert = OpsGenieAlerter(rule) alert.alert([{'@timestamp': '2014-10-31T00:00:00', 'team': "Test", 'recipient': "lytics"}]) assert alert.get_info()['teams'] == ['Test-Team'] assert alert.get_info()['recipients'] == ['lytics'] def test_opsgenie_default_alert_routing(): rule = {'name': 'testOGalert', 'opsgenie_key': 'ogkey', 'opsgenie_account': 'genies', 'opsgenie_addr': 'https://api.opsgenie.com/v2/alerts', 'opsgenie_recipients': ['{RECEIPIENT_PREFIX}'], 'opsgenie_recipients_args': {'RECEIPIENT_PREFIX': 'recipient'}, 'type': mock_rule(), 'filter': [{'query': {'query_string': {'query': '*hihi*'}}}], 'alert': 'opsgenie', 'opsgenie_teams': ['{TEAM_PREFIX}-Team'], 'opsgenie_default_receipients': ["devops@test.com"], 'opsgenie_default_teams': ["Test"] } with mock.patch('requests.post'): alert = OpsGenieAlerter(rule) alert.alert([{'@timestamp': '2014-10-31T00:00:00', 'team': "Test"}]) assert alert.get_info()['teams'] == ['{TEAM_PREFIX}-Team'] assert alert.get_info()['recipients'] == ['devops@test.com'] def test_opsgenie_details_with_constant_value(): rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': {'Foo': 'Bar'} } match = { '@timestamp': '2014-10-31T00:00:00' } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) 
mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': {'Foo': 'Bar'}, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_opsgenie_details_with_field(): rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': {'Foo': {'field': 'message'}} } match = { 'message': 'Bar', '@timestamp': '2014-10-31T00:00:00' } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': {'Foo': 'Bar'}, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_opsgenie_details_with_nested_field(): rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': {'Foo': {'field': 'nested.field'}} } match = { 'nested': { 'field': 'Bar' }, '@timestamp': '2014-10-31T00:00:00' } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': {'Foo': 'Bar'}, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_opsgenie_details_with_non_string_field(): rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': { 'Age': {'field': 'age'}, 'Message': {'field': 'message'} } } match = { 'age': 10, 'message': { 'format': 'The cow goes %s!', 'arg0': 'moo' } } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': { 'Age': '10', 'Message': "{'format': 'The cow goes %s!', 'arg0': 'moo'}" }, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_opsgenie_details_with_missing_field(): rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 
'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': { 'Message': {'field': 'message'}, 'Missing': {'field': 'missing'} } } match = { 'message': 'Testing', '@timestamp': '2014-10-31T00:00:00' } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': {'Message': 'Testing'}, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_opsgenie_details_with_environment_variable_replacement(environ): environ.update({ 'TEST_VAR': 'Bar' }) rule = { 'name': 'Opsgenie Details', 'type': mock_rule(), 'opsgenie_account': 'genies', 'opsgenie_key': 'ogkey', 'opsgenie_details': {'Foo': '$TEST_VAR'} } match = { '@timestamp': '2014-10-31T00:00:00' } alert = OpsGenieAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( 'https://api.opsgenie.com/v2/alerts', headers={ 'Content-Type': 'application/json', 'Authorization': 'GenieKey ogkey' }, json=mock.ANY, proxies=None ) expected_json = { 'description': BasicMatchString(rule, match).__str__(), 'details': {'Foo': 'Bar'}, 'message': 'ElastAlert: Opsgenie Details', 'priority': None, 'source': 'ElastAlert', 'tags': ['ElastAlert', 'Opsgenie Details'], 'user': 'genies' } actual_json = mock_post_request.call_args_list[0][1]['json'] assert expected_json == actual_json def test_jira(): description_txt = "Description stuff goes here like a runbook link." 
rule = { 'name': 'test alert', 'jira_account_file': 'jirafile', 'type': mock_rule(), 'jira_project': 'testproject', 'jira_priority': 0, 'jira_issuetype': 'testtype', 'jira_server': 'jiraserver', 'jira_label': 'testlabel', 'jira_component': 'testcomponent', 'jira_description': description_txt, 'jira_watchers': ['testwatcher1', 'testwatcher2'], 'timestamp_field': '@timestamp', 'alert_subject': 'Issue {0} occurred at {1}', 'alert_subject_args': ['test_term', '@timestamp'], 'rule_file': '/tmp/foo.yaml' } mock_priority = mock.Mock(id='5') with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) expected = [ mock.call('jiraserver', basic_auth=('jirauser', 'jirapassword')), mock.call().priorities(), mock.call().fields(), mock.call().create_issue( issuetype={'name': 'testtype'}, priority={'id': '5'}, project={'key': 'testproject'}, labels=['testlabel'], components=[{'name': 'testcomponent'}], description=mock.ANY, summary='Issue test_value occurred at 2014-10-31T00:00:00', ), mock.call().add_watcher(mock.ANY, 'testwatcher1'), mock.call().add_watcher(mock.ANY, 'testwatcher2'), ] # We don't care about additional calls to mock_jira, such as __str__ assert mock_jira.mock_calls[:6] == expected assert mock_jira.mock_calls[3][2]['description'].startswith(description_txt) # Search called if jira_bump_tickets rule['jira_bump_tickets'] = True with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.return_value = [] mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) expected.insert(3, mock.call().search_issues(mock.ANY)) assert mock_jira.mock_calls == expected # Remove a field if jira_ignore_in_title set rule['jira_ignore_in_title'] = 'test_term' with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.return_value = [] mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert 'test_value' not in mock_jira.mock_calls[3][1][0] # Issue is still created if search_issues throws an exception with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.side_effect = JIRAError mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert mock_jira.mock_calls == expected # 
Only bump after 3d of inactivity rule['jira_bump_after_inactivity'] = 3 mock_issue = mock.Mock() # Check ticket is bumped if it is updated 4 days ago mock_issue.fields.updated = str(ts_now() - datetime.timedelta(days=4)) with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.return_value = [mock_issue] mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) # Check add_comment is called assert len(mock_jira.mock_calls) == 5 assert '().add_comment' == mock_jira.mock_calls[4][0] # Check ticket is bumped is not bumped if ticket is updated right now mock_issue.fields.updated = str(ts_now()) with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.return_value = [mock_issue] mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = [] alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) # Only 4 calls for mock_jira since add_comment is not called assert len(mock_jira.mock_calls) == 4 # Test match resolved values rule = { 'name': 'test alert', 'jira_account_file': 'jirafile', 'type': mock_rule(), 'owner': 'the_owner', 'jira_project': 'testproject', 'jira_issuetype': 'testtype', 'jira_server': 'jiraserver', 'jira_label': 'testlabel', 'jira_component': 'testcomponent', 'jira_description': "DESC", 'jira_watchers': ['testwatcher1', 'testwatcher2'], 'timestamp_field': '@timestamp', 'jira_affected_user': "#gmail.the_user", 'rule_file': '/tmp/foo.yaml' } mock_issue = mock.Mock() mock_issue.fields.updated = str(ts_now() - datetime.timedelta(days=4)) mock_fields = [ {'name': 'affected user', 'id': 'affected_user_id', 'schema': {'type': 'string'}} ] with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value = mock.Mock() mock_jira.return_value.search_issues.return_value = [mock_issue] mock_jira.return_value.fields.return_value = mock_fields mock_jira.return_value.priorities.return_value = [mock_priority] alert = JiraAlerter(rule) alert.alert([{'gmail.the_user': 'jdoe', '@timestamp': '2014-10-31T00:00:00'}]) assert mock_jira.mock_calls[4][2]['affected_user_id'] == "jdoe" def test_jira_arbitrary_field_support(): description_txt = "Description stuff goes here like a runbook link." 
rule = { 'name': 'test alert', 'jira_account_file': 'jirafile', 'type': mock_rule(), 'owner': 'the_owner', 'jira_project': 'testproject', 'jira_issuetype': 'testtype', 'jira_server': 'jiraserver', 'jira_label': 'testlabel', 'jira_component': 'testcomponent', 'jira_description': description_txt, 'jira_watchers': ['testwatcher1', 'testwatcher2'], 'jira_arbitrary_reference_string_field': '$owner$', 'jira_arbitrary_string_field': 'arbitrary_string_value', 'jira_arbitrary_string_array_field': ['arbitrary_string_value1', 'arbitrary_string_value2'], 'jira_arbitrary_string_array_field_provided_as_single_value': 'arbitrary_string_value_in_array_field', 'jira_arbitrary_number_field': 1, 'jira_arbitrary_number_array_field': [2, 3], 'jira_arbitrary_number_array_field_provided_as_single_value': 1, 'jira_arbitrary_complex_field': 'arbitrary_complex_value', 'jira_arbitrary_complex_array_field': ['arbitrary_complex_value1', 'arbitrary_complex_value2'], 'jira_arbitrary_complex_array_field_provided_as_single_value': 'arbitrary_complex_value_in_array_field', 'timestamp_field': '@timestamp', 'alert_subject': 'Issue {0} occurred at {1}', 'alert_subject_args': ['test_term', '@timestamp'], 'rule_file': '/tmp/foo.yaml' } mock_priority = mock.MagicMock(id='5') mock_fields = [ {'name': 'arbitrary reference string field', 'id': 'arbitrary_reference_string_field', 'schema': {'type': 'string'}}, {'name': 'arbitrary string field', 'id': 'arbitrary_string_field', 'schema': {'type': 'string'}}, {'name': 'arbitrary string array field', 'id': 'arbitrary_string_array_field', 'schema': {'type': 'array', 'items': 'string'}}, { 'name': 'arbitrary string array field provided as single value', 'id': 'arbitrary_string_array_field_provided_as_single_value', 'schema': {'type': 'array', 'items': 'string'} }, {'name': 'arbitrary number field', 'id': 'arbitrary_number_field', 'schema': {'type': 'number'}}, {'name': 'arbitrary number array field', 'id': 'arbitrary_number_array_field', 'schema': {'type': 'array', 'items': 'number'}}, { 'name': 'arbitrary number array field provided as single value', 'id': 'arbitrary_number_array_field_provided_as_single_value', 'schema': {'type': 'array', 'items': 'number'} }, {'name': 'arbitrary complex field', 'id': 'arbitrary_complex_field', 'schema': {'type': 'ArbitraryType'}}, { 'name': 'arbitrary complex array field', 'id': 'arbitrary_complex_array_field', 'schema': {'type': 'array', 'items': 'ArbitraryType'} }, { 'name': 'arbitrary complex array field provided as single value', 'id': 'arbitrary_complex_array_field_provided_as_single_value', 'schema': {'type': 'array', 'items': 'ArbitraryType'} }, ] with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = mock_fields alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) expected = [ mock.call('jiraserver', basic_auth=('jirauser', 'jirapassword')), mock.call().priorities(), mock.call().fields(), mock.call().create_issue( issuetype={'name': 'testtype'}, project={'key': 'testproject'}, labels=['testlabel'], components=[{'name': 'testcomponent'}], description=mock.ANY, summary='Issue test_value occurred at 2014-10-31T00:00:00', arbitrary_reference_string_field='the_owner', arbitrary_string_field='arbitrary_string_value', 
arbitrary_string_array_field=['arbitrary_string_value1', 'arbitrary_string_value2'], arbitrary_string_array_field_provided_as_single_value=['arbitrary_string_value_in_array_field'], arbitrary_number_field=1, arbitrary_number_array_field=[2, 3], arbitrary_number_array_field_provided_as_single_value=[1], arbitrary_complex_field={'name': 'arbitrary_complex_value'}, arbitrary_complex_array_field=[{'name': 'arbitrary_complex_value1'}, {'name': 'arbitrary_complex_value2'}], arbitrary_complex_array_field_provided_as_single_value=[{'name': 'arbitrary_complex_value_in_array_field'}], ), mock.call().add_watcher(mock.ANY, 'testwatcher1'), mock.call().add_watcher(mock.ANY, 'testwatcher2'), ] # We don't care about additional calls to mock_jira, such as __str__ assert mock_jira.mock_calls[:6] == expected assert mock_jira.mock_calls[3][2]['description'].startswith(description_txt) # Reference an arbitrary string field that is not defined on the JIRA server rule['jira_nonexistent_field'] = 'nonexistent field value' with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = mock_fields with pytest.raises(Exception) as exception: alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert "Could not find a definition for the jira field 'nonexistent field'" in str(exception) del rule['jira_nonexistent_field'] # Reference a watcher that does not exist rule['jira_watchers'] = 'invalid_watcher' with mock.patch('elastalert.alerts.JIRA') as mock_jira, \ mock.patch('elastalert.alerts.yaml_loader') as mock_open: mock_open.return_value = {'user': 'jirauser', 'password': 'jirapassword'} mock_jira.return_value.priorities.return_value = [mock_priority] mock_jira.return_value.fields.return_value = mock_fields # Cause add_watcher to raise, which most likely means that the user did not exist mock_jira.return_value.add_watcher.side_effect = Exception() with pytest.raises(Exception) as exception: alert = JiraAlerter(rule) alert.alert([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert "Exception encountered when trying to add 'invalid_watcher' as a watcher. Does the user exist?" in str(exception) def test_kibana(ea): rule = {'filter': [{'query': {'query_string': {'query': 'xy:z'}}}], 'name': 'Test rule!', 'es_host': 'test.testing', 'es_port': 12345, 'timeframe': datetime.timedelta(hours=1), 'index': 'logstash-test', 'include': ['@timestamp'], 'timestamp_field': '@timestamp'} match = {'@timestamp': '2014-10-10T00:00:00'} with mock.patch("elastalert.elastalert.elasticsearch_client") as mock_es: mock_create = mock.Mock(return_value={'_id': 'ABCDEFGH'}) mock_es_inst = mock.Mock() mock_es_inst.index = mock_create mock_es_inst.host = 'test.testing' mock_es_inst.port = 12345 mock_es.return_value = mock_es_inst link = ea.generate_kibana_db(rule, match) assert 'http://test.testing:12345/_plugin/kibana/#/dashboard/temp/ABCDEFGH' == link # Name and index dashboard = json.loads(mock_create.call_args_list[0][1]['body']['dashboard']) assert dashboard['index']['default'] == 'logstash-test' assert 'Test rule!' 
in dashboard['title'] # Filters and time range filters = dashboard['services']['filter']['list'] assert 'xy:z' in filters['1']['query'] assert filters['1']['type'] == 'querystring' time_range = filters['0'] assert time_range['from'] == ts_add(match['@timestamp'], -rule['timeframe']) assert time_range['to'] == ts_add(match['@timestamp'], datetime.timedelta(minutes=10)) # Included fields active in table assert dashboard['rows'][1]['panels'][0]['fields'] == ['@timestamp'] def test_command(): # Test command as list with a formatted arg rule = {'command': ['/bin/test/', '--arg', '%(somefield)s']} alert = CommandAlerter(rule) match = {'@timestamp': '2014-01-01T00:00:00', 'somefield': 'foobarbaz', 'nested': {'field': 1}} with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with(['/bin/test', '--arg', 'foobarbaz'], stdin=subprocess.PIPE, shell=False) # Test command as string with formatted arg (old-style string format) rule = {'command': '/bin/test/ --arg %(somefield)s'} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test --arg foobarbaz', stdin=subprocess.PIPE, shell=False) # Test command as string without formatted arg (old-style string format) rule = {'command': '/bin/test/foo.sh'} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test/foo.sh', stdin=subprocess.PIPE, shell=True) # Test command as string with formatted arg (new-style string format) rule = {'command': '/bin/test/ --arg {match[somefield]}', 'new_style_string_format': True} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test --arg foobarbaz', stdin=subprocess.PIPE, shell=False) rule = {'command': '/bin/test/ --arg {match[nested][field]}', 'new_style_string_format': True} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test --arg 1', stdin=subprocess.PIPE, shell=False) # Test command as string without formatted arg (new-style string format) rule = {'command': '/bin/test/foo.sh', 'new_style_string_format': True} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test/foo.sh', stdin=subprocess.PIPE, shell=True) rule = {'command': '/bin/test/foo.sh {{bar}}', 'new_style_string_format': True} alert = CommandAlerter(rule) with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: alert.alert([match]) assert mock_popen.called_with('/bin/test/foo.sh {bar}', stdin=subprocess.PIPE, shell=True) # Test command with pipe_match_json rule = {'command': ['/bin/test/', '--arg', '%(somefield)s'], 'pipe_match_json': True} alert = CommandAlerter(rule) match = {'@timestamp': '2014-01-01T00:00:00', 'somefield': 'foobarbaz'} with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: mock_subprocess = mock.Mock() mock_popen.return_value = mock_subprocess mock_subprocess.communicate.return_value = (None, None) alert.alert([match]) assert mock_popen.called_with(['/bin/test', '--arg', 'foobarbaz'], stdin=subprocess.PIPE, shell=False) assert mock_subprocess.communicate.called_with(input=json.dumps(match)) # Test command with fail_on_non_zero_exit rule = 
{'command': ['/bin/test/', '--arg', '%(somefield)s'], 'fail_on_non_zero_exit': True} alert = CommandAlerter(rule) match = {'@timestamp': '2014-01-01T00:00:00', 'somefield': 'foobarbaz'} with pytest.raises(Exception) as exception: with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: mock_subprocess = mock.Mock() mock_popen.return_value = mock_subprocess mock_subprocess.wait.return_value = 1 alert.alert([match]) assert mock_popen.called_with(['/bin/test', '--arg', 'foobarbaz'], stdin=subprocess.PIPE, shell=False) assert "Non-zero exit code while running command" in str(exception) def test_ms_teams(): rule = { 'name': 'Test Rule', 'type': 'any', 'ms_teams_webhook_url': 'http://test.webhook.url', 'ms_teams_alert_summary': 'Alert from ElastAlert', 'alert_subject': 'Cool subject', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = MsTeamsAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { '@type': 'MessageCard', '@context': 'http://schema.org/extensions', 'summary': rule['ms_teams_alert_summary'], 'title': rule['alert_subject'], 'text': BasicMatchString(rule, match).__str__() } mock_post_request.assert_called_once_with( rule['ms_teams_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_ms_teams_uses_color_and_fixed_width_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'ms_teams_webhook_url': 'http://test.webhook.url', 'ms_teams_alert_summary': 'Alert from ElastAlert', 'ms_teams_alert_fixed_width': True, 'ms_teams_theme_color': '#124578', 'alert_subject': 'Cool subject', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = MsTeamsAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = BasicMatchString(rule, match).__str__() body = body.replace('`', "'") body = "```{0}```".format('```\n\n```'.join(x for x in body.split('\n'))).replace('\n``````', '') expected_data = { '@type': 'MessageCard', '@context': 'http://schema.org/extensions', 'summary': rule['ms_teams_alert_summary'], 'title': rule['alert_subject'], 'themeColor': '#124578', 'text': body } mock_post_request.assert_called_once_with( rule['ms_teams_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_slack_uses_custom_title(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert_subject': 'Cool subject', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'channel': '', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['alert_subject'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, 
timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_slack_uses_custom_timeout(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert_subject': 'Cool subject', 'alert': [], 'slack_timeout': 20 } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'channel': '', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['alert_subject'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=20 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_slack_uses_rule_name_when_custom_title_is_not_provided(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_webhook_url': ['http://please.dontgohere.slack'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'channel': '', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['name'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none', } mock_post_request.assert_called_once_with( rule['slack_webhook_url'][0], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_slack_uses_custom_slack_channel(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_webhook_url': ['http://please.dontgohere.slack'], 'slack_channel_override': '#test-alert', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'channel': '#test-alert', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['name'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none', } mock_post_request.assert_called_once_with( rule['slack_webhook_url'][0], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_slack_uses_list_of_custom_slack_channel(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_webhook_url': ['http://please.dontgohere.slack'], 'slack_channel_override': ['#test-alert', '#test-alert2'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data1 = { 'username': 'elastalert', 
'channel': '#test-alert', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['name'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none' } expected_data2 = { 'username': 'elastalert', 'channel': '#test-alert2', 'icon_emoji': ':ghost:', 'attachments': [ { 'color': 'danger', 'title': rule['name'], 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'text': '', 'parse': 'none' } mock_post_request.assert_called_with( rule['slack_webhook_url'][0], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) assert expected_data1 == json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data2 == json.loads(mock_post_request.call_args_list[1][1]['data']) def test_slack_attach_kibana_discover_url_when_generated(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_attach_kibana_discover_url': True, 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'kibana_discover_url': 'http://kibana#discover' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'parse': 'none', 'text': '', 'attachments': [ { 'color': 'danger', 'title': 'Test Rule', 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] }, { 'color': '#ec4b98', 'title': 'Discover in Kibana', 'title_link': 'http://kibana#discover' } ], 'icon_emoji': ':ghost:', 'channel': '' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data == actual_data def test_slack_attach_kibana_discover_url_when_not_generated(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_attach_kibana_discover_url': True, 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'parse': 'none', 'text': '', 'attachments': [ { 'color': 'danger', 'title': 'Test Rule', 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] } ], 'icon_emoji': ':ghost:', 'channel': '' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data == actual_data def test_slack_kibana_discover_title(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_attach_kibana_discover_url': True, 'slack_kibana_discover_title': 'Click to discover in Kibana', 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'kibana_discover_url': 'http://kibana#discover' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 
'elastalert', 'parse': 'none', 'text': '', 'attachments': [ { 'color': 'danger', 'title': 'Test Rule', 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] }, { 'color': '#ec4b98', 'title': 'Click to discover in Kibana', 'title_link': 'http://kibana#discover' } ], 'icon_emoji': ':ghost:', 'channel': '' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data == actual_data def test_slack_kibana_discover_color(): rule = { 'name': 'Test Rule', 'type': 'any', 'slack_attach_kibana_discover_url': True, 'slack_kibana_discover_color': 'blue', 'slack_webhook_url': 'http://please.dontgohere.slack', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = SlackAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'kibana_discover_url': 'http://kibana#discover' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'username': 'elastalert', 'parse': 'none', 'text': '', 'attachments': [ { 'color': 'danger', 'title': 'Test Rule', 'text': BasicMatchString(rule, match).__str__(), 'mrkdwn_in': ['text', 'pretext'], 'fields': [] }, { 'color': 'blue', 'title': 'Discover in Kibana', 'title_link': 'http://kibana#discover' } ], 'icon_emoji': ':ghost:', 'channel': '' } mock_post_request.assert_called_once_with( rule['slack_webhook_url'], data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, verify=False, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data == actual_data def test_http_alerter_with_payload(): rule = { 'name': 'Test HTTP Post Alerter With Payload', 'type': 'any', 'http_post_url': 'http://test.webhook.url', 'http_post_payload': {'posted_name': 'somefield'}, 'http_post_static_payload': {'name': 'somestaticname'}, 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = HTTPPostAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'posted_name': 'foobarbaz', 'name': 'somestaticname' } mock_post_request.assert_called_once_with( rule['http_post_url'], data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_http_alerter_with_payload_all_values(): rule = { 'name': 'Test HTTP Post Alerter With Payload', 'type': 'any', 'http_post_url': 'http://test.webhook.url', 'http_post_payload': {'posted_name': 'somefield'}, 'http_post_static_payload': {'name': 'somestaticname'}, 'http_post_all_values': True, 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = HTTPPostAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'posted_name': 'foobarbaz', 'name': 'somestaticname', '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } mock_post_request.assert_called_once_with( rule['http_post_url'], data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, timeout=10 ) 
assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_http_alerter_without_payload(): rule = { 'name': 'Test HTTP Post Alerter Without Payload', 'type': 'any', 'http_post_url': 'http://test.webhook.url', 'http_post_static_payload': {'name': 'somestaticname'}, 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = HTTPPostAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz', 'name': 'somestaticname' } mock_post_request.assert_called_once_with( rule['http_post_url'], data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Test PD Rule', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': '', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with('https://events.pagerduty.com/generic/2010-04-15/create_event.json', data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_v2(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_api_version': 'v2', 'pagerduty_v2_payload_class': 'ping failure', 'pagerduty_v2_payload_component': 'mysql', 'pagerduty_v2_payload_group': 'app-stack', 'pagerduty_v2_payload_severity': 'error', 'pagerduty_v2_payload_source': 'mysql.host.name', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'payload': { 'class': 'ping failure', 'component': 'mysql', 'group': 'app-stack', 'severity': 'error', 'source': 'mysql.host.name', 'summary': 'Test PD Rule', 'custom_details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: foobarbaz\n' }, 'timestamp': '2017-01-01T00:00:00' }, 'event_action': 'trigger', 'dedup_key': '', 'routing_key': 'magicalbadgers', } mock_post_request.assert_called_once_with('https://events.pagerduty.com/v2/enqueue', data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_custom_incident_key(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_incident_key': 'custom key', 'alert': [] } rules_loader = FileRulesLoader({}) 
rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Test PD Rule', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': 'custom key', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with(alert.url, data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_custom_incident_key_with_args(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_incident_key': 'custom {0}', 'pagerduty_incident_key_args': ['somefield'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Test PD Rule', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': 'custom foobarbaz', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with(alert.url, data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_custom_alert_subject(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'alert_subject': 'Hungry kittens', 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_incident_key': 'custom {0}', 'pagerduty_incident_key_args': ['somefield'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Hungry kittens', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': 'custom foobarbaz', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with(alert.url, data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_custom_alert_subject_with_args(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'alert_subject': '{0} kittens', 'alert_subject_args': ['somefield'], 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_incident_key': 'custom {0}', 'pagerduty_incident_key_args': ['someotherfield'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'Stinky', 'someotherfield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Stinky kittens', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 
2017-01-01T00:00:00\nsomefield: Stinky\nsomeotherfield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': 'custom foobarbaz', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with(alert.url, data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_pagerduty_alerter_custom_alert_subject_with_args_specifying_trigger(): rule = { 'name': 'Test PD Rule', 'type': 'any', 'alert_subject': '{0} kittens', 'alert_subject_args': ['somefield'], 'pagerduty_service_key': 'magicalbadgers', 'pagerduty_event_type': 'trigger', 'pagerduty_client_name': 'ponies inc.', 'pagerduty_incident_key': 'custom {0}', 'pagerduty_incident_key_args': ['someotherfield'], 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = PagerDutyAlerter(rule) match = { '@timestamp': '2017-01-01T00:00:00', 'somefield': 'Stinkiest', 'someotherfield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { 'client': 'ponies inc.', 'description': 'Stinkiest kittens', 'details': { 'information': 'Test PD Rule\n\n@timestamp: 2017-01-01T00:00:00\nsomefield: Stinkiest\nsomeotherfield: foobarbaz\n' }, 'event_type': 'trigger', 'incident_key': 'custom foobarbaz', 'service_key': 'magicalbadgers', } mock_post_request.assert_called_once_with(alert.url, data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) def test_alert_text_kw(ea): rule = ea.rules[0].copy() rule['alert_text'] = '{field} at {time}' rule['alert_text_kw'] = { '@timestamp': 'time', 'field': 'field', } match = {'@timestamp': '1918-01-17', 'field': 'value'} alert_text = str(BasicMatchString(rule, match)) body = '{field} at {@timestamp}'.format(**match) assert body in alert_text def test_alert_text_global_substitution(ea): rule = ea.rules[0].copy() rule['owner'] = 'the owner from rule' rule['priority'] = 'priority from rule' rule['abc'] = 'abc from rule' rule['alert_text'] = 'Priority: {0}; Owner: {1}; Abc: {2}' rule['alert_text_args'] = ['priority', 'owner', 'abc'] match = { '@timestamp': '2016-01-01', 'field': 'field_value', 'abc': 'abc from match', } alert_text = str(BasicMatchString(rule, match)) assert 'Priority: priority from rule' in alert_text assert 'Owner: the owner from rule' in alert_text # When the key exists in both places, it will come from the match assert 'Abc: abc from match' in alert_text def test_alert_text_kw_global_substitution(ea): rule = ea.rules[0].copy() rule['foo_rule'] = 'foo from rule' rule['owner'] = 'the owner from rule' rule['abc'] = 'abc from rule' rule['alert_text'] = 'Owner: {owner}; Foo: {foo}; Abc: {abc}' rule['alert_text_kw'] = { 'owner': 'owner', 'foo_rule': 'foo', 'abc': 'abc', } match = { '@timestamp': '2016-01-01', 'field': 'field_value', 'abc': 'abc from match', } alert_text = str(BasicMatchString(rule, match)) assert 'Owner: the owner from rule' in alert_text assert 'Foo: foo from rule' in alert_text # When the key exists in both places, it will come from the match assert 'Abc: abc from match' in alert_text def test_resolving_rule_references(ea): rule = { 'name': 'test_rule', 'type': mock_rule(), 'owner': 'the_owner', 'priority': 2, 'list_of_things': [ '1', '$owner$', [ '11', '$owner$', ], ], 'nested_dict': { 'nested_one': '1', 'nested_owner': '$owner$', }, 'resolved_string_reference': '$owner$', 'resolved_int_reference': 
'$priority$', 'unresolved_reference': '$foo$', } alert = Alerter(rule) assert 'the_owner' == alert.rule['resolved_string_reference'] assert 2 == alert.rule['resolved_int_reference'] assert '$foo$' == alert.rule['unresolved_reference'] assert 'the_owner' == alert.rule['list_of_things'][1] assert 'the_owner' == alert.rule['list_of_things'][2][1] assert 'the_owner' == alert.rule['nested_dict']['nested_owner'] def test_stride_plain_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = "{0}\n\n@timestamp: {1}\nsomefield: {2}".format( rule['name'], match['@timestamp'], match['somefield'] ) expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': body} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_stride_underline_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert_text': '<u>Underline Text</u>', 'alert_text_type': 'alert_text_only', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = "Underline Text" expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': body, 'marks': [ {'type': 'underline'} ]} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_stride_bold_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert_text': '<b>Bold Text</b>', 'alert_text_type': 'alert_text_only', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = "Bold Text" expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': body, 'marks': [ {'type': 'strong'} ]} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer 
{}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_stride_strong_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert_text': '<strong>Bold Text</strong>', 'alert_text_type': 'alert_text_only', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = "Bold Text" expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': body, 'marks': [ {'type': 'strong'} ]} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_stride_hyperlink(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert_text': '
<a href="http://stride.com">Link</a>', 'alert_text_type': 'alert_text_only', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) body = "Link" expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': body, 'marks': [ {'type': 'link', 'attrs': {'href': 'http://stride.com'}} ]} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_stride_html(): rule = { 'name': 'Test Rule', 'type': 'any', 'stride_access_token': 'token', 'stride_cloud_id': 'cloud_id', 'stride_conversation_id': 'conversation_id', 'alert_subject': 'Cool subject', 'alert_text': '<b>Alert</b>: we found something. <a href="http://stride.com">Link</a>', 'alert_text_type': 'alert_text_only', 'alert': [] } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = StrideAlerter(rule) match = { '@timestamp': '2016-01-01T00:00:00', 'somefield': 'foobarbaz' } with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ {'type': 'paragraph', 'content': [ {'type': 'text', 'text': 'Alert', 'marks': [ {'type': 'strong'} ]}, {'type': 'text', 'text': ': we found something. '}, {'type': 'text', 'text': 'Link', 'marks': [ {'type': 'link', 'attrs': {'href': 'http://stride.com'}} ]} ]} ]} ]}} mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json', 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, verify=True, proxies=None ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_hipchat_body_size_limit_text(): rule = { 'name': 'Test Rule', 'type': 'any', 'hipchat_auth_token': 'token', 'hipchat_room_id': 'room_id', 'hipchat_message_format': 'text', 'alert_subject': 'Cool subject', 'alert_text': 'Alert: we found something.\n\n{message}', 'alert_text_type': 'alert_text_only', 'alert': [], 'alert_text_kw': { '@timestamp': 'time', 'message': 'message', }, } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = HipChatAlerter(rule) match = { '@timestamp': '2018-01-01T00:00:00', 'message': 'foo bar\n' * 5000, } body = alert.create_alert_body([match]) assert len(body) <= 10000 def test_hipchat_body_size_limit_html(): rule = { 'name': 'Test Rule', 'type': 'any', 'hipchat_auth_token': 'token', 'hipchat_room_id': 'room_id', 'hipchat_message_format': 'html', 'alert_subject': 'Cool subject', 'alert_text': 'Alert: we found something.\n\n{message}', 'alert_text_type': 'alert_text_only', 'alert': [], 'alert_text_kw': { '@timestamp': 'time', 'message': 'message', }, } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = HipChatAlerter(rule) match = { '@timestamp': '2018-01-01T00:00:00', 'message': 'foo bar\n' * 5000, } body = alert.create_alert_body([match]) assert len(body) <= 10000 def test_alerta_no_auth(ea): rule = { 'name': 'Test Alerta rule!', 'alerta_api_url': 'http://elastalerthost:8080/api/alert', 'timeframe': 
datetime.timedelta(hours=1), 'timestamp_field': '@timestamp', 'alerta_api_skip_ssl': True, 'alerta_attributes_keys': ["hostname", "TimestampEvent", "senderIP"], 'alerta_attributes_values': ["%(key)s", "%(logdate)s", "%(sender_ip)s"], 'alerta_correlate': ["ProbeUP", "ProbeDOWN"], 'alerta_event': "ProbeUP", 'alerta_group': "Health", 'alerta_origin': "Elastalert", 'alerta_severity': "debug", 'alerta_text': "Probe %(hostname)s is UP at %(logdate)s GMT", 'alerta_value': "UP", 'type': 'any', 'alerta_use_match_timestamp': True, 'alert': 'alerta' } match = { '@timestamp': '2014-10-10T00:00:00', # 'key': ---- missing field on purpose, to verify that simply the text is left empty # 'logdate': ---- missing field on purpose, to verify that simply the text is left empty 'sender_ip': '1.1.1.1', 'hostname': 'aProbe' } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = AlertaAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { "origin": "Elastalert", "resource": "elastalert", "severity": "debug", "service": ["elastalert"], "tags": [], "text": "Probe aProbe is UP at GMT", "value": "UP", "createTime": "2014-10-10T00:00:00.000000Z", "environment": "Production", "rawData": "Test Alerta rule!\n\n@timestamp: 2014-10-10T00:00:00\nhostname: aProbe\nsender_ip: 1.1.1.1\n", "timeout": 86400, "correlate": ["ProbeUP", "ProbeDOWN"], "group": "Health", "attributes": {"senderIP": "1.1.1.1", "hostname": "", "TimestampEvent": ""}, "type": "elastalert", "event": "ProbeUP" } mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, headers={ 'content-type': 'application/json'}, verify=False ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_alerta_auth(ea): rule = { 'name': 'Test Alerta rule!', 'alerta_api_url': 'http://elastalerthost:8080/api/alert', 'alerta_api_key': '123456789ABCDEF', 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': '@timestamp', 'alerta_severity': "debug", 'type': 'any', 'alerta_use_match_timestamp': True, 'alert': 'alerta' } match = { '@timestamp': '2014-10-10T00:00:00', 'sender_ip': '1.1.1.1', 'hostname': 'aProbe' } rules_loader = FileRulesLoader({}) rules_loader.load_modules(rule) alert = AlertaAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, verify=True, headers={ 'content-type': 'application/json', 'Authorization': 'Key {}'.format(rule['alerta_api_key'])}) def test_alerta_new_style(ea): rule = { 'name': 'Test Alerta rule!', 'alerta_api_url': 'http://elastalerthost:8080/api/alert', 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': '@timestamp', 'alerta_attributes_keys': ["hostname", "TimestampEvent", "senderIP"], 'alerta_attributes_values': ["{hostname}", "{logdate}", "{sender_ip}"], 'alerta_correlate': ["ProbeUP", "ProbeDOWN"], 'alerta_event': "ProbeUP", 'alerta_group': "Health", 'alerta_origin': "Elastalert", 'alerta_severity': "debug", 'alerta_text': "Probe {hostname} is UP at {logdate} GMT", 'alerta_value': "UP", 'alerta_new_style_string_format': True, 'type': 'any', 'alerta_use_match_timestamp': True, 'alert': 'alerta' } match = { '@timestamp': '2014-10-10T00:00:00', # 'key': ---- missing field on purpose, to verify that simply the text is left empty # 'logdate': ---- missing field on purpose, to verify that simply the text is left empty 'sender_ip': '1.1.1.1', 'hostname': 'aProbe' } rules_loader = FileRulesLoader({}) 
rules_loader.load_modules(rule) alert = AlertaAlerter(rule) with mock.patch('requests.post') as mock_post_request: alert.alert([match]) expected_data = { "origin": "Elastalert", "resource": "elastalert", "severity": "debug", "service": ["elastalert"], "tags": [], "text": "Probe aProbe is UP at GMT", "value": "UP", "createTime": "2014-10-10T00:00:00.000000Z", "environment": "Production", "rawData": "Test Alerta rule!\n\n@timestamp: 2014-10-10T00:00:00\nhostname: aProbe\nsender_ip: 1.1.1.1\n", "timeout": 86400, "correlate": ["ProbeUP", "ProbeDOWN"], "group": "Health", "attributes": {"senderIP": "1.1.1.1", "hostname": "aProbe", "TimestampEvent": ""}, "type": "elastalert", "event": "ProbeUP" } mock_post_request.assert_called_once_with( alert.url, data=mock.ANY, verify=True, headers={ 'content-type': 'application/json'} ) assert expected_data == json.loads( mock_post_request.call_args_list[0][1]['data']) def test_alert_subject_size_limit_no_args(ea): rule = { 'name': 'test_rule', 'type': mock_rule(), 'owner': 'the_owner', 'priority': 2, 'alert_subject': 'A very long subject', 'alert_subject_max_len': 5 } alert = Alerter(rule) alertSubject = alert.create_custom_title([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert 5 == len(alertSubject) def test_alert_subject_size_limit_with_args(ea): rule = { 'name': 'test_rule', 'type': mock_rule(), 'owner': 'the_owner', 'priority': 2, 'alert_subject': 'Test alert for {0} {1}', 'alert_subject_args': ['test_term', 'test.term'], 'alert_subject_max_len': 6 } alert = Alerter(rule) alertSubject = alert.create_custom_title([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert 6 == len(alertSubject) elastalert-0.2.4/tests/auth_test.py000066400000000000000000000014431364615736500174130ustar00rootroot00000000000000# -*- coding: utf-8 -*- from elastalert.auth import Auth, RefeshableAWSRequestsAuth def test_auth_none(): auth = Auth()( host='localhost:8080', username=None, password=None, aws_region=None, profile_name=None ) assert not auth def test_auth_username_password(): auth = Auth()( host='localhost:8080', username='user', password='password', aws_region=None, profile_name=None ) assert auth == 'user:password' def test_auth_aws_region(): auth = Auth()( host='localhost:8080', username=None, password=None, aws_region='us-east-1', profile_name=None ) assert type(auth) == RefeshableAWSRequestsAuth assert auth.aws_region == 'us-east-1' elastalert-0.2.4/tests/base_test.py000066400000000000000000001741611364615736500173740ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import json import threading import elasticsearch import mock import pytest from elasticsearch.exceptions import ConnectionError from elasticsearch.exceptions import ElasticsearchException from elastalert.enhancements import BaseEnhancement from elastalert.enhancements import DropMatchException from elastalert.kibana import dashboard_temp from elastalert.util import dt_to_ts from elastalert.util import dt_to_unix from elastalert.util import dt_to_unixms from elastalert.util import EAException from elastalert.util import ts_now from elastalert.util import ts_to_dt from elastalert.util import unix_to_dt START_TIMESTAMP = '2014-09-26T12:34:45Z' END_TIMESTAMP = '2014-09-27T12:34:45Z' START = ts_to_dt(START_TIMESTAMP) END = ts_to_dt(END_TIMESTAMP) def _set_hits(ea_inst, hits): res = {'hits': {'total': len(hits), 'hits': hits}} ea_inst.client_es.return_value = res def generate_hits(timestamps, **kwargs): hits = [] for i, ts in 
enumerate(timestamps): data = {'_id': 'id{}'.format(i), '_source': {'@timestamp': ts}, '_type': 'logs', '_index': 'idx'} for key, item in kwargs.items(): data['_source'][key] = item # emulate process_hits(), add metadata to _source for field in ['_id', '_type', '_index']: data['_source'][field] = data[field] hits.append(data) return {'hits': {'total': len(hits), 'hits': hits}} def assert_alerts(ea_inst, calls): """ Takes a list of lists of timestamps. Asserts that an alert was called for each list, containing those timestamps. """ assert ea_inst.rules[0]['alert'][0].alert.call_count == len(calls) for call_num, call_args in enumerate(ea_inst.rules[0]['alert'][0].alert.call_args_list): assert not any([match['@timestamp'] not in calls[call_num] for match in call_args[0][0]]) assert len(call_args[0][0]) == len(calls[call_num]) def test_starttime(ea): invalid = ['2014-13-13', '2014-11-24T30:00:00', 'Not A Timestamp'] for ts in invalid: with pytest.raises((TypeError, ValueError)): ts_to_dt(ts) def test_init_rule(ea): # Simulate state of a rule just loaded from a file ea.rules[0]['minimum_starttime'] = datetime.datetime.now() new_rule = copy.copy(ea.rules[0]) list(map(new_rule.pop, ['agg_matches', 'current_aggregate_id', 'processed_hits', 'minimum_starttime'])) # Properties are copied from ea.rules[0] ea.rules[0]['starttime'] = '2014-01-02T00:11:22' ea.rules[0]['processed_hits'] = ['abcdefg'] new_rule = ea.init_rule(new_rule, False) for prop in ['starttime', 'agg_matches', 'current_aggregate_id', 'processed_hits', 'minimum_starttime', 'run_every']: assert new_rule[prop] == ea.rules[0][prop] # Properties are fresh new_rule = ea.init_rule(new_rule, True) new_rule.pop('starttime') assert 'starttime' not in new_rule assert new_rule['processed_hits'] == {} # Assert run_every is unique new_rule['run_every'] = datetime.timedelta(seconds=17) new_rule = ea.init_rule(new_rule, True) assert new_rule['run_every'] == datetime.timedelta(seconds=17) def test_query(ea): ea.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea.run_query(ea.rules[0], START, END) ea.thread_data.current_es.search.assert_called_with(body={ 'query': {'filtered': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': END_TIMESTAMP, 'gt': START_TIMESTAMP}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea.rules[0]['max_query_size'], scroll=ea.conf['scroll_keepalive']) def test_query_sixsix(ea_sixsix): ea_sixsix.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea_sixsix.run_query(ea_sixsix.rules[0], START, END) ea_sixsix.thread_data.current_es.search.assert_called_with(body={ 'query': {'bool': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': END_TIMESTAMP, 'gt': START_TIMESTAMP}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea_sixsix.rules[0]['max_query_size'], scroll=ea_sixsix.conf['scroll_keepalive']) def test_query_with_fields(ea): ea.rules[0]['_source_enabled'] = False ea.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea.run_query(ea.rules[0], START, END) ea.thread_data.current_es.search.assert_called_with(body={ 'query': {'filtered': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': END_TIMESTAMP, 'gt': START_TIMESTAMP}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}], 'fields': ['@timestamp']}, index='idx', ignore_unavailable=True, 
size=ea.rules[0]['max_query_size'], scroll=ea.conf['scroll_keepalive']) def test_query_sixsix_with_fields(ea_sixsix): ea_sixsix.rules[0]['_source_enabled'] = False ea_sixsix.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea_sixsix.run_query(ea_sixsix.rules[0], START, END) ea_sixsix.thread_data.current_es.search.assert_called_with(body={ 'query': {'bool': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': END_TIMESTAMP, 'gt': START_TIMESTAMP}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}], 'stored_fields': ['@timestamp']}, index='idx', ignore_unavailable=True, size=ea_sixsix.rules[0]['max_query_size'], scroll=ea_sixsix.conf['scroll_keepalive']) def test_query_with_unix(ea): ea.rules[0]['timestamp_type'] = 'unix' ea.rules[0]['dt_to_ts'] = dt_to_unix ea.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea.run_query(ea.rules[0], START, END) start_unix = dt_to_unix(START) end_unix = dt_to_unix(END) ea.thread_data.current_es.search.assert_called_with( body={'query': {'filtered': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': end_unix, 'gt': start_unix}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea.rules[0]['max_query_size'], scroll=ea.conf['scroll_keepalive']) def test_query_sixsix_with_unix(ea_sixsix): ea_sixsix.rules[0]['timestamp_type'] = 'unix' ea_sixsix.rules[0]['dt_to_ts'] = dt_to_unix ea_sixsix.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea_sixsix.run_query(ea_sixsix.rules[0], START, END) start_unix = dt_to_unix(START) end_unix = dt_to_unix(END) ea_sixsix.thread_data.current_es.search.assert_called_with( body={'query': {'bool': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': end_unix, 'gt': start_unix}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea_sixsix.rules[0]['max_query_size'], scroll=ea_sixsix.conf['scroll_keepalive']) def test_query_with_unixms(ea): ea.rules[0]['timestamp_type'] = 'unixms' ea.rules[0]['dt_to_ts'] = dt_to_unixms ea.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea.run_query(ea.rules[0], START, END) start_unix = dt_to_unixms(START) end_unix = dt_to_unixms(END) ea.thread_data.current_es.search.assert_called_with( body={'query': {'filtered': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': end_unix, 'gt': start_unix}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea.rules[0]['max_query_size'], scroll=ea.conf['scroll_keepalive']) def test_query_sixsix_with_unixms(ea_sixsix): ea_sixsix.rules[0]['timestamp_type'] = 'unixms' ea_sixsix.rules[0]['dt_to_ts'] = dt_to_unixms ea_sixsix.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea_sixsix.run_query(ea_sixsix.rules[0], START, END) start_unix = dt_to_unixms(START) end_unix = dt_to_unixms(END) ea_sixsix.thread_data.current_es.search.assert_called_with( body={'query': {'bool': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': end_unix, 'gt': start_unix}}}]}}}}, 'sort': [{'@timestamp': {'order': 'asc'}}]}, index='idx', _source_include=['@timestamp'], ignore_unavailable=True, size=ea_sixsix.rules[0]['max_query_size'], scroll=ea_sixsix.conf['scroll_keepalive']) def test_no_hits(ea): ea.thread_data.current_es.search.return_value = {'hits': {'total': 
0, 'hits': []}} ea.run_query(ea.rules[0], START, END) assert ea.rules[0]['type'].add_data.call_count == 0 def test_no_terms_hits(ea): ea.rules[0]['use_terms_query'] = True ea.rules[0]['query_key'] = 'QWERTY' ea.rules[0]['doc_type'] = 'uiop' ea.thread_data.current_es.deprecated_search.return_value = {'hits': {'total': 0, 'hits': []}} ea.run_query(ea.rules[0], START, END) assert ea.rules[0]['type'].add_terms_data.call_count == 0 def test_some_hits(ea): hits = generate_hits([START_TIMESTAMP, END_TIMESTAMP]) hits_dt = generate_hits([START, END]) ea.thread_data.current_es.search.return_value = hits ea.run_query(ea.rules[0], START, END) assert ea.rules[0]['type'].add_data.call_count == 1 ea.rules[0]['type'].add_data.assert_called_with([x['_source'] for x in hits_dt['hits']['hits']]) def test_some_hits_unix(ea): ea.rules[0]['timestamp_type'] = 'unix' ea.rules[0]['dt_to_ts'] = dt_to_unix ea.rules[0]['ts_to_dt'] = unix_to_dt hits = generate_hits([dt_to_unix(START), dt_to_unix(END)]) hits_dt = generate_hits([START, END]) ea.thread_data.current_es.search.return_value = copy.deepcopy(hits) ea.run_query(ea.rules[0], START, END) assert ea.rules[0]['type'].add_data.call_count == 1 ea.rules[0]['type'].add_data.assert_called_with([x['_source'] for x in hits_dt['hits']['hits']]) def _duplicate_hits_generator(timestamps, **kwargs): """Generator repeatedly returns identical hits dictionaries """ while True: yield generate_hits(timestamps, **kwargs) def test_duplicate_timestamps(ea): ea.thread_data.current_es.search.side_effect = _duplicate_hits_generator([START_TIMESTAMP] * 3, blah='duplicate') ea.run_query(ea.rules[0], START, ts_to_dt('2014-01-01T00:00:00Z')) assert len(ea.rules[0]['type'].add_data.call_args_list[0][0][0]) == 3 assert ea.rules[0]['type'].add_data.call_count == 1 # Run the query again, duplicates will be removed and not added ea.run_query(ea.rules[0], ts_to_dt('2014-01-01T00:00:00Z'), END) assert ea.rules[0]['type'].add_data.call_count == 1 def test_match(ea): hits = generate_hits([START_TIMESTAMP, END_TIMESTAMP]) ea.thread_data.current_es.search.return_value = hits ea.rules[0]['type'].matches = [{'@timestamp': END}] with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) ea.rules[0]['alert'][0].alert.called_with({'@timestamp': END_TIMESTAMP}) assert ea.rules[0]['alert'][0].alert.call_count == 1 def test_run_rule_calls_garbage_collect(ea): start_time = '2014-09-26T00:00:00Z' end_time = '2014-09-26T12:00:00Z' ea.buffer_time = datetime.timedelta(hours=1) ea.run_every = datetime.timedelta(hours=1) with mock.patch.object(ea.rules[0]['type'], 'garbage_collect') as mock_gc, \ mock.patch.object(ea, 'run_query'): ea.run_rule(ea.rules[0], ts_to_dt(end_time), ts_to_dt(start_time)) # Running ElastAlert every hour for 12 hours, we should see self.garbage_collect called 12 times. 
assert mock_gc.call_count == 12 # The calls should be spaced 1 hour apart expected_calls = [ts_to_dt(start_time) + datetime.timedelta(hours=i) for i in range(1, 13)] for e in expected_calls: mock_gc.assert_any_call(e) def run_rule_query_exception(ea, mock_es): with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es_init: mock_es_init.return_value = mock_es ea.run_rule(ea.rules[0], END, START) # Assert neither add_data nor garbage_collect were called # and that starttime did not change assert ea.rules[0].get('starttime') == START assert ea.rules[0]['type'].add_data.call_count == 0 assert ea.rules[0]['type'].garbage_collect.call_count == 0 assert ea.rules[0]['type'].add_count_data.call_count == 0 def test_query_exception(ea): mock_es = mock.Mock() mock_es.search.side_effect = ElasticsearchException run_rule_query_exception(ea, mock_es) def test_query_exception_count_query(ea): ea.rules[0]['use_count_query'] = True ea.rules[0]['doc_type'] = 'blahblahblahblah' mock_es = mock.Mock() mock_es.count.side_effect = ElasticsearchException run_rule_query_exception(ea, mock_es) def test_match_with_module(ea): mod = BaseEnhancement(ea.rules[0]) mod.process = mock.Mock() ea.rules[0]['match_enhancements'] = [mod] test_match(ea) mod.process.assert_called_with({'@timestamp': END, 'num_hits': 0, 'num_matches': 1}) def test_match_with_module_from_pending(ea): mod = BaseEnhancement(ea.rules[0]) mod.process = mock.Mock() ea.rules[0]['match_enhancements'] = [mod] ea.rules[0].pop('aggregation') pending_alert = {'match_body': {'foo': 'bar'}, 'rule_name': ea.rules[0]['name'], 'alert_time': START_TIMESTAMP, '@timestamp': START_TIMESTAMP} # First call, return the pending alert, second, no associated aggregated alerts ea.writeback_es.deprecated_search.side_effect = [{'hits': {'hits': [{'_id': 'ABCD', '_index': 'wb', '_source': pending_alert}]}}, {'hits': {'hits': []}}] ea.send_pending_alerts() assert mod.process.call_count == 0 # If aggregation is set, enhancement IS called pending_alert = {'match_body': {'foo': 'bar'}, 'rule_name': ea.rules[0]['name'], 'alert_time': START_TIMESTAMP, '@timestamp': START_TIMESTAMP} ea.writeback_es.deprecated_search.side_effect = [{'hits': {'hits': [{'_id': 'ABCD', '_index': 'wb', '_source': pending_alert}]}}, {'hits': {'hits': []}}] ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10) ea.send_pending_alerts() assert mod.process.call_count == 1 def test_match_with_module_with_agg(ea): mod = BaseEnhancement(ea.rules[0]) mod.process = mock.Mock() ea.rules[0]['match_enhancements'] = [mod] ea.rules[0]['aggregation'] = datetime.timedelta(minutes=15) hits = generate_hits([START_TIMESTAMP, END_TIMESTAMP]) ea.thread_data.current_es.search.return_value = hits ea.rules[0]['type'].matches = [{'@timestamp': END}] with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert mod.process.call_count == 0 def test_match_with_enhancements_first(ea): mod = BaseEnhancement(ea.rules[0]) mod.process = mock.Mock() ea.rules[0]['match_enhancements'] = [mod] ea.rules[0]['aggregation'] = datetime.timedelta(minutes=15) ea.rules[0]['run_enhancements_first'] = True hits = generate_hits([START_TIMESTAMP, END_TIMESTAMP]) ea.thread_data.current_es.search.return_value = hits ea.rules[0]['type'].matches = [{'@timestamp': END}] with mock.patch('elastalert.elastalert.elasticsearch_client'): with mock.patch.object(ea, 'add_aggregated_alert') as add_alert: ea.run_rule(ea.rules[0], END, START) mod.process.assert_called_with({'@timestamp': END, 'num_hits': 
0, 'num_matches': 1}) assert add_alert.call_count == 1 # Assert that dropmatchexception behaves properly mod.process = mock.MagicMock(side_effect=DropMatchException) ea.rules[0]['type'].matches = [{'@timestamp': END}] with mock.patch('elastalert.elastalert.elasticsearch_client'): with mock.patch.object(ea, 'add_aggregated_alert') as add_alert: ea.run_rule(ea.rules[0], END, START) mod.process.assert_called_with({'@timestamp': END, 'num_hits': 0, 'num_matches': 1}) assert add_alert.call_count == 0 def test_agg_matchtime(ea): ea.max_aggregation = 1337 hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45'] alerttime1 = dt_to_ts(ts_to_dt(hits_timestamps[0]) + datetime.timedelta(minutes=10)) hits = generate_hits(hits_timestamps) ea.thread_data.current_es.search.return_value = hits with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es: # Aggregate first two, query over full range mock_es.return_value = ea.thread_data.current_es ea.rules[0]['aggregate_by_match_time'] = True ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10) ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps] ea.run_rule(ea.rules[0], END, START) # Assert that the three matches were added to Elasticsearch call1 = ea.writeback_es.index.call_args_list[0][1]['body'] call2 = ea.writeback_es.index.call_args_list[1][1]['body'] call3 = ea.writeback_es.index.call_args_list[2][1]['body'] assert call1['match_body']['@timestamp'] == '2014-09-26T12:34:45' assert not call1['alert_sent'] assert 'aggregate_id' not in call1 assert call1['alert_time'] == alerttime1 assert call2['match_body']['@timestamp'] == '2014-09-26T12:40:45' assert not call2['alert_sent'] assert call2['aggregate_id'] == 'ABCD' assert call3['match_body']['@timestamp'] == '2014-09-26T12:47:45' assert not call3['alert_sent'] assert 'aggregate_id' not in call3 # First call - Find all pending alerts (only entries without agg_id) # Second call - Find matches with agg_id == 'ABCD' # Third call - Find matches with agg_id == 'CDEF' ea.writeback_es.deprecated_search.side_effect = [{'hits': {'hits': [{'_id': 'ABCD', '_index': 'wb', '_source': call1}, {'_id': 'CDEF', '_index': 'wb', '_source': call3}]}}, {'hits': {'hits': [{'_id': 'BCDE', '_index': 'wb', '_source': call2}]}}, {'hits': {'total': 0, 'hits': []}}] with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es: ea.send_pending_alerts() # Assert that current_es was refreshed from the aggregate rules assert mock_es.called_with(host='', port='') assert mock_es.call_count == 2 assert_alerts(ea, [hits_timestamps[:2], hits_timestamps[2:]]) call1 = ea.writeback_es.deprecated_search.call_args_list[7][1]['body'] call2 = ea.writeback_es.deprecated_search.call_args_list[8][1]['body'] call3 = ea.writeback_es.deprecated_search.call_args_list[9][1]['body'] call4 = ea.writeback_es.deprecated_search.call_args_list[10][1]['body'] assert 'alert_time' in call2['filter']['range'] assert call3['query']['query_string']['query'] == 'aggregate_id:ABCD' assert call4['query']['query_string']['query'] == 'aggregate_id:CDEF' assert ea.writeback_es.deprecated_search.call_args_list[9][1]['size'] == 1337 def test_agg_not_matchtime(ea): ea.max_aggregation = 1337 hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45'] match_time = ts_to_dt('2014-09-26T12:55:00Z') hits = generate_hits(hits_timestamps) ea.thread_data.current_es.search.return_value = hits with mock.patch('elastalert.elastalert.elasticsearch_client'): with 
mock.patch('elastalert.elastalert.ts_now', return_value=match_time): ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10) ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps] ea.run_rule(ea.rules[0], END, START) # Assert that the three matches were added to Elasticsearch call1 = ea.writeback_es.index.call_args_list[0][1]['body'] call2 = ea.writeback_es.index.call_args_list[1][1]['body'] call3 = ea.writeback_es.index.call_args_list[2][1]['body'] assert call1['match_body']['@timestamp'] == '2014-09-26T12:34:45' assert not call1['alert_sent'] assert 'aggregate_id' not in call1 assert call1['alert_time'] == dt_to_ts(match_time + datetime.timedelta(minutes=10)) assert call2['match_body']['@timestamp'] == '2014-09-26T12:40:45' assert not call2['alert_sent'] assert call2['aggregate_id'] == 'ABCD' assert call3['match_body']['@timestamp'] == '2014-09-26T12:47:45' assert not call3['alert_sent'] assert call3['aggregate_id'] == 'ABCD' def test_agg_cron(ea): ea.max_aggregation = 1337 hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45'] hits = generate_hits(hits_timestamps) ea.thread_data.current_es.search.return_value = hits alerttime1 = dt_to_ts(ts_to_dt('2014-09-26T12:46:00')) alerttime2 = dt_to_ts(ts_to_dt('2014-09-26T13:04:00')) with mock.patch('elastalert.elastalert.elasticsearch_client'): with mock.patch('elastalert.elastalert.croniter.get_next') as mock_ts: # Aggregate first two, query over full range mock_ts.side_effect = [dt_to_unix(ts_to_dt('2014-09-26T12:46:00')), dt_to_unix(ts_to_dt('2014-09-26T13:04:00'))] ea.rules[0]['aggregation'] = {'schedule': '*/5 * * * *'} ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps] ea.run_rule(ea.rules[0], END, START) # Assert that the three matches were added to Elasticsearch call1 = ea.writeback_es.index.call_args_list[0][1]['body'] call2 = ea.writeback_es.index.call_args_list[1][1]['body'] call3 = ea.writeback_es.index.call_args_list[2][1]['body'] assert call1['match_body']['@timestamp'] == '2014-09-26T12:34:45' assert not call1['alert_sent'] assert 'aggregate_id' not in call1 assert call1['alert_time'] == alerttime1 assert call2['match_body']['@timestamp'] == '2014-09-26T12:40:45' assert not call2['alert_sent'] assert call2['aggregate_id'] == 'ABCD' assert call3['match_body']['@timestamp'] == '2014-09-26T12:47:45' assert call3['alert_time'] == alerttime2 assert not call3['alert_sent'] assert 'aggregate_id' not in call3 def test_agg_no_writeback_connectivity(ea): """ Tests that if writeback_es throws an exception, the matches will be added to 'agg_matches' and when run again, that they will be passed again to add_aggregated_alert """ hit1, hit2, hit3 = '2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:47:45' hits = generate_hits([hit1, hit2, hit3]) ea.thread_data.current_es.search.return_value = hits ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10) ea.rules[0]['type'].matches = [{'@timestamp': hit1}, {'@timestamp': hit2}, {'@timestamp': hit3}] ea.writeback_es.index.side_effect = elasticsearch.exceptions.ElasticsearchException('Nope') with mock.patch('elastalert.elastalert.elasticsearch_client'): with mock.patch.object(ea, 'find_pending_aggregate_alert', return_value=None): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['agg_matches'] == [{'@timestamp': hit1, 'num_hits': 0, 'num_matches': 3}, {'@timestamp': hit2, 'num_hits': 0, 'num_matches': 3}, {'@timestamp': hit3, 'num_hits': 0, 'num_matches': 3}] 
ea.thread_data.current_es.search.return_value = {'hits': {'total': 0, 'hits': []}} ea.add_aggregated_alert = mock.Mock() with mock.patch.object(ea, 'run_query'): ea.run_rule(ea.rules[0], END, START) ea.add_aggregated_alert.assert_any_call({'@timestamp': hit1, 'num_hits': 0, 'num_matches': 3}, ea.rules[0]) ea.add_aggregated_alert.assert_any_call({'@timestamp': hit2, 'num_hits': 0, 'num_matches': 3}, ea.rules[0]) ea.add_aggregated_alert.assert_any_call({'@timestamp': hit3, 'num_hits': 0, 'num_matches': 3}, ea.rules[0]) def test_agg_with_aggregation_key(ea): ea.max_aggregation = 1337 hits_timestamps = ['2014-09-26T12:34:45', '2014-09-26T12:40:45', '2014-09-26T12:43:45'] match_time = ts_to_dt('2014-09-26T12:45:00Z') hits = generate_hits(hits_timestamps) ea.thread_data.current_es.search.return_value = hits with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es: mock_es.return_value = ea.thread_data.current_es with mock.patch('elastalert.elastalert.ts_now', return_value=match_time): ea.rules[0]['aggregation'] = datetime.timedelta(minutes=10) ea.rules[0]['type'].matches = [{'@timestamp': h} for h in hits_timestamps] # Hit1 and Hit3 should be aggregated together, since they have same query_key value ea.rules[0]['type'].matches[0]['key'] = 'Key Value 1' ea.rules[0]['type'].matches[1]['key'] = 'Key Value 2' ea.rules[0]['type'].matches[2]['key'] = 'Key Value 1' ea.rules[0]['aggregation_key'] = 'key' ea.run_rule(ea.rules[0], END, START) # Assert that the three matches were added to elasticsearch call1 = ea.writeback_es.index.call_args_list[0][1]['body'] call2 = ea.writeback_es.index.call_args_list[1][1]['body'] call3 = ea.writeback_es.index.call_args_list[2][1]['body'] assert call1['match_body']['key'] == 'Key Value 1' assert not call1['alert_sent'] assert 'aggregate_id' not in call1 assert 'aggregation_key' in call1 assert call1['aggregation_key'] == 'Key Value 1' assert call1['alert_time'] == dt_to_ts(match_time + datetime.timedelta(minutes=10)) assert call2['match_body']['key'] == 'Key Value 2' assert not call2['alert_sent'] assert 'aggregate_id' not in call2 assert 'aggregation_key' in call2 assert call2['aggregation_key'] == 'Key Value 2' assert call2['alert_time'] == dt_to_ts(match_time + datetime.timedelta(minutes=10)) assert call3['match_body']['key'] == 'Key Value 1' assert not call3['alert_sent'] # Call3 should have it's aggregate_id set to call1's _id # It should also have the same alert_time as call1 assert call3['aggregate_id'] == 'ABCD' assert 'aggregation_key' in call3 assert call3['aggregation_key'] == 'Key Value 1' assert call3['alert_time'] == dt_to_ts(match_time + datetime.timedelta(minutes=10)) # First call - Find all pending alerts (only entries without agg_id) # Second call - Find matches with agg_id == 'ABCD' # Third call - Find matches with agg_id == 'CDEF' ea.writeback_es.deprecated_search.side_effect = [{'hits': {'hits': [{'_id': 'ABCD', '_index': 'wb', '_source': call1}, {'_id': 'CDEF', '_index': 'wb', '_source': call2}]}}, {'hits': {'hits': [{'_id': 'BCDE', '_index': 'wb', '_source': call3}]}}, {'hits': {'total': 0, 'hits': []}}] with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es: mock_es.return_value = ea.thread_data.current_es ea.send_pending_alerts() # Assert that current_es was refreshed from the aggregate rules assert mock_es.called_with(host='', port='') assert mock_es.call_count == 2 assert_alerts(ea, [[hits_timestamps[0], hits_timestamps[2]], [hits_timestamps[1]]]) call1 = 
ea.writeback_es.deprecated_search.call_args_list[7][1]['body'] call2 = ea.writeback_es.deprecated_search.call_args_list[8][1]['body'] call3 = ea.writeback_es.deprecated_search.call_args_list[9][1]['body'] call4 = ea.writeback_es.deprecated_search.call_args_list[10][1]['body'] assert 'alert_time' in call2['filter']['range'] assert call3['query']['query_string']['query'] == 'aggregate_id:ABCD' assert call4['query']['query_string']['query'] == 'aggregate_id:CDEF' assert ea.writeback_es.deprecated_search.call_args_list[9][1]['size'] == 1337 def test_silence(ea): # Silence test rule for 4 hours ea.args.rule = 'test_rule.yaml' # Not a real name, just has to be set ea.args.silence = 'hours=4' ea.silence() # Don't alert even with a match match = [{'@timestamp': '2014-11-17T00:00:00'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 0 # Mock ts_now() to +5 hours, alert on match match = [{'@timestamp': '2014-11-17T00:00:00'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.ts_now') as mock_ts: with mock.patch('elastalert.elastalert.elasticsearch_client'): # Converted twice to add tzinfo mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(hours=5))) ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 def test_compound_query_key(ea): ea.rules[0]['query_key'] = 'this,that,those' ea.rules[0]['compound_query_key'] = ['this', 'that', 'those'] hits = generate_hits([START_TIMESTAMP, END_TIMESTAMP], this='abc', that='☃', those=4) ea.thread_data.current_es.search.return_value = hits ea.run_query(ea.rules[0], START, END) call_args = ea.rules[0]['type'].add_data.call_args_list[0] assert 'this,that,those' in call_args[0][0][0] assert call_args[0][0][0]['this,that,those'] == 'abc, ☃, 4' def test_silence_query_key(ea): # Silence test rule for 4 hours ea.args.rule = 'test_rule.yaml' # Not a real name, just has to be set ea.args.silence = 'hours=4' ea.silence('anytest.qlo') # Don't alert even with a match match = [{'@timestamp': '2014-11-17T00:00:00', 'username': 'qlo'}] ea.rules[0]['type'].matches = match ea.rules[0]['query_key'] = 'username' with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 0 # If there is a new record with a different value for the query_key, we should get an alert match = [{'@timestamp': '2014-11-17T00:00:01', 'username': 'dpopes'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 # Mock ts_now() to +5 hours, alert on match match = [{'@timestamp': '2014-11-17T00:00:00', 'username': 'qlo'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.ts_now') as mock_ts: with mock.patch('elastalert.elastalert.elasticsearch_client'): # Converted twice to add tzinfo mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(hours=5))) ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 2 def test_realert(ea): hits = ['2014-09-26T12:35:%sZ' % (x) for x in range(60)] matches = [{'@timestamp': x} for x in hits] ea.thread_data.current_es.search.return_value = hits with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.rules[0]['realert'] = 
datetime.timedelta(seconds=50) ea.rules[0]['type'].matches = matches ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 # Doesn't alert again matches = [{'@timestamp': x} for x in hits] with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) ea.rules[0]['type'].matches = matches assert ea.rules[0]['alert'][0].alert.call_count == 1 # mock ts_now() to past the realert time matches = [{'@timestamp': hits[0]}] with mock.patch('elastalert.elastalert.ts_now') as mock_ts: with mock.patch('elastalert.elastalert.elasticsearch_client'): # mock_ts is converted twice to add tzinfo mock_ts.return_value = ts_to_dt(dt_to_ts(datetime.datetime.utcnow() + datetime.timedelta(minutes=10))) ea.rules[0]['type'].matches = matches ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 2 def test_realert_with_query_key(ea): ea.rules[0]['query_key'] = 'username' ea.rules[0]['realert'] = datetime.timedelta(minutes=10) # Alert and silence username: qlo match = [{'@timestamp': '2014-11-17T00:00:00', 'username': 'qlo'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 # Dont alert again for same username match = [{'@timestamp': '2014-11-17T00:05:00', 'username': 'qlo'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 # Do alert with a different value match = [{'@timestamp': '2014-11-17T00:05:00', 'username': ''}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 2 # Alert with query_key missing match = [{'@timestamp': '2014-11-17T00:05:00'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 3 # Still alert with a different value match = [{'@timestamp': '2014-11-17T00:05:00', 'username': 'ghengis_khan'}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 4 def test_realert_with_nested_query_key(ea): ea.rules[0]['query_key'] = 'user.name' ea.rules[0]['realert'] = datetime.timedelta(minutes=10) # Alert and silence username: qlo match = [{'@timestamp': '2014-11-17T00:00:00', 'user': {'name': 'qlo'}}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 # Dont alert again for same username match = [{'@timestamp': '2014-11-17T00:05:00', 'user': {'name': 'qlo'}}] ea.rules[0]['type'].matches = match with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.run_rule(ea.rules[0], END, START) assert ea.rules[0]['alert'][0].alert.call_count == 1 def test_count(ea): ea.rules[0]['use_count_query'] = True ea.rules[0]['doc_type'] = 'doctype' with mock.patch('elastalert.elastalert.elasticsearch_client'), \ mock.patch.object(ea, 'get_hits_count') as mock_hits: ea.run_rule(ea.rules[0], END, START) # Assert that es.count is run against every run_every timeframe between START and END start = START query = { 
'query': {'filtered': { 'filter': {'bool': {'must': [{'range': {'@timestamp': {'lte': END_TIMESTAMP, 'gt': START_TIMESTAMP}}}]}}}}} while END - start > ea.run_every: end = start + ea.run_every query['query']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['lte'] = dt_to_ts(end) query['query']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['gt'] = dt_to_ts(start) mock_hits.assert_any_call(mock.ANY, start, end, mock.ANY) start = start + ea.run_every def run_and_assert_segmented_queries(ea, start, end, segment_size): with mock.patch.object(ea, 'run_query') as mock_run_query: ea.run_rule(ea.rules[0], end, start) original_end, original_start = end, start for call_args in mock_run_query.call_args_list: end = min(start + segment_size, original_end) assert call_args[0][1:3] == (start, end) start += segment_size # Assert elastalert_status was created for the entire time range assert ea.writeback_es.index.call_args_list[-1][1]['body']['starttime'] == dt_to_ts(original_start) if ea.rules[0].get('aggregation_query_element'): assert ea.writeback_es.index.call_args_list[-1][1]['body']['endtime'] == dt_to_ts( original_end - (original_end - end)) assert original_end - end < segment_size else: assert ea.writeback_es.index.call_args_list[-1][1]['body']['endtime'] == dt_to_ts(original_end) def test_query_segmenting_reset_num_hits(ea): # Tests that num_hits gets reset every time run_query is run def assert_num_hits_reset(): assert ea.thread_data.num_hits == 0 ea.thread_data.num_hits += 10 with mock.patch.object(ea, 'run_query') as mock_run_query: mock_run_query.side_effect = assert_num_hits_reset() ea.run_rule(ea.rules[0], END, START) assert mock_run_query.call_count > 1 def test_query_segmenting(ea): # buffer_time segments with normal queries ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=53) with mock.patch('elastalert.elastalert.elasticsearch_client'): run_and_assert_segmented_queries(ea, START, END, ea.rules[0]['buffer_time']) # run_every segments with count queries ea.rules[0]['use_count_query'] = True with mock.patch('elastalert.elastalert.elasticsearch_client'): run_and_assert_segmented_queries(ea, START, END, ea.run_every) # run_every segments with terms queries ea.rules[0].pop('use_count_query') ea.rules[0]['use_terms_query'] = True with mock.patch('elastalert.elastalert.elasticsearch_client'): run_and_assert_segmented_queries(ea, START, END, ea.run_every) # buffer_time segments with terms queries ea.rules[0].pop('use_terms_query') ea.rules[0]['aggregation_query_element'] = {'term': 'term_val'} with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=30) run_and_assert_segmented_queries(ea, START, END, ea.rules[0]['buffer_time']) # partial segment size scenario with mock.patch('elastalert.elastalert.elasticsearch_client'): ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=53) run_and_assert_segmented_queries(ea, START, END, ea.rules[0]['buffer_time']) # run every segmenting ea.rules[0]['use_run_every_query_size'] = True with mock.patch('elastalert.elastalert.elasticsearch_client'): run_and_assert_segmented_queries(ea, START, END, ea.run_every) def test_get_starttime(ea): endtime = '2015-01-01T00:00:00Z' mock_es = mock.Mock() mock_es.search.return_value = {'hits': {'hits': [{'_source': {'endtime': endtime}}]}} mock_es.info.return_value = {'version': {'number': '2.0'}} ea.writeback_es = mock_es # 4 days old, will return endtime with mock.patch('elastalert.elastalert.ts_now') as mock_ts: 
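        # (Editor's aside, kept as comments so the surrounding test body is untouched.)
        # run_and_assert_segmented_queries and test_query_segmenting above check how a rule's
        # query window is chopped up: plain filter queries use the rule's buffer_time as the
        # segment size, while use_count_query / use_terms_query rules fall back to the global
        # run_every. For example, with buffer_time = 53 minutes a window from 12:00 to 14:00
        # would be queried as roughly 12:00-12:53, 12:53-13:46 and a final partial segment
        # 13:46-14:00 clamped to the overall end time.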
        mock_ts.return_value = ts_to_dt('2015-01-05T00:00:00Z')  # 4 days ahead of the endtime
        assert ea.get_starttime(ea.rules[0]) == ts_to_dt(endtime)

    # 10 days old, will return None
    with mock.patch('elastalert.elastalert.ts_now') as mock_ts:
        mock_ts.return_value = ts_to_dt('2015-01-11T00:00:00Z')  # 10 days ahead of the endtime
        assert ea.get_starttime(ea.rules[0]) is None


def test_set_starttime(ea):
    # standard query, no starttime, no last run
    end = ts_to_dt('2014-10-10T10:10:10')
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = None
        ea.set_starttime(ea.rules[0], end)
    assert mock_gs.call_count == 1
    assert ea.rules[0]['starttime'] == end - ea.buffer_time

    # Standard query, no starttime, rule specific buffer_time
    ea.rules[0].pop('starttime')
    ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=37)
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = None
        ea.set_starttime(ea.rules[0], end)
    assert mock_gs.call_count == 1
    assert ea.rules[0]['starttime'] == end - datetime.timedelta(minutes=37)
    ea.rules[0].pop('buffer_time')

    # Standard query, no starttime, last run
    ea.rules[0].pop('starttime')
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = ts_to_dt('2014-10-10T00:00:00')
        ea.set_starttime(ea.rules[0], end)
    assert mock_gs.call_count == 1
    assert ea.rules[0]['starttime'] == ts_to_dt('2014-10-10T00:00:00')

    # Standard query, no starttime, last run, ensure buffer_time doesn't go past
    ea.rules[0].pop('starttime')
    ea.rules[0]['buffer_time'] = datetime.timedelta(weeks=1000)
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = ts_to_dt('2014-10-09T00:00:00')
        # First call sets minimum_starttime
        ea.set_starttime(ea.rules[0], end)
        # Second call uses buffer_time, but it goes past minimum
        ea.set_starttime(ea.rules[0], end)
    assert ea.rules[0]['starttime'] == ts_to_dt('2014-10-09T00:00:00')

    # Standard query, starttime
    ea.rules[0].pop('buffer_time')
    ea.rules[0].pop('minimum_starttime')
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = None
        ea.set_starttime(ea.rules[0], end)
    assert mock_gs.call_count == 0
    assert ea.rules[0]['starttime'] == end - ea.buffer_time

    # Count query, starttime, no previous endtime
    ea.rules[0]['use_count_query'] = True
    ea.rules[0]['doc_type'] = 'blah'
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
        mock_gs.return_value = None
        ea.set_starttime(ea.rules[0], end)
    assert mock_gs.call_count == 0
    assert ea.rules[0]['starttime'] == end - ea.run_every

    # Count query, with previous endtime
    with mock.patch('elastalert.elastalert.elasticsearch_client'), \
            mock.patch.object(ea, 'get_hits_count'):
        ea.run_rule(ea.rules[0], END, START)
        ea.set_starttime(ea.rules[0], end)
    assert ea.rules[0]['starttime'] == END

    # buffer_time doesn't go past previous endtime
    ea.rules[0].pop('use_count_query')
    ea.rules[0]['previous_endtime'] = end - ea.buffer_time * 2
    ea.set_starttime(ea.rules[0], end)
    assert ea.rules[0]['starttime'] == ea.rules[0]['previous_endtime']

    # Make sure starttime is updated if previous_endtime isn't used
    ea.rules[0]['previous_endtime'] = end - ea.buffer_time / 2
    ea.rules[0]['starttime'] = ts_to_dt('2014-10-09T00:00:01')
    ea.set_starttime(ea.rules[0], end)
    assert ea.rules[0]['starttime'] == end - ea.buffer_time

    # scan_entire_timeframe
    ea.rules[0].pop('previous_endtime')
    ea.rules[0].pop('starttime')
    ea.rules[0]['timeframe'] = datetime.timedelta(days=3)
    ea.rules[0]['scan_entire_timeframe'] = True
    with mock.patch.object(ea, 'get_starttime') as mock_gs:
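        # (Editor's aside, kept as comments.) test_set_starttime above walks the order used when
        # picking where a query starts: if the rule already has a 'starttime', the writeback
        # lookup (get_starttime) is skipped and the start falls back to end - buffer_time
        # (end - run_every for count queries), bounded so it never goes past a recorded
        # previous_endtime or minimum_starttime; 'scan_entire_timeframe' forces end - timeframe.
        # With the rule-specific buffer_time of 37 minutes used earlier in this test and
        # end = 2014-10-10T10:10:10, that fallback works out to 2014-10-10T09:33:10.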
mock_gs.return_value = None ea.set_starttime(ea.rules[0], end) assert ea.rules[0]['starttime'] == end - datetime.timedelta(days=3) def test_kibana_dashboard(ea): match = {'@timestamp': '2014-10-11T00:00:00'} mock_es = mock.Mock() ea.rules[0]['use_kibana_dashboard'] = 'my dashboard' with mock.patch('elastalert.elastalert.elasticsearch_client') as mock_es_init: mock_es_init.return_value = mock_es # No dashboard found mock_es.deprecated_search.return_value = {'hits': {'total': 0, 'hits': []}} with pytest.raises(EAException): ea.use_kibana_link(ea.rules[0], match) mock_call = mock_es.deprecated_search.call_args_list[0][1] assert mock_call['body'] == {'query': {'term': {'_id': 'my dashboard'}}} # Dashboard found mock_es.index.return_value = {'_id': 'ABCDEFG'} mock_es.deprecated_search.return_value = {'hits': {'hits': [{'_source': {'dashboard': json.dumps(dashboard_temp)}}]}} url = ea.use_kibana_link(ea.rules[0], match) assert 'ABCDEFG' in url db = json.loads(mock_es.index.call_args_list[0][1]['body']['dashboard']) assert 'anytest' in db['title'] # Query key filtering added ea.rules[0]['query_key'] = 'foobar' match['foobar'] = 'baz' url = ea.use_kibana_link(ea.rules[0], match) db = json.loads(mock_es.index.call_args_list[-1][1]['body']['dashboard']) assert db['services']['filter']['list']['1']['field'] == 'foobar' assert db['services']['filter']['list']['1']['query'] == '"baz"' # Compound query key ea.rules[0]['query_key'] = 'foo,bar' ea.rules[0]['compound_query_key'] = ['foo', 'bar'] match['foo'] = 'cat' match['bar'] = 'dog' match['foo,bar'] = 'cat, dog' url = ea.use_kibana_link(ea.rules[0], match) db = json.loads(mock_es.index.call_args_list[-1][1]['body']['dashboard']) found_filters = 0 for filter_id, filter_dict in list(db['services']['filter']['list'].items()): if (filter_dict['field'] == 'foo' and filter_dict['query'] == '"cat"') or \ (filter_dict['field'] == 'bar' and filter_dict['query'] == '"dog"'): found_filters += 1 continue assert found_filters == 2 def test_rule_changes(ea): ea.rule_hashes = {'rules/rule1.yaml': 'ABC', 'rules/rule2.yaml': 'DEF'} run_every = datetime.timedelta(seconds=1) ea.rules = [ea.init_rule(rule, True) for rule in [{'rule_file': 'rules/rule1.yaml', 'name': 'rule1', 'filter': [], 'run_every': run_every}, {'rule_file': 'rules/rule2.yaml', 'name': 'rule2', 'filter': [], 'run_every': run_every}]] ea.rules[1]['processed_hits'] = ['save me'] new_hashes = {'rules/rule1.yaml': 'ABC', 'rules/rule3.yaml': 'XXX', 'rules/rule2.yaml': '!@#$'} with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 'load_configuration') as mock_load: mock_load.side_effect = [{'filter': [], 'name': 'rule2', 'rule_file': 'rules/rule2.yaml', 'run_every': run_every}, {'filter': [], 'name': 'rule3', 'rule_file': 'rules/rule3.yaml', 'run_every': run_every}] mock_hashes.return_value = new_hashes ea.load_rule_changes() # All 3 rules still exist assert ea.rules[0]['name'] == 'rule1' assert ea.rules[1]['name'] == 'rule2' assert ea.rules[1]['processed_hits'] == ['save me'] assert ea.rules[2]['name'] == 'rule3' # Assert 2 and 3 were reloaded assert mock_load.call_count == 2 mock_load.assert_any_call('rules/rule2.yaml', ea.conf) mock_load.assert_any_call('rules/rule3.yaml', ea.conf) # A new rule with a conflicting name wont load new_hashes = copy.copy(new_hashes) new_hashes.update({'rules/rule4.yaml': 'asdf'}) with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 
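# (Editor's aside, kept as comments inside the patched-loader block.) The reload path being
# tested here compares rules_loader.get_hashes() against the hashes saved on the previous pass:
# only files whose hash changed (rule2) or that are new (rule3) are handed to
# load_configuration(), while untouched rules (rule1) keep their in-memory state such as
# 'processed_hits'. A rule whose load raises, or a new rule with is_enabled: false, is left out,
# and a load failure also triggers send_notification_email.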
'load_configuration') as mock_load: with mock.patch.object(ea, 'send_notification_email') as mock_send: mock_load.return_value = {'filter': [], 'name': 'rule3', 'new': 'stuff', 'rule_file': 'rules/rule4.yaml', 'run_every': run_every} mock_hashes.return_value = new_hashes ea.load_rule_changes() mock_send.assert_called_once_with(exception=mock.ANY, rule_file='rules/rule4.yaml') assert len(ea.rules) == 3 assert not any(['new' in rule for rule in ea.rules]) # A new rule with is_enabled=False wont load new_hashes = copy.copy(new_hashes) new_hashes.update({'rules/rule4.yaml': 'asdf'}) with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 'load_configuration') as mock_load: mock_load.return_value = {'filter': [], 'name': 'rule4', 'new': 'stuff', 'is_enabled': False, 'rule_file': 'rules/rule4.yaml', 'run_every': run_every} mock_hashes.return_value = new_hashes ea.load_rule_changes() assert len(ea.rules) == 3 assert not any(['new' in rule for rule in ea.rules]) # An old rule which didn't load gets reloaded new_hashes = copy.copy(new_hashes) new_hashes['rules/rule4.yaml'] = 'qwerty' with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 'load_configuration') as mock_load: mock_load.return_value = {'filter': [], 'name': 'rule4', 'new': 'stuff', 'rule_file': 'rules/rule4.yaml', 'run_every': run_every} mock_hashes.return_value = new_hashes ea.load_rule_changes() assert len(ea.rules) == 4 # Disable a rule by removing the file new_hashes.pop('rules/rule4.yaml') with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 'load_configuration') as mock_load: mock_load.return_value = {'filter': [], 'name': 'rule4', 'new': 'stuff', 'rule_file': 'rules/rule4.yaml', 'run_every': run_every} mock_hashes.return_value = new_hashes ea.load_rule_changes() ea.scheduler.remove_job.assert_called_with(job_id='rule4') def test_strf_index(ea): """ Test that the get_index function properly generates indexes spanning days """ ea.rules[0]['index'] = 'logstash-%Y.%m.%d' ea.rules[0]['use_strftime_index'] = True # Test formatting with times start = ts_to_dt('2015-01-02T12:34:45Z') end = ts_to_dt('2015-01-02T16:15:14Z') assert ea.get_index(ea.rules[0], start, end) == 'logstash-2015.01.02' end = ts_to_dt('2015-01-03T01:02:03Z') assert set(ea.get_index(ea.rules[0], start, end).split(',')) == set(['logstash-2015.01.02', 'logstash-2015.01.03']) # Test formatting for wildcard assert ea.get_index(ea.rules[0]) == 'logstash-*' ea.rules[0]['index'] = 'logstash-%Y.%m' assert ea.get_index(ea.rules[0]) == 'logstash-*' ea.rules[0]['index'] = 'logstash-%Y.%m-stuff' assert ea.get_index(ea.rules[0]) == 'logstash-*-stuff' def test_count_keys(ea): ea.rules[0]['timeframe'] = datetime.timedelta(minutes=60) ea.rules[0]['top_count_keys'] = ['this', 'that'] ea.rules[0]['type'].matches = {'@timestamp': END} ea.rules[0]['doc_type'] = 'blah' buckets = [{'aggregations': { 'filtered': {'counts': {'buckets': [{'key': 'a', 'doc_count': 10}, {'key': 'b', 'doc_count': 5}]}}}}, {'aggregations': {'filtered': { 'counts': {'buckets': [{'key': 'd', 'doc_count': 10}, {'key': 'c', 'doc_count': 12}]}}}}] ea.thread_data.current_es.deprecated_search.side_effect = buckets counts = ea.get_top_counts(ea.rules[0], START, END, ['this', 'that']) calls = ea.thread_data.current_es.deprecated_search.call_args_list assert calls[0][1]['search_type'] == 'count' assert 
calls[0][1]['body']['aggs']['filtered']['aggs']['counts']['terms'] == {'field': 'this', 'size': 5, 'min_doc_count': 1} assert counts['top_events_this'] == {'a': 10, 'b': 5} assert counts['top_events_that'] == {'d': 10, 'c': 12} def test_exponential_realert(ea): ea.rules[0]['exponential_realert'] = datetime.timedelta(days=1) # 1 day ~ 10 * 2**13 seconds ea.rules[0]['realert'] = datetime.timedelta(seconds=10) until = ts_to_dt('2015-03-24T00:00:00') ts5s = until + datetime.timedelta(seconds=5) ts15s = until + datetime.timedelta(seconds=15) ts1m = until + datetime.timedelta(minutes=1) ts5m = until + datetime.timedelta(minutes=5) ts4h = until + datetime.timedelta(hours=4) test_values = [(ts5s, until, 0), # Exp will increase to 1, 10*2**0 = 10s (ts15s, until, 0), # Exp will stay at 0, 10*2**0 = 10s (ts15s, until, 1), # Exp will increase to 2, 10*2**1 = 20s (ts1m, until, 2), # Exp will decrease to 1, 10*2**2 = 40s (ts1m, until, 3), # Exp will increase to 4, 10*2**3 = 1m20s (ts5m, until, 1), # Exp will lower back to 0, 10*2**1 = 20s (ts4h, until, 9), # Exp will lower back to 0, 10*2**9 = 1h25m (ts4h, until, 10), # Exp will lower back to 9, 10*2**10 = 2h50m (ts4h, until, 11)] # Exp will increase to 12, 10*2**11 = 5h results = (1, 0, 2, 1, 4, 0, 0, 9, 12) next_res = iter(results) for args in test_values: ea.silence_cache[ea.rules[0]['name']] = (args[1], args[2]) next_alert, exponent = ea.next_alert_time(ea.rules[0], ea.rules[0]['name'], args[0]) assert exponent == next(next_res) def test_wait_until_responsive(ea): """Unblock as soon as ElasticSearch becomes responsive.""" # Takes a while before becoming responsive. ea.writeback_es.indices.exists.side_effect = [ ConnectionError(), # ES is not yet responsive. False, # index does not yet exist. True, ] clock = mock.MagicMock() clock.side_effect = [0.0, 1.0, 2.0, 3.0, 4.0] timeout = datetime.timedelta(seconds=3.5) with mock.patch('time.sleep') as sleep: ea.wait_until_responsive(timeout=timeout, clock=clock) # Sleep as little as we can. sleep.mock_calls == [ mock.call(1.0), ] def test_wait_until_responsive_timeout_es_not_available(ea, capsys): """Bail out if ElasticSearch doesn't (quickly) become responsive.""" # Never becomes responsive :-) ea.writeback_es.ping.return_value = False ea.writeback_es.indices.exists.return_value = False clock = mock.MagicMock() clock.side_effect = [0.0, 1.0, 2.0, 3.0] timeout = datetime.timedelta(seconds=2.5) with mock.patch('time.sleep') as sleep: with pytest.raises(SystemExit) as exc: ea.wait_until_responsive(timeout=timeout, clock=clock) assert exc.value.code == 1 # Ensure we get useful diagnostics. output, errors = capsys.readouterr() assert 'Could not reach ElasticSearch at "es:14900".' in errors # Slept until we passed the deadline. sleep.mock_calls == [ mock.call(1.0), mock.call(1.0), mock.call(1.0), ] def test_wait_until_responsive_timeout_index_does_not_exist(ea, capsys): """Bail out if ElasticSearch doesn't (quickly) become responsive.""" # Never becomes responsive :-) ea.writeback_es.ping.return_value = True ea.writeback_es.indices.exists.return_value = False clock = mock.MagicMock() clock.side_effect = [0.0, 1.0, 2.0, 3.0] timeout = datetime.timedelta(seconds=2.5) with mock.patch('time.sleep') as sleep: with pytest.raises(SystemExit) as exc: ea.wait_until_responsive(timeout=timeout, clock=clock) assert exc.value.code == 1 # Ensure we get useful diagnostics. output, errors = capsys.readouterr() assert 'Writeback alias "wb_a" does not exist, did you run `elastalert-create-index`?' 
in errors # Slept until we passed the deadline. sleep.mock_calls == [ mock.call(1.0), mock.call(1.0), mock.call(1.0), ] def test_stop(ea): """ The purpose of this test is to make sure that calling ElastAlerter.stop() will break it out of a ElastAlerter.start() loop. This method exists to provide a mechanism for running ElastAlert with threads and thus must be tested with threads. mock_loop verifies the loop is running and will call stop after several iterations. """ # Exit the thread on the fourth iteration def mock_loop(): for i in range(3): assert ea.running yield ea.stop() with mock.patch.object(ea, 'sleep_for', return_value=None): with mock.patch.object(ea, 'sleep_for') as mock_run: mock_run.side_effect = mock_loop() start_thread = threading.Thread(target=ea.start) # Set as daemon to prevent a failed test from blocking exit start_thread.daemon = True start_thread.start() # Give it a few seconds to run the loop start_thread.join(5) assert not ea.running assert not start_thread.is_alive() assert mock_run.call_count == 4 def test_notify_email(ea): mock_smtp = mock.Mock() ea.rules[0]['notify_email'] = ['foo@foo.foo', 'bar@bar.bar'] with mock.patch('elastalert.elastalert.SMTP') as mock_smtp_f: mock_smtp_f.return_value = mock_smtp # Notify_email from rules, array ea.send_notification_email('omg', rule=ea.rules[0]) assert set(mock_smtp.sendmail.call_args_list[0][0][1]) == set(ea.rules[0]['notify_email']) # With ea.notify_email ea.notify_email = ['baz@baz.baz'] ea.send_notification_email('omg', rule=ea.rules[0]) assert set(mock_smtp.sendmail.call_args_list[1][0][1]) == set(['baz@baz.baz'] + ea.rules[0]['notify_email']) # With ea.notify email but as single string ea.rules[0]['notify_email'] = 'foo@foo.foo' ea.send_notification_email('omg', rule=ea.rules[0]) assert set(mock_smtp.sendmail.call_args_list[2][0][1]) == set(['baz@baz.baz', 'foo@foo.foo']) # None from rule ea.rules[0].pop('notify_email') ea.send_notification_email('omg', rule=ea.rules[0]) assert set(mock_smtp.sendmail.call_args_list[3][0][1]) == set(['baz@baz.baz']) def test_uncaught_exceptions(ea): e = Exception("Errors yo!") # With disabling set to false ea.disable_rules_on_error = False ea.handle_uncaught_exception(e, ea.rules[0]) assert len(ea.rules) == 1 assert len(ea.disabled_rules) == 0 # With disabling set to true ea.disable_rules_on_error = True ea.handle_uncaught_exception(e, ea.rules[0]) assert len(ea.rules) == 0 assert len(ea.disabled_rules) == 1 # Changing the file should re-enable it ea.rule_hashes = {'blah.yaml': 'abc'} new_hashes = {'blah.yaml': 'def'} with mock.patch.object(ea.conf['rules_loader'], 'get_hashes') as mock_hashes: with mock.patch.object(ea.conf['rules_loader'], 'load_configuration') as mock_load: mock_load.side_effect = [ea.disabled_rules[0]] mock_hashes.return_value = new_hashes ea.load_rule_changes() assert len(ea.rules) == 1 assert len(ea.disabled_rules) == 0 # Notify email is sent ea.notify_email = 'qlo@example.com' with mock.patch.object(ea, 'send_notification_email') as mock_email: ea.handle_uncaught_exception(e, ea.rules[0]) assert mock_email.call_args_list[0][1] == {'exception': e, 'rule': ea.disabled_rules[0]} def test_get_top_counts_handles_no_hits_returned(ea): with mock.patch.object(ea, 'get_hits_terms') as mock_hits: mock_hits.return_value = None rule = ea.rules[0] starttime = datetime.datetime.now() - datetime.timedelta(minutes=10) endtime = datetime.datetime.now() keys = ['foo'] all_counts = ea.get_top_counts(rule, starttime, endtime, keys) assert all_counts == {'top_events_foo': {}} def 
test_remove_old_events(ea): now = ts_now() minute = datetime.timedelta(minutes=1) ea.rules[0]['processed_hits'] = {'foo': now - minute, 'bar': now - minute * 5, 'baz': now - minute * 15} ea.rules[0]['buffer_time'] = datetime.timedelta(minutes=10) # With a query delay, only events older than 20 minutes will be removed (none) ea.rules[0]['query_delay'] = datetime.timedelta(minutes=10) ea.remove_old_events(ea.rules[0]) assert len(ea.rules[0]['processed_hits']) == 3 # With no query delay, the 15 minute old event will be removed ea.rules[0].pop('query_delay') ea.remove_old_events(ea.rules[0]) assert len(ea.rules[0]['processed_hits']) == 2 assert 'baz' not in ea.rules[0]['processed_hits'] def test_query_with_whitelist_filter_es(ea): ea.rules[0]['_source_enabled'] = False ea.rules[0]['five'] = False ea.rules[0]['filter'] = [{'query_string': {'query': 'baz'}}] ea.rules[0]['compare_key'] = "username" ea.rules[0]['whitelist'] = ['xudan1', 'xudan12', 'aa1', 'bb1'] new_rule = copy.copy(ea.rules[0]) ea.init_rule(new_rule, True) assert 'NOT username:"xudan1" AND NOT username:"xudan12" AND NOT username:"aa1"' \ in new_rule['filter'][-1]['query']['query_string']['query'] def test_query_with_whitelist_filter_es_five(ea_sixsix): ea_sixsix.rules[0]['_source_enabled'] = False ea_sixsix.rules[0]['filter'] = [{'query_string': {'query': 'baz'}}] ea_sixsix.rules[0]['compare_key'] = "username" ea_sixsix.rules[0]['whitelist'] = ['xudan1', 'xudan12', 'aa1', 'bb1'] new_rule = copy.copy(ea_sixsix.rules[0]) ea_sixsix.init_rule(new_rule, True) assert 'NOT username:"xudan1" AND NOT username:"xudan12" AND NOT username:"aa1"' in \ new_rule['filter'][-1]['query_string']['query'] def test_query_with_blacklist_filter_es(ea): ea.rules[0]['_source_enabled'] = False ea.rules[0]['filter'] = [{'query_string': {'query': 'baz'}}] ea.rules[0]['compare_key'] = "username" ea.rules[0]['blacklist'] = ['xudan1', 'xudan12', 'aa1', 'bb1'] new_rule = copy.copy(ea.rules[0]) ea.init_rule(new_rule, True) assert 'username:"xudan1" OR username:"xudan12" OR username:"aa1"' in \ new_rule['filter'][-1]['query']['query_string']['query'] def test_query_with_blacklist_filter_es_five(ea_sixsix): ea_sixsix.rules[0]['_source_enabled'] = False ea_sixsix.rules[0]['filter'] = [{'query_string': {'query': 'baz'}}] ea_sixsix.rules[0]['compare_key'] = "username" ea_sixsix.rules[0]['blacklist'] = ['xudan1', 'xudan12', 'aa1', 'bb1'] ea_sixsix.rules[0]['blacklist'] = ['xudan1', 'xudan12', 'aa1', 'bb1'] new_rule = copy.copy(ea_sixsix.rules[0]) ea_sixsix.init_rule(new_rule, True) assert 'username:"xudan1" OR username:"xudan12" OR username:"aa1"' in new_rule['filter'][-1]['query_string'][ 'query'] elastalert-0.2.4/tests/conftest.py000066400000000000000000000231401364615736500172360ustar00rootroot00000000000000# -*- coding: utf-8 -*- import datetime import logging import os import mock import pytest import elastalert.elastalert import elastalert.util from elastalert.util import dt_to_ts from elastalert.util import ts_to_dt writeback_index = 'wb' def pytest_addoption(parser): parser.addoption( "--runelasticsearch", action="store_true", default=False, help="run elasticsearch tests" ) def pytest_collection_modifyitems(config, items): if config.getoption("--runelasticsearch"): # --runelasticsearch given in cli: run elasticsearch tests, skip ordinary unit tests skip_unit_tests = pytest.mark.skip(reason="not running when --runelasticsearch option is used to run") for item in items: if "elasticsearch" not in item.keywords: item.add_marker(skip_unit_tests) else: # skip 
elasticsearch tests skip_elasticsearch = pytest.mark.skip(reason="need --runelasticsearch option to run") for item in items: if "elasticsearch" in item.keywords: item.add_marker(skip_elasticsearch) @pytest.fixture(scope='function', autouse=True) def reset_loggers(): """Prevent logging handlers from capturing temporary file handles. For example, a test that uses the `capsys` fixture and calls `logging.exception()` will initialize logging with a default handler that captures `sys.stderr`. When the test ends, the file handles will be closed and `sys.stderr` will be returned to its original handle, but the logging will have a dangling reference to the temporary handle used in the `capsys` fixture. """ logger = logging.getLogger() for handler in logger.handlers: logger.removeHandler(handler) class mock_es_indices_client(object): def __init__(self): self.exists = mock.Mock(return_value=True) class mock_es_client(object): def __init__(self, host='es', port=14900): self.host = host self.port = port self.return_hits = [] self.search = mock.Mock() self.deprecated_search = mock.Mock() self.create = mock.Mock() self.index = mock.Mock() self.delete = mock.Mock() self.info = mock.Mock(return_value={'status': 200, 'name': 'foo', 'version': {'number': '2.0'}}) self.ping = mock.Mock(return_value=True) self.indices = mock_es_indices_client() self.es_version = mock.Mock(return_value='2.0') self.is_atleastfive = mock.Mock(return_value=False) self.is_atleastsix = mock.Mock(return_value=False) self.is_atleastsixtwo = mock.Mock(return_value=False) self.is_atleastsixsix = mock.Mock(return_value=False) self.is_atleastseven = mock.Mock(return_value=False) self.resolve_writeback_index = mock.Mock(return_value=writeback_index) class mock_es_sixsix_client(object): def __init__(self, host='es', port=14900): self.host = host self.port = port self.return_hits = [] self.search = mock.Mock() self.deprecated_search = mock.Mock() self.create = mock.Mock() self.index = mock.Mock() self.delete = mock.Mock() self.info = mock.Mock(return_value={'status': 200, 'name': 'foo', 'version': {'number': '6.6.0'}}) self.ping = mock.Mock(return_value=True) self.indices = mock_es_indices_client() self.es_version = mock.Mock(return_value='6.6.0') self.is_atleastfive = mock.Mock(return_value=True) self.is_atleastsix = mock.Mock(return_value=True) self.is_atleastsixtwo = mock.Mock(return_value=False) self.is_atleastsixsix = mock.Mock(return_value=True) self.is_atleastseven = mock.Mock(return_value=False) def writeback_index_side_effect(index, doc_type): if doc_type == 'silence': return index + '_silence' elif doc_type == 'past_elastalert': return index + '_past' elif doc_type == 'elastalert_status': return index + '_status' elif doc_type == 'elastalert_error': return index + '_error' return index self.resolve_writeback_index = mock.Mock(side_effect=writeback_index_side_effect) class mock_rule_loader(object): def __init__(self, conf): self.base_config = conf self.load = mock.Mock() self.get_hashes = mock.Mock() self.load_configuration = mock.Mock() class mock_ruletype(object): def __init__(self): self.add_data = mock.Mock() self.add_count_data = mock.Mock() self.add_terms_data = mock.Mock() self.matches = [] self.get_match_data = lambda x: x self.get_match_str = lambda x: "some stuff happened" self.garbage_collect = mock.Mock() class mock_alert(object): def __init__(self): self.alert = mock.Mock() def get_info(self): return {'type': 'mock'} @pytest.fixture def ea(): rules = [{'es_host': '', 'es_port': 14900, 'name': 'anytest', 'index': 'idx', 
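    # (Editor's aside, kept as comments inside the fixture's rule template.) The sixsix mock
    # client defined above resolves writeback doc types to per-type indices via its
    # resolve_writeback_index side effect, e.g. ('wb', 'silence') -> 'wb_silence' and
    # ('wb', 'elastalert_status') -> 'wb_status', mirroring how the writeback index is split on
    # newer Elasticsearch versions, while the plain mock_es_client always returns the single
    # 'wb' index.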
'filter': [], 'include': ['@timestamp'], 'aggregation': datetime.timedelta(0), 'realert': datetime.timedelta(0), 'processed_hits': {}, 'timestamp_field': '@timestamp', 'match_enhancements': [], 'rule_file': 'blah.yaml', 'max_query_size': 10000, 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts, '_source_enabled': True, 'run_every': datetime.timedelta(seconds=15)}] conf = {'rules_folder': 'rules', 'run_every': datetime.timedelta(minutes=10), 'buffer_time': datetime.timedelta(minutes=5), 'alert_time_limit': datetime.timedelta(hours=24), 'es_host': 'es', 'es_port': 14900, 'writeback_index': 'wb', 'writeback_alias': 'wb_a', 'rules': rules, 'max_query_size': 10000, 'old_query_limit': datetime.timedelta(weeks=1), 'disable_rules_on_error': False, 'scroll_keepalive': '30s'} elastalert.util.elasticsearch_client = mock_es_client conf['rules_loader'] = mock_rule_loader(conf) elastalert.elastalert.elasticsearch_client = mock_es_client with mock.patch('elastalert.elastalert.load_conf') as load_conf: with mock.patch('elastalert.elastalert.BackgroundScheduler'): load_conf.return_value = conf conf['rules_loader'].load.return_value = rules conf['rules_loader'].get_hashes.return_value = {} ea = elastalert.elastalert.ElastAlerter(['--pin_rules']) ea.rules[0]['type'] = mock_ruletype() ea.rules[0]['alert'] = [mock_alert()] ea.writeback_es = mock_es_client() ea.writeback_es.search.return_value = {'hits': {'hits': []}, 'total': 0} ea.writeback_es.deprecated_search.return_value = {'hits': {'hits': []}} ea.writeback_es.index.return_value = {'_id': 'ABCD', 'created': True} ea.current_es = mock_es_client('', '') ea.thread_data.current_es = ea.current_es ea.thread_data.num_hits = 0 ea.thread_data.num_dupes = 0 return ea @pytest.fixture def ea_sixsix(): rules = [{'es_host': '', 'es_port': 14900, 'name': 'anytest', 'index': 'idx', 'filter': [], 'include': ['@timestamp'], 'run_every': datetime.timedelta(seconds=1), 'aggregation': datetime.timedelta(0), 'realert': datetime.timedelta(0), 'processed_hits': {}, 'timestamp_field': '@timestamp', 'match_enhancements': [], 'rule_file': 'blah.yaml', 'max_query_size': 10000, 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts, '_source_enabled': True}] conf = {'rules_folder': 'rules', 'run_every': datetime.timedelta(minutes=10), 'buffer_time': datetime.timedelta(minutes=5), 'alert_time_limit': datetime.timedelta(hours=24), 'es_host': 'es', 'es_port': 14900, 'writeback_index': writeback_index, 'writeback_alias': 'wb_a', 'rules': rules, 'max_query_size': 10000, 'old_query_limit': datetime.timedelta(weeks=1), 'disable_rules_on_error': False, 'scroll_keepalive': '30s'} conf['rules_loader'] = mock_rule_loader(conf) elastalert.elastalert.elasticsearch_client = mock_es_sixsix_client elastalert.util.elasticsearch_client = mock_es_sixsix_client with mock.patch('elastalert.elastalert.load_conf') as load_conf: with mock.patch('elastalert.elastalert.BackgroundScheduler'): load_conf.return_value = conf conf['rules_loader'].load.return_value = rules conf['rules_loader'].get_hashes.return_value = {} ea_sixsix = elastalert.elastalert.ElastAlerter(['--pin_rules']) ea_sixsix.rules[0]['type'] = mock_ruletype() ea_sixsix.rules[0]['alert'] = [mock_alert()] ea_sixsix.writeback_es = mock_es_sixsix_client() ea_sixsix.writeback_es.search.return_value = {'hits': {'hits': []}} ea_sixsix.writeback_es.deprecated_search.return_value = {'hits': {'hits': []}} ea_sixsix.writeback_es.index.return_value = {'_id': 'ABCD'} ea_sixsix.current_es = mock_es_sixsix_client('', -1) return ea_sixsix @pytest.fixture(scope='function') def 
environ(): """py.test fixture to get a fresh mutable environment.""" old_env = os.environ new_env = dict(list(old_env.items())) os.environ = new_env yield os.environ os.environ = old_env elastalert-0.2.4/tests/create_index_test.py000066400000000000000000000030141364615736500211000ustar00rootroot00000000000000# -*- coding: utf-8 -*- import json import pytest import elastalert.create_index es_mappings = [ 'elastalert', 'elastalert_error', 'elastalert_status', 'past_elastalert', 'silence' ] @pytest.mark.parametrize('es_mapping', es_mappings) def test_read_default_index_mapping(es_mapping): mapping = elastalert.create_index.read_es_index_mapping(es_mapping) assert es_mapping not in mapping print((json.dumps(mapping, indent=2))) @pytest.mark.parametrize('es_mapping', es_mappings) def test_read_es_5_index_mapping(es_mapping): mapping = elastalert.create_index.read_es_index_mapping(es_mapping, 5) assert es_mapping in mapping print((json.dumps(mapping, indent=2))) @pytest.mark.parametrize('es_mapping', es_mappings) def test_read_es_6_index_mapping(es_mapping): mapping = elastalert.create_index.read_es_index_mapping(es_mapping, 6) assert es_mapping not in mapping print((json.dumps(mapping, indent=2))) def test_read_default_index_mappings(): mappings = elastalert.create_index.read_es_index_mappings() assert len(mappings) == len(es_mappings) print((json.dumps(mappings, indent=2))) def test_read_es_5_index_mappings(): mappings = elastalert.create_index.read_es_index_mappings(5) assert len(mappings) == len(es_mappings) print((json.dumps(mappings, indent=2))) def test_read_es_6_index_mappings(): mappings = elastalert.create_index.read_es_index_mappings(6) assert len(mappings) == len(es_mappings) print((json.dumps(mappings, indent=2))) elastalert-0.2.4/tests/elasticsearch_test.py000066400000000000000000000101431364615736500212610ustar00rootroot00000000000000# -*- coding: utf-8 -*- import datetime import json import time import dateutil import pytest import elastalert.create_index import elastalert.elastalert from elastalert import ElasticSearchClient from elastalert.util import build_es_conn_config from tests.conftest import ea # noqa: F401 test_index = 'test_index' es_host = '127.0.0.1' es_port = 9200 es_timeout = 10 @pytest.fixture def es_client(): es_conn_config = build_es_conn_config({'es_host': es_host, 'es_port': es_port, 'es_conn_timeout': es_timeout}) return ElasticSearchClient(es_conn_config) @pytest.mark.elasticsearch class TestElasticsearch(object): # TODO perform teardown removing data inserted into Elasticsearch # Warning!!!: Test class is not erasing its testdata on the Elasticsearch server. # This is not a problem as long as the data is manually removed or the test environment # is torn down after the test run(eg. 
running tests in a test environment such as Travis) def test_create_indices(self, es_client): elastalert.create_index.create_index_mappings(es_client=es_client, ea_index=test_index) indices_mappings = es_client.indices.get_mapping(test_index + '*') print(('-' * 50)) print((json.dumps(indices_mappings, indent=2))) print(('-' * 50)) if es_client.is_atleastsix(): assert test_index in indices_mappings assert test_index + '_error' in indices_mappings assert test_index + '_status' in indices_mappings assert test_index + '_silence' in indices_mappings assert test_index + '_past' in indices_mappings else: assert 'elastalert' in indices_mappings[test_index]['mappings'] assert 'elastalert_error' in indices_mappings[test_index]['mappings'] assert 'elastalert_status' in indices_mappings[test_index]['mappings'] assert 'silence' in indices_mappings[test_index]['mappings'] assert 'past_elastalert' in indices_mappings[test_index]['mappings'] @pytest.mark.usefixtures("ea") def test_aggregated_alert(self, ea, es_client): # noqa: F811 match_timestamp = datetime.datetime.now(tz=dateutil.tz.tzutc()).replace(microsecond=0) + datetime.timedelta( days=1) ea.rules[0]['aggregate_by_match_time'] = True match = {'@timestamp': match_timestamp, 'num_hits': 0, 'num_matches': 3 } ea.writeback_es = es_client res = ea.add_aggregated_alert(match, ea.rules[0]) if ea.writeback_es.is_atleastsix(): assert res['result'] == 'created' else: assert res['created'] is True # Make sure added data is available for querying time.sleep(2) # Now lets find the pending aggregated alert assert ea.find_pending_aggregate_alert(ea.rules[0]) @pytest.mark.usefixtures("ea") def test_silenced(self, ea, es_client): # noqa: F811 until_timestamp = datetime.datetime.now(tz=dateutil.tz.tzutc()).replace(microsecond=0) + datetime.timedelta( days=1) ea.writeback_es = es_client res = ea.set_realert(ea.rules[0]['name'], until_timestamp, 0) if ea.writeback_es.is_atleastsix(): assert res['result'] == 'created' else: assert res['created'] is True # Make sure added data is available for querying time.sleep(2) # Force lookup in elasticsearch ea.silence_cache = {} # Now lets check if our rule is reported as silenced assert ea.is_silenced(ea.rules[0]['name']) @pytest.mark.usefixtures("ea") def test_get_hits(self, ea, es_client): # noqa: F811 start = datetime.datetime.now(tz=dateutil.tz.tzutc()).replace(microsecond=0) end = start + datetime.timedelta(days=1) ea.current_es = es_client if ea.current_es.is_atleastfive(): ea.rules[0]['five'] = True else: ea.rules[0]['five'] = False ea.thread_data.current_es = ea.current_es hits = ea.get_hits(ea.rules[0], start, end, test_index) assert isinstance(hits, list) elastalert-0.2.4/tests/kibana_discover_test.py000066400000000000000000000703761364615736500216100ustar00rootroot00000000000000# -*- coding: utf-8 -*- from datetime import timedelta import pytest from elastalert.kibana_discover import generate_kibana_discover_url @pytest.mark.parametrize("kibana_version", ['5.6', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5', '6.6', '6.7', '6.8']) def test_generate_kibana_discover_url_with_kibana_5x_and_6x(kibana_version): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': kibana_version, 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 
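        # (Editor's aside, kept as comments, in the same style as the inline annotations used in
        # this expression.) The expected URLs in these Kibana Discover tests are percent-encoded
        # rison state for the _g (global) and _a (app) fragments; decoding a piece makes the
        # assertions easier to read, e.g.
        #   urllib.parse.unquote('%28refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%29')
        #   -> '(refreshInterval:(pause:!t,value:0))'
        # so %28/%29 are parentheses, %3A is ':', %2C is ',', %21 is '!' and %27 is a single quote.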
'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl @pytest.mark.parametrize("kibana_version", ['7.0', '7.1', '7.2', '7.3']) def test_generate_kibana_discover_url_with_kibana_7x(kibana_version): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': kibana_version, 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_missing_kibana_discover_version(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_index_pattern_id': 'logs', 'timestamp_field': 'timestamp', 'name': 'test' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) assert url is None def test_generate_kibana_discover_url_with_missing_kibana_discover_app_url(): url = generate_kibana_discover_url( rule={ 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs', 'timestamp_field': 'timestamp', 'name': 'test' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) assert url is None def test_generate_kibana_discover_url_with_missing_kibana_discover_index_pattern_id(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'timestamp_field': 'timestamp', 'name': 'test' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) assert url is None def test_generate_kibana_discover_url_with_invalid_kibana_version(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '4.5', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) assert url is None def test_generate_kibana_discover_url_with_kibana_discover_app_url_env_substitution(environ): environ.update({ 'KIBANA_HOST': 'kibana', 'KIBANA_PORT': '5601', }) url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://$KIBANA_HOST:$KIBANA_PORT/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 
'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_from_timedelta(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '7.3', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_from_timedelta': timedelta(hours=1), 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T04:00:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T03%3A00%3A00Z%27%2C' + 'to%3A%272019-09-01T04%3A10%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_from_timedelta_and_timeframe(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '7.3', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_from_timedelta': timedelta(hours=1), 'timeframe': timedelta(minutes=20), 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T04:00:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T03%3A00%3A00Z%27%2C' + 'to%3A%272019-09-01T04%3A20%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_to_timedelta(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '7.3', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_to_timedelta': timedelta(hours=1), 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T04:00:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T03%3A50%3A00Z%27%2C' + 'to%3A%272019-09-01T05%3A00%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_to_timedelta_and_timeframe(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '7.3', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_to_timedelta': timedelta(hours=1), 'timeframe': timedelta(minutes=20), 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T04:00:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 
'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T03%3A40%3A00Z%27%2C' + 'to%3A%272019-09-01T05%3A00%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_timeframe(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '7.3', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'timeframe': timedelta(minutes=20), 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T04:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'filters%3A%21%28%29%2C' + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T04%3A10%3A00Z%27%2C' + 'to%3A%272019-09-01T04%3A50%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3Ad6cabfb6-aaef-44ea-89c5-600e9a76991a%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_custom_columns(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'kibana_discover_columns': ['level', 'message'], 'timestamp_field': 'timestamp' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28level%2Cmessage%29%2C' + 'filters%3A%21%28%29%2C' + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_single_filter(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'filter': [ {'term': {'level': 30}} ] }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'bool%3A%28must%3A%21%28%28term%3A%28level%3A30%29%29%29%29%2C' + 'meta%3A%28' # meta start + 'alias%3Afilter%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Abool%2C' + 'negate%3A%21f%2C' + 'type%3Acustom%2C' + 'value%3A%27%7B%22must%22%3A%5B%7B%22term%22%3A%7B%22level%22%3A30%7D%7D%5D%7D%27' + '%29' # meta end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def 
test_generate_kibana_discover_url_with_multiple_filters(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': '90943e30-9a47-11e8-b64d-95841ca0b247', 'timestamp_field': 'timestamp', 'filter': [ {'term': {'app': 'test'}}, {'term': {'level': 30}} ] }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'bool%3A%28must%3A%21%28%28term%3A%28app%3Atest%29%29%2C%28term%3A%28level%3A30%29%29%29%29%2C' + 'meta%3A%28' # meta start + 'alias%3Afilter%2C' + 'disabled%3A%21f%2C' + 'index%3A%2790943e30-9a47-11e8-b64d-95841ca0b247%27%2C' + 'key%3Abool%2C' + 'negate%3A%21f%2C' + 'type%3Acustom%2C' + 'value%3A%27%7B%22must%22%3A%5B' # value start + '%7B%22term%22%3A%7B%22app%22%3A%22test%22%7D%7D%2C%7B%22term%22%3A%7B%22level%22%3A30%7D%7D' + '%5D%7D%27' # value end + '%29' # meta end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%2790943e30-9a47-11e8-b64d-95841ca0b247%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_int_query_key(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'query_key': 'geo.dest' }, match={ 'timestamp': '2019-09-01T00:30:00Z', 'geo.dest': 200 } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Ageo.dest%2C' + 'negate%3A%21f%2C' + 'params%3A%28query%3A200%2C' # params start + 'type%3Aphrase' + '%29%2C' # params end + 'type%3Aphrase%2C' + 'value%3A%27200%27' + '%29%2C' # meta end + 'query%3A%28' # query start + 'match%3A%28' # match start + 'geo.dest%3A%28' # reponse start + 'query%3A200%2C' + 'type%3Aphrase' + '%29' # geo.dest end + '%29' # match end + '%29' # query end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_str_query_key(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'query_key': 'geo.dest' }, match={ 'timestamp': '2019-09-01T00:30:00Z', 'geo': { 'dest': 'ok' } } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 
'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Ageo.dest%2C' + 'negate%3A%21f%2C' + 'params%3A%28query%3Aok%2C' # params start + 'type%3Aphrase' + '%29%2C' # params end + 'type%3Aphrase%2C' + 'value%3Aok' + '%29%2C' # meta end + 'query%3A%28' # query start + 'match%3A%28' # match start + 'geo.dest%3A%28' # geo.dest start + 'query%3Aok%2C' + 'type%3Aphrase' + '%29' # geo.dest end + '%29' # match end + '%29' # query end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_null_query_key_value(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'query_key': 'status' }, match={ 'timestamp': '2019-09-01T00:30:00Z', 'status': None } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'exists%3A%28field%3Astatus%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Astatus%2C' + 'negate%3A%21t%2C' + 'type%3Aexists%2C' + 'value%3Aexists' + '%29' # meta end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_missing_query_key_value(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'query_key': 'status' }, match={ 'timestamp': '2019-09-01T00:30:00Z' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'exists%3A%28field%3Astatus%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Astatus%2C' + 'negate%3A%21t%2C' + 'type%3Aexists%2C' + 'value%3Aexists' + '%29' # meta end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_compound_query_key(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 
'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'compound_query_key': ['geo.src', 'geo.dest'], 'query_key': 'geo.src,geo.dest' }, match={ 'timestamp': '2019-09-01T00:30:00Z', 'geo': { 'src': 'CA', 'dest': 'US' } } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # geo.src filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Ageo.src%2C' + 'negate%3A%21f%2C' + 'params%3A%28query%3ACA%2C' # params start + 'type%3Aphrase' + '%29%2C' # params end + 'type%3Aphrase%2C' + 'value%3ACA' + '%29%2C' # meta end + 'query%3A%28' # query start + 'match%3A%28' # match start + 'geo.src%3A%28' # reponse start + 'query%3ACA%2C' + 'type%3Aphrase' + '%29' # geo.src end + '%29' # match end + '%29' # query end + '%29%2C' # geo.src filter end + '%28' # geo.dest filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Ageo.dest%2C' + 'negate%3A%21f%2C' + 'params%3A%28query%3AUS%2C' # params start + 'type%3Aphrase' + '%29%2C' # params end + 'type%3Aphrase%2C' + 'value%3AUS' + '%29%2C' # meta end + 'query%3A%28' # query start + 'match%3A%28' # match start + 'geo.dest%3A%28' # geo.dest start + 'query%3AUS%2C' + 'type%3Aphrase' + '%29' # geo.dest end + '%29' # match end + '%29' # query end + '%29' # geo.dest filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl def test_generate_kibana_discover_url_with_filter_and_query_key(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', 'kibana_discover_version': '6.8', 'kibana_discover_index_pattern_id': 'logs-*', 'timestamp_field': 'timestamp', 'filter': [ {'term': {'level': 30}} ], 'query_key': 'status' }, match={ 'timestamp': '2019-09-01T00:30:00Z', 'status': 'ok' } ) expectedUrl = ( 'http://kibana:5601/#/discover' + '?_g=%28' # global start + 'refreshInterval%3A%28pause%3A%21t%2Cvalue%3A0%29%2C' + 'time%3A%28' # time start + 'from%3A%272019-09-01T00%3A20%3A00Z%27%2C' + 'mode%3Aabsolute%2C' + 'to%3A%272019-09-01T00%3A40%3A00Z%27' + '%29' # time end + '%29' # global end + '&_a=%28' # app start + 'columns%3A%21%28_source%29%2C' + 'filters%3A%21%28' # filters start + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'bool%3A%28must%3A%21%28%28term%3A%28level%3A30%29%29%29%29%2C' + 'meta%3A%28' # meta start + 'alias%3Afilter%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Abool%2C' + 'negate%3A%21f%2C' + 'type%3Acustom%2C' + 'value%3A%27%7B%22must%22%3A%5B%7B%22term%22%3A%7B%22level%22%3A30%7D%7D%5D%7D%27' + '%29' # meta end + '%29%2C' # filter end + '%28' # filter start + '%27%24state%27%3A%28store%3AappState%29%2C' + 'meta%3A%28' # meta start + 'alias%3A%21n%2C' + 'disabled%3A%21f%2C' + 'index%3A%27logs-%2A%27%2C' + 'key%3Astatus%2C' + 'negate%3A%21f%2C' + 'params%3A%28query%3Aok%2C' # params start + 'type%3Aphrase' + '%29%2C' # params end + 'type%3Aphrase%2C' + 
'value%3Aok' + '%29%2C' # meta end + 'query%3A%28' # query start + 'match%3A%28' # match start + 'status%3A%28' # status start + 'query%3Aok%2C' + 'type%3Aphrase' + '%29' # status end + '%29' # match end + '%29' # query end + '%29' # filter end + '%29%2C' # filters end + 'index%3A%27logs-%2A%27%2C' + 'interval%3Aauto' + '%29' # app end ) assert url == expectedUrl elastalert-0.2.4/tests/kibana_test.py000066400000000000000000000052171364615736500177020ustar00rootroot00000000000000import copy import json from elastalert.kibana import add_filter from elastalert.kibana import dashboard_temp from elastalert.kibana import filters_from_dashboard from elastalert.kibana import kibana4_dashboard_link # Dashboard schema with only filters section test_dashboard = '''{ "title": "AD Lock Outs", "services": { "filter": { "list": { "0": { "type": "time", "field": "@timestamp", "from": "now-7d", "to": "now", "mandate": "must", "active": true, "alias": "", "id": 0 }, "1": { "type": "field", "field": "_log_type", "query": "\\"active_directory\\"", "mandate": "must", "active": true, "alias": "", "id": 1 }, "2": { "type": "querystring", "query": "ad.security_auditing_code:4740", "mandate": "must", "active": true, "alias": "", "id": 2 } }, "ids": [ 0, 1, 2 ] } } }''' test_dashboard = json.loads(test_dashboard) def test_filters_from_dashboard(): filters = filters_from_dashboard(test_dashboard) assert {'term': {'_log_type': '"active_directory"'}} in filters assert {'query': {'query_string': {'query': 'ad.security_auditing_code:4740'}}} in filters def test_add_filter(): basic_filter = {"term": {"this": "that"}} db = copy.deepcopy(dashboard_temp) add_filter(db, basic_filter) assert db['services']['filter']['list']['1'] == { 'field': 'this', 'alias': '', 'mandate': 'must', 'active': True, 'query': '"that"', 'type': 'field', 'id': 1 } list_filter = {"term": {"this": ["that", "those"]}} db = copy.deepcopy(dashboard_temp) add_filter(db, list_filter) assert db['services']['filter']['list']['1'] == { 'field': 'this', 'alias': '', 'mandate': 'must', 'active': True, 'query': '("that" AND "those")', 'type': 'field', 'id': 1 } def test_url_encoded(): url = kibana4_dashboard_link('example.com/#/Dashboard', '2015-01-01T00:00:00Z', '2017-01-01T00:00:00Z') assert not any([special_char in url for special_char in ["',\":;?&=()"]]) def test_url_env_substitution(environ): environ.update({ 'KIBANA_HOST': 'kibana', 'KIBANA_PORT': '5601', }) url = kibana4_dashboard_link( 'http://$KIBANA_HOST:$KIBANA_PORT/#/Dashboard', '2015-01-01T00:00:00Z', '2017-01-01T00:00:00Z', ) assert url.startswith('http://kibana:5601/#/Dashboard') elastalert-0.2.4/tests/loaders_test.py000066400000000000000000000442411364615736500201060ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import os import mock import pytest import elastalert.alerts import elastalert.ruletypes from elastalert.config import load_conf from elastalert.loaders import FileRulesLoader from elastalert.util import EAException test_config = {'rules_folder': 'test_folder', 'run_every': {'minutes': 10}, 'buffer_time': {'minutes': 10}, 'es_host': 'elasticsearch.test', 'es_port': 12345, 'writeback_index': 'test_index', 'writeback_alias': 'test_alias'} test_rule = {'es_host': 'test_host', 'es_port': 12345, 'name': 'testrule', 'type': 'spike', 'spike_height': 2, 'spike_type': 'up', 'timeframe': {'minutes': 10}, 'index': 'test_index', 'query_key': 'testkey', 'compare_key': 'comparekey', 'filter': [{'term': {'key': 'value'}}], 'alert': 'email', 'use_count_query': True, 
'doc_type': 'blsh', 'email': 'test@test.test', 'aggregation': {'hours': 2}, 'include': ['comparekey', '@timestamp']} test_args = mock.Mock() test_args.config = 'test_config' test_args.rule = None test_args.debug = False test_args.es_debug_trace = None def test_import_rules(): rules_loader = FileRulesLoader(test_config) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['type'] = 'testing.test.RuleType' with mock.patch.object(rules_loader, 'load_yaml') as mock_open: mock_open.return_value = test_rule_copy # Test that type is imported with mock.patch('builtins.__import__') as mock_import: mock_import.return_value = elastalert.ruletypes rules_loader.load_configuration('test_config', test_config) assert mock_import.call_args_list[0][0][0] == 'testing.test' assert mock_import.call_args_list[0][0][3] == ['RuleType'] # Test that alerts are imported test_rule_copy = copy.deepcopy(test_rule) mock_open.return_value = test_rule_copy test_rule_copy['alert'] = 'testing2.test2.Alerter' with mock.patch('builtins.__import__') as mock_import: mock_import.return_value = elastalert.alerts rules_loader.load_configuration('test_config', test_config) assert mock_import.call_args_list[0][0][0] == 'testing2.test2' assert mock_import.call_args_list[0][0][3] == ['Alerter'] def test_import_import(): rules_loader = FileRulesLoader(test_config) import_rule = copy.deepcopy(test_rule) del(import_rule['es_host']) del(import_rule['es_port']) import_rule['import'] = 'importme.ymlt' import_me = { 'es_host': 'imported_host', 'es_port': 12349, 'email': 'ignored@email', # overwritten by the email in import_rule } with mock.patch.object(rules_loader, 'get_yaml') as mock_open: mock_open.side_effect = [import_rule, import_me] rules = rules_loader.load_configuration('blah.yaml', test_config) assert mock_open.call_args_list[0][0] == ('blah.yaml',) assert mock_open.call_args_list[1][0] == ('importme.ymlt',) assert len(mock_open.call_args_list) == 2 assert rules['es_port'] == 12349 assert rules['es_host'] == 'imported_host' assert rules['email'] == ['test@test.test'] assert rules['filter'] == import_rule['filter'] # check global import_rule dependency assert rules_loader.import_rules == {'blah.yaml': ['importme.ymlt']} def test_import_absolute_import(): rules_loader = FileRulesLoader(test_config) import_rule = copy.deepcopy(test_rule) del(import_rule['es_host']) del(import_rule['es_port']) import_rule['import'] = '/importme.ymlt' import_me = { 'es_host': 'imported_host', 'es_port': 12349, 'email': 'ignored@email', # overwritten by the email in import_rule } with mock.patch.object(rules_loader, 'get_yaml') as mock_open: mock_open.side_effect = [import_rule, import_me] rules = rules_loader.load_configuration('blah.yaml', test_config) assert mock_open.call_args_list[0][0] == ('blah.yaml',) assert mock_open.call_args_list[1][0] == ('/importme.ymlt',) assert len(mock_open.call_args_list) == 2 assert rules['es_port'] == 12349 assert rules['es_host'] == 'imported_host' assert rules['email'] == ['test@test.test'] assert rules['filter'] == import_rule['filter'] def test_import_filter(): # Check that if a filter is specified the rules are merged: rules_loader = FileRulesLoader(test_config) import_rule = copy.deepcopy(test_rule) del(import_rule['es_host']) del(import_rule['es_port']) import_rule['import'] = 'importme.ymlt' import_me = { 'es_host': 'imported_host', 'es_port': 12349, 'filter': [{'term': {'ratchet': 'clank'}}], } with mock.patch.object(rules_loader, 'get_yaml') as mock_open: mock_open.side_effect = [import_rule, import_me] 
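# Note: both the imported file's filter and the rule's own filter are expected in the merged result; the assertion below lists the imported term first.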
rules = rules_loader.load_configuration('blah.yaml', test_config) assert rules['filter'] == [{'term': {'ratchet': 'clank'}}, {'term': {'key': 'value'}}] def test_load_inline_alert_rule(): rules_loader = FileRulesLoader(test_config) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['alert'] = [ { 'email': { 'email': 'foo@bar.baz' } }, { 'email': { 'email': 'baz@foo.bar' } } ] test_config_copy = copy.deepcopy(test_config) with mock.patch.object(rules_loader, 'get_yaml') as mock_open: mock_open.side_effect = [test_config_copy, test_rule_copy] rules_loader.load_modules(test_rule_copy) assert isinstance(test_rule_copy['alert'][0], elastalert.alerts.EmailAlerter) assert isinstance(test_rule_copy['alert'][1], elastalert.alerts.EmailAlerter) assert 'foo@bar.baz' in test_rule_copy['alert'][0].rule['email'] assert 'baz@foo.bar' in test_rule_copy['alert'][1].rule['email'] def test_file_rules_loader_get_names_recursive(): conf = {'scan_subdirectories': True, 'rules_folder': 'root'} rules_loader = FileRulesLoader(conf) walk_paths = (('root', ('folder_a', 'folder_b'), ('rule.yaml',)), ('root/folder_a', (), ('a.yaml', 'ab.yaml')), ('root/folder_b', (), ('b.yaml',))) with mock.patch('os.walk') as mock_walk: mock_walk.return_value = walk_paths paths = rules_loader.get_names(conf) paths = [p.replace(os.path.sep, '/') for p in paths] assert 'root/rule.yaml' in paths assert 'root/folder_a/a.yaml' in paths assert 'root/folder_a/ab.yaml' in paths assert 'root/folder_b/b.yaml' in paths assert len(paths) == 4 def test_file_rules_loader_get_names(): # Check for no subdirectory conf = {'scan_subdirectories': False, 'rules_folder': 'root'} rules_loader = FileRulesLoader(conf) files = ['badfile', 'a.yaml', 'b.yaml'] with mock.patch('os.listdir') as mock_list: with mock.patch('os.path.isfile') as mock_path: mock_path.return_value = True mock_list.return_value = files paths = rules_loader.get_names(conf) paths = [p.replace(os.path.sep, '/') for p in paths] assert 'root/a.yaml' in paths assert 'root/b.yaml' in paths assert len(paths) == 2 def test_load_rules(): test_rule_copy = copy.deepcopy(test_rule) test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with mock.patch('os.walk') as mock_ls: mock_ls.return_value = [('', [], ['testrule.yaml'])] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) assert isinstance(rules['rules'][0]['type'], elastalert.ruletypes.RuleType) assert isinstance(rules['rules'][0]['alert'][0], elastalert.alerts.Alerter) assert isinstance(rules['rules'][0]['timeframe'], datetime.timedelta) assert isinstance(rules['run_every'], datetime.timedelta) for included_key in ['comparekey', 'testkey', '@timestamp']: assert included_key in rules['rules'][0]['include'] # Assert include doesn't contain duplicates assert rules['rules'][0]['include'].count('@timestamp') == 1 assert rules['rules'][0]['include'].count('comparekey') == 1 def test_load_default_host_port(): test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('es_host') test_rule_copy.pop('es_port') test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with 
mock.patch('os.walk') as mock_ls: mock_ls.return_value = [('', [], ['testrule.yaml'])] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) # Assert include doesn't contain duplicates assert rules['es_port'] == 12345 assert rules['es_host'] == 'elasticsearch.test' def test_load_ssl_env_false(): test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('es_host') test_rule_copy.pop('es_port') test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with mock.patch('os.listdir') as mock_ls: with mock.patch.dict(os.environ, {'ES_USE_SSL': 'false'}): mock_ls.return_value = ['testrule.yaml'] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) assert rules['use_ssl'] is False def test_load_ssl_env_true(): test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('es_host') test_rule_copy.pop('es_port') test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with mock.patch('os.listdir') as mock_ls: with mock.patch.dict(os.environ, {'ES_USE_SSL': 'true'}): mock_ls.return_value = ['testrule.yaml'] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) assert rules['use_ssl'] is True def test_load_url_prefix_env(): test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('es_host') test_rule_copy.pop('es_port') test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with mock.patch('os.listdir') as mock_ls: with mock.patch.dict(os.environ, {'ES_URL_PREFIX': 'es/'}): mock_ls.return_value = ['testrule.yaml'] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) assert rules['es_url_prefix'] == 'es/' def test_load_disabled_rules(): test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['is_enabled'] = False test_config_copy = copy.deepcopy(test_config) with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = test_rule_copy with mock.patch('os.listdir') as mock_ls: mock_ls.return_value = ['testrule.yaml'] rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) # The rule is not loaded for it has "is_enabled=False" assert len(rules['rules']) == 0 def test_raises_on_missing_config(): optional_keys = ('aggregation', 'use_count_query', 'query_key', 'compare_key', 'filter', 'include', 'es_host', 'es_port', 'name') test_rule_copy = copy.deepcopy(test_rule) for key in list(test_rule_copy.keys()): test_rule_copy = copy.deepcopy(test_rule) test_config_copy = copy.deepcopy(test_config) test_rule_copy.pop(key) # Non required keys if key in optional_keys: continue with mock.patch('elastalert.config.yaml_loader') as mock_conf_open: mock_conf_open.return_value = test_config_copy with mock.patch('elastalert.loaders.yaml_loader') as mock_rule_open: mock_rule_open.return_value = 
test_rule_copy with mock.patch('os.walk') as mock_walk: mock_walk.return_value = [('', [], ['testrule.yaml'])] with pytest.raises(EAException, message='key %s should be required' % key): rules = load_conf(test_args) rules['rules'] = rules['rules_loader'].load(rules) def test_compound_query_key(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('use_count_query') test_rule_copy['query_key'] = ['field1', 'field2'] rules_loader.load_options(test_rule_copy, test_config, 'filename.yaml') assert 'field1' in test_rule_copy['include'] assert 'field2' in test_rule_copy['include'] assert test_rule_copy['query_key'] == 'field1,field2' assert test_rule_copy['compound_query_key'] == ['field1', 'field2'] def test_query_key_with_single_value(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('use_count_query') test_rule_copy['query_key'] = ['field1'] rules_loader.load_options(test_rule_copy, test_config, 'filename.yaml') assert 'field1' in test_rule_copy['include'] assert test_rule_copy['query_key'] == 'field1' assert 'compound_query_key' not in test_rule_copy def test_query_key_with_no_values(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('use_count_query') test_rule_copy['query_key'] = [] rules_loader.load_options(test_rule_copy, test_config, 'filename.yaml') assert 'query_key' not in test_rule_copy assert 'compound_query_key' not in test_rule_copy def test_name_inference(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy.pop('name') rules_loader.load_options(test_rule_copy, test_config, 'msmerc woz ere.yaml') assert test_rule_copy['name'] == 'msmerc woz ere' def test_raises_on_bad_generate_kibana_filters(): test_rule['generate_kibana_link'] = True bad_filters = [[{'not': {'terms': {'blah': 'blah'}}}], [{'terms': {'blah': 'blah'}}], [{'query': {'not_querystring': 'this:that'}}], [{'query': {'wildcard': 'this*that'}}], [{'blah': 'blah'}]] good_filters = [[{'term': {'field': 'value'}}], [{'not': {'term': {'this': 'that'}}}], [{'not': {'query': {'query_string': {'query': 'this:that'}}}}], [{'query': {'query_string': {'query': 'this:that'}}}], [{'range': {'blah': {'from': 'a', 'to': 'b'}}}], [{'not': {'range': {'blah': {'from': 'a', 'to': 'b'}}}}]] # Test that all the good filters work, but fail with a bad filter added for good in good_filters: test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['filter'] = good with mock.patch.object(rules_loader, 'get_yaml') as mock_open: mock_open.return_value = test_rule_copy rules_loader.load_configuration('blah', test_config) for bad in bad_filters: test_rule_copy['filter'] = good + bad with pytest.raises(EAException): rules_loader.load_configuration('blah', test_config) def test_kibana_discover_from_timedelta(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['kibana_discover_from_timedelta'] = {'minutes': 2} rules_loader.load_options(test_rule_copy, test_config, 'filename.yaml') assert 
isinstance(test_rule_copy['kibana_discover_from_timedelta'], datetime.timedelta) assert test_rule_copy['kibana_discover_from_timedelta'] == datetime.timedelta(minutes=2) def test_kibana_discover_to_timedelta(): test_config_copy = copy.deepcopy(test_config) rules_loader = FileRulesLoader(test_config_copy) test_rule_copy = copy.deepcopy(test_rule) test_rule_copy['kibana_discover_to_timedelta'] = {'minutes': 2} rules_loader.load_options(test_rule_copy, test_config, 'filename.yaml') assert isinstance(test_rule_copy['kibana_discover_to_timedelta'], datetime.timedelta) assert test_rule_copy['kibana_discover_to_timedelta'] == datetime.timedelta(minutes=2) elastalert-0.2.4/tests/rules_test.py000066400000000000000000001332031364615736500176040ustar00rootroot00000000000000# -*- coding: utf-8 -*- import copy import datetime import mock import pytest from elastalert.ruletypes import AnyRule from elastalert.ruletypes import BaseAggregationRule from elastalert.ruletypes import BlacklistRule from elastalert.ruletypes import CardinalityRule from elastalert.ruletypes import ChangeRule from elastalert.ruletypes import EventWindow from elastalert.ruletypes import FlatlineRule from elastalert.ruletypes import FrequencyRule from elastalert.ruletypes import MetricAggregationRule from elastalert.ruletypes import NewTermsRule from elastalert.ruletypes import PercentageMatchRule from elastalert.ruletypes import SpikeRule from elastalert.ruletypes import WhitelistRule from elastalert.util import dt_to_ts from elastalert.util import EAException from elastalert.util import ts_now from elastalert.util import ts_to_dt def hits(size, **kwargs): ret = [] for n in range(size): ts = ts_to_dt('2014-09-26T12:%s:%sZ' % (n / 60, n % 60)) n += 1 event = create_event(ts, **kwargs) ret.append(event) return ret def create_event(timestamp, timestamp_field='@timestamp', **kwargs): event = {timestamp_field: timestamp} event.update(**kwargs) return event def create_bucket_aggregation(agg_name, buckets): agg = {agg_name: {'buckets': buckets}} return agg def create_percentage_match_agg(match_count, other_count): agg = create_bucket_aggregation( 'percentage_match_aggs', { 'match_bucket': { 'doc_count': match_count }, '_other_': { 'doc_count': other_count } } ) return agg def assert_matches_have(matches, terms): assert len(matches) == len(terms) for match, term in zip(matches, terms): assert term[0] in match assert match[term[0]] == term[1] if len(term) > 2: assert match[term[2]] == term[3] def test_any(): event = hits(1) rule = AnyRule({}) rule.add_data([event]) assert rule.matches == [event] def test_freq(): events = hits(60, timestamp_field='blah', username='qlo') rules = {'num_events': 59, 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': 'blah'} rule = FrequencyRule(rules) rule.add_data(events) assert len(rule.matches) == 1 # Test wit query_key events = hits(60, timestamp_field='blah', username='qlo') rules['query_key'] = 'username' rule = FrequencyRule(rules) rule.add_data(events) assert len(rule.matches) == 1 # Doesn't match events = hits(60, timestamp_field='blah', username='qlo') rules['num_events'] = 61 rule = FrequencyRule(rules) rule.add_data(events) assert len(rule.matches) == 0 # garbage collection assert 'qlo' in rule.occurrences rule.garbage_collect(ts_to_dt('2014-09-28T12:0:0')) assert rule.occurrences == {} def test_freq_count(): rules = {'num_events': 100, 'timeframe': datetime.timedelta(hours=1), 'use_count_query': True} # Normal match rule = FrequencyRule(rules) 
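# With use_count_query, add_count_data receives pre-aggregated {timestamp: count} pairs; a match is expected once the counts inside the one-hour timeframe sum to at least num_events (here 75 + 10 + 10 + 6 >= 100).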
rule.add_count_data({ts_to_dt('2014-10-10T00:00:00'): 75}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T00:15:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T00:25:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T00:45:00'): 6}) assert len(rule.matches) == 1 # First data goes out of timeframe first rule = FrequencyRule(rules) rule.add_count_data({ts_to_dt('2014-10-10T00:00:00'): 75}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T00:45:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T00:55:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T01:05:00'): 6}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-10T01:00:00'): 75}) assert len(rule.matches) == 1 def test_freq_out_of_order(): events = hits(60, timestamp_field='blah', username='qlo') rules = {'num_events': 59, 'timeframe': datetime.timedelta(hours=1), 'timestamp_field': 'blah'} rule = FrequencyRule(rules) rule.add_data(events[:10]) assert len(rule.matches) == 0 # Try to add events from before the first occurrence rule.add_data([{'blah': ts_to_dt('2014-09-26T11:00:00'), 'username': 'qlo'}] * 50) assert len(rule.matches) == 0 rule.add_data(events[15:20]) assert len(rule.matches) == 0 rule.add_data(events[10:15]) assert len(rule.matches) == 0 rule.add_data(events[20:55]) rule.add_data(events[57:]) assert len(rule.matches) == 0 rule.add_data(events[55:57]) assert len(rule.matches) == 1 def test_freq_terms(): rules = {'num_events': 10, 'timeframe': datetime.timedelta(hours=1), 'query_key': 'username'} rule = FrequencyRule(rules) terms1 = {ts_to_dt('2014-01-01T00:01:00Z'): [{'key': 'userA', 'doc_count': 1}, {'key': 'userB', 'doc_count': 5}]} terms2 = {ts_to_dt('2014-01-01T00:10:00Z'): [{'key': 'userA', 'doc_count': 8}, {'key': 'userB', 'doc_count': 5}]} terms3 = {ts_to_dt('2014-01-01T00:25:00Z'): [{'key': 'userA', 'doc_count': 3}, {'key': 'userB', 'doc_count': 0}]} # Initial data rule.add_terms_data(terms1) assert len(rule.matches) == 0 # Match for user B rule.add_terms_data(terms2) assert len(rule.matches) == 1 assert rule.matches[0].get('username') == 'userB' # Match for user A rule.add_terms_data(terms3) assert len(rule.matches) == 2 assert rule.matches[1].get('username') == 'userA' def test_eventwindow(): timeframe = datetime.timedelta(minutes=10) window = EventWindow(timeframe) timestamps = [ts_to_dt(x) for x in ['2014-01-01T10:00:00', '2014-01-01T10:05:00', '2014-01-01T10:03:00', '2014-01-01T09:55:00', '2014-01-01T10:09:00']] for ts in timestamps: window.append([{'@timestamp': ts}, 1]) timestamps.sort() for exp, actual in zip(timestamps[1:], window.data): assert actual[0]['@timestamp'] == exp window.append([{'@timestamp': ts_to_dt('2014-01-01T10:14:00')}, 1]) timestamps.append(ts_to_dt('2014-01-01T10:14:00')) for exp, actual in zip(timestamps[3:], window.data): assert actual[0]['@timestamp'] == exp def test_spike_count(): rules = {'threshold_ref': 10, 'spike_height': 2, 'timeframe': datetime.timedelta(seconds=10), 'spike_type': 'both', 'timestamp_field': '@timestamp'} rule = SpikeRule(rules) # Double rate of events at 20 seconds rule.add_count_data({ts_to_dt('2014-09-26T00:00:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-09-26T00:00:10'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-09-26T00:00:20'): 20}) assert len(rule.matches) == 1 # Downward spike rule = SpikeRule(rules) 
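# With spike_type 'both', a drop from the 10-event reference window down to zero events should also register as a spike and produce a match.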
rule.add_count_data({ts_to_dt('2014-09-26T00:00:00'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-09-26T00:00:10'): 10}) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-09-26T00:00:20'): 0}) assert len(rule.matches) == 1 def test_spike_deep_key(): rules = {'threshold_ref': 10, 'spike_height': 2, 'timeframe': datetime.timedelta(seconds=10), 'spike_type': 'both', 'timestamp_field': '@timestamp', 'query_key': 'foo.bar.baz'} rule = SpikeRule(rules) rule.add_data([{'@timestamp': ts_to_dt('2015'), 'foo': {'bar': {'baz': 'LOL'}}}]) assert 'LOL' in rule.cur_windows def test_spike(): # Events are 1 per second events = hits(100, timestamp_field='ts') # Constant rate, doesn't match rules = {'threshold_ref': 10, 'spike_height': 2, 'timeframe': datetime.timedelta(seconds=10), 'spike_type': 'both', 'use_count_query': False, 'timestamp_field': 'ts'} rule = SpikeRule(rules) rule.add_data(events) assert len(rule.matches) == 0 # Double the rate of events after [50:] events2 = events[:50] for event in events[50:]: events2.append(event) events2.append({'ts': event['ts'] + datetime.timedelta(milliseconds=1)}) rules['spike_type'] = 'up' rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 1 # Doesn't match rules['spike_height'] = 3 rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 0 # Downward spike events = events[:50] + events[75:] rules['spike_type'] = 'down' rule = SpikeRule(rules) rule.add_data(events) assert len(rule.matches) == 1 # Doesn't meet threshold_ref # When ref hits 11, cur is only 20 rules['spike_height'] = 2 rules['threshold_ref'] = 11 rules['spike_type'] = 'up' rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 0 # Doesn't meet threshold_cur # Maximum rate of events is 20 per 10 seconds rules['threshold_ref'] = 10 rules['threshold_cur'] = 30 rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 0 # Alert on new data # (At least 25 events occur before 30 seconds has elapsed) rules.pop('threshold_ref') rules['timeframe'] = datetime.timedelta(seconds=30) rules['threshold_cur'] = 25 rules['spike_height'] = 2 rules['alert_on_new_data'] = True rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 1 def test_spike_query_key(): events = hits(100, timestamp_field='ts', username='qlo') # Constant rate, doesn't match rules = {'threshold_ref': 10, 'spike_height': 2, 'timeframe': datetime.timedelta(seconds=10), 'spike_type': 'both', 'use_count_query': False, 'timestamp_field': 'ts', 'query_key': 'username'} rule = SpikeRule(rules) rule.add_data(events) assert len(rule.matches) == 0 # Double the rate of events, but with a different usename events_bob = hits(100, timestamp_field='ts', username='bob') events2 = events[:50] for num in range(50, 99): events2.append(events_bob[num]) events2.append(events[num]) rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 0 # Double the rate of events, with the same username events2 = events[:50] for num in range(50, 99): events2.append(events_bob[num]) events2.append(events[num]) events2.append(events[num]) rule = SpikeRule(rules) rule.add_data(events2) assert len(rule.matches) == 1 def test_spike_terms(): rules = {'threshold_ref': 5, 'spike_height': 2, 'timeframe': datetime.timedelta(minutes=10), 'spike_type': 'both', 'use_count_query': False, 'timestamp_field': 'ts', 'query_key': 'username', 'use_term_query': True} terms1 = {ts_to_dt('2014-01-01T00:01:00Z'): [{'key': 'userA', 'doc_count': 
10}, {'key': 'userB', 'doc_count': 5}]} terms2 = {ts_to_dt('2014-01-01T00:10:00Z'): [{'key': 'userA', 'doc_count': 22}, {'key': 'userB', 'doc_count': 5}]} terms3 = {ts_to_dt('2014-01-01T00:25:00Z'): [{'key': 'userA', 'doc_count': 25}, {'key': 'userB', 'doc_count': 27}]} terms4 = {ts_to_dt('2014-01-01T00:27:00Z'): [{'key': 'userA', 'doc_count': 10}, {'key': 'userB', 'doc_count': 12}, {'key': 'userC', 'doc_count': 100}]} terms5 = {ts_to_dt('2014-01-01T00:30:00Z'): [{'key': 'userD', 'doc_count': 100}, {'key': 'userC', 'doc_count': 100}]} rule = SpikeRule(rules) # Initial input rule.add_terms_data(terms1) assert len(rule.matches) == 0 # No spike for UserA because windows not filled rule.add_terms_data(terms2) assert len(rule.matches) == 0 # Spike for userB only rule.add_terms_data(terms3) assert len(rule.matches) == 1 assert rule.matches[0].get('username') == 'userB' # Test no alert for new user over threshold rules.pop('threshold_ref') rules['threshold_cur'] = 50 rule = SpikeRule(rules) rule.add_terms_data(terms1) rule.add_terms_data(terms2) rule.add_terms_data(terms3) rule.add_terms_data(terms4) assert len(rule.matches) == 0 # Test alert_on_new_data rules['alert_on_new_data'] = True rule = SpikeRule(rules) rule.add_terms_data(terms1) rule.add_terms_data(terms2) rule.add_terms_data(terms3) rule.add_terms_data(terms4) assert len(rule.matches) == 1 # Test that another alert doesn't fire immediately for userC but it does for userD rule.matches = [] rule.add_terms_data(terms5) assert len(rule.matches) == 1 assert rule.matches[0]['username'] == 'userD' def test_spike_terms_query_key_alert_on_new_data(): rules = {'spike_height': 1.5, 'timeframe': datetime.timedelta(minutes=10), 'spike_type': 'both', 'use_count_query': False, 'timestamp_field': 'ts', 'query_key': 'username', 'use_term_query': True, 'alert_on_new_data': True} terms1 = {ts_to_dt('2014-01-01T00:01:00Z'): [{'key': 'userA', 'doc_count': 10}]} terms2 = {ts_to_dt('2014-01-01T00:06:00Z'): [{'key': 'userA', 'doc_count': 10}]} terms3 = {ts_to_dt('2014-01-01T00:11:00Z'): [{'key': 'userA', 'doc_count': 10}]} terms4 = {ts_to_dt('2014-01-01T00:21:00Z'): [{'key': 'userA', 'doc_count': 20}]} terms5 = {ts_to_dt('2014-01-01T00:26:00Z'): [{'key': 'userA', 'doc_count': 20}]} terms6 = {ts_to_dt('2014-01-01T00:31:00Z'): [{'key': 'userA', 'doc_count': 20}]} terms7 = {ts_to_dt('2014-01-01T00:36:00Z'): [{'key': 'userA', 'doc_count': 20}]} terms8 = {ts_to_dt('2014-01-01T00:41:00Z'): [{'key': 'userA', 'doc_count': 20}]} rule = SpikeRule(rules) # Initial input rule.add_terms_data(terms1) assert len(rule.matches) == 0 # No spike for UserA because windows not filled rule.add_terms_data(terms2) assert len(rule.matches) == 0 rule.add_terms_data(terms3) assert len(rule.matches) == 0 rule.add_terms_data(terms4) assert len(rule.matches) == 0 # Spike rule.add_terms_data(terms5) assert len(rule.matches) == 1 rule.matches[:] = [] # There will be no more spikes since all terms have the same doc_count rule.add_terms_data(terms6) assert len(rule.matches) == 0 rule.add_terms_data(terms7) assert len(rule.matches) == 0 rule.add_terms_data(terms8) assert len(rule.matches) == 0 def test_blacklist(): events = [{'@timestamp': ts_to_dt('2014-09-26T12:34:56Z'), 'term': 'good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:57Z'), 'term': 'bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:58Z'), 'term': 'also good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:59Z'), 'term': 'really bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:35:00Z'), 'no_term': 'bad'}] rules = {'blacklist': ['bad', 
'really bad'], 'compare_key': 'term', 'timestamp_field': '@timestamp'} rule = BlacklistRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('term', 'bad'), ('term', 'really bad')]) def test_whitelist(): events = [{'@timestamp': ts_to_dt('2014-09-26T12:34:56Z'), 'term': 'good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:57Z'), 'term': 'bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:58Z'), 'term': 'also good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:59Z'), 'term': 'really bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:35:00Z'), 'no_term': 'bad'}] rules = {'whitelist': ['good', 'also good'], 'compare_key': 'term', 'ignore_null': True, 'timestamp_field': '@timestamp'} rule = WhitelistRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('term', 'bad'), ('term', 'really bad')]) def test_whitelist_dont_ignore_nulls(): events = [{'@timestamp': ts_to_dt('2014-09-26T12:34:56Z'), 'term': 'good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:57Z'), 'term': 'bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:58Z'), 'term': 'also good'}, {'@timestamp': ts_to_dt('2014-09-26T12:34:59Z'), 'term': 'really bad'}, {'@timestamp': ts_to_dt('2014-09-26T12:35:00Z'), 'no_term': 'bad'}] rules = {'whitelist': ['good', 'also good'], 'compare_key': 'term', 'ignore_null': True, 'timestamp_field': '@timestamp'} rules['ignore_null'] = False rule = WhitelistRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('term', 'bad'), ('term', 'really bad'), ('no_term', 'bad')]) def test_change(): events = hits(10, username='qlo', term='good', second_term='yes') events[8].pop('term') events[8].pop('second_term') events[9]['term'] = 'bad' events[9]['second_term'] = 'no' rules = {'compound_compare_key': ['term', 'second_term'], 'query_key': 'username', 'ignore_null': True, 'timestamp_field': '@timestamp'} rule = ChangeRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('term', 'bad', 'second_term', 'no')]) # Unhashable QK events2 = hits(10, username=['qlo'], term='good', second_term='yes') events2[9]['term'] = 'bad' events2[9]['second_term'] = 'no' rule = ChangeRule(rules) rule.add_data(events2) assert_matches_have(rule.matches, [('term', 'bad', 'second_term', 'no')]) # Don't ignore nulls rules['ignore_null'] = False rule = ChangeRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('username', 'qlo'), ('term', 'bad', 'second_term', 'no')]) # With timeframe rules['timeframe'] = datetime.timedelta(seconds=2) rules['ignore_null'] = True rule = ChangeRule(rules) rule.add_data(events) assert_matches_have(rule.matches, [('term', 'bad', 'second_term', 'no')]) # With timeframe, doesn't match events = events[:8] + events[9:] rules['timeframe'] = datetime.timedelta(seconds=1) rule = ChangeRule(rules) rule.add_data(events) assert rule.matches == [] def test_new_term(): rules = {'fields': ['a', 'b'], 'timestamp_field': '@timestamp', 'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts} mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1}, {'key': 'key2', 'doc_count': 5}]}}}} with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} call_args = [] # search is called with a mutable dict containing timestamps, this is required to test def record_args(*args, **kwargs): 
call_args.append((copy.deepcopy(args), copy.deepcopy(kwargs))) return mock_res mock_es.return_value.search.side_effect = record_args rule = NewTermsRule(rules) # 30 day default range, 1 day default step, times 2 fields assert rule.es.search.call_count == 60 # Assert that all calls have the proper ordering of time ranges old_ts = '2010-01-01T00:00:00Z' old_field = '' for call in call_args: field = call[1]['body']['aggs']['filtered']['aggs']['values']['terms']['field'] if old_field != field: old_field = field old_ts = '2010-01-01T00:00:00Z' gte = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['gte'] assert gte > old_ts lt = call[1]['body']['aggs']['filtered']['filter']['bool']['must'][0]['range']['@timestamp']['lt'] assert lt > gte old_ts = gte # Key1 and key2 shouldn't cause a match rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2'}]) assert rule.matches == [] # Neither will missing values rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}]) assert rule.matches == [] # Key3 causes an alert for field b rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}]) assert len(rule.matches) == 1 assert rule.matches[0]['new_field'] == 'b' assert rule.matches[0]['b'] == 'key3' rule.matches = [] # Key3 doesn't cause another alert for field b rule.add_data([{'@timestamp': ts_now(), 'a': 'key2', 'b': 'key3'}]) assert rule.matches == [] # Missing_field rules['alert_on_missing_field'] = True with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} rule = NewTermsRule(rules) rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}]) assert len(rule.matches) == 1 assert rule.matches[0]['missing_field'] == 'b' def test_new_term_nested_field(): rules = {'fields': ['a', 'b.c'], 'timestamp_field': '@timestamp', 'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts} mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1}, {'key': 'key2', 'doc_count': 5}]}}}} with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} rule = NewTermsRule(rules) assert rule.es.search.call_count == 60 # Key3 causes an alert for nested field b.c rule.add_data([{'@timestamp': ts_now(), 'b': {'c': 'key3'}}]) assert len(rule.matches) == 1 assert rule.matches[0]['new_field'] == 'b.c' assert rule.matches[0]['b']['c'] == 'key3' rule.matches = [] def test_new_term_with_terms(): rules = {'fields': ['a'], 'timestamp_field': '@timestamp', 'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'query_key': 'a', 'window_step_size': {'days': 2}, 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts} mock_res = {'aggregations': {'filtered': {'values': {'buckets': [{'key': 'key1', 'doc_count': 1}, {'key': 'key2', 'doc_count': 5}]}}}} with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} rule = NewTermsRule(rules) # Only 15 queries because of custom step size assert rule.es.search.call_count == 15 # Key1 and key2 shouldn't cause a match terms = {ts_now(): [{'key': 'key1', 
'doc_count': 1}, {'key': 'key2', 'doc_count': 1}]} rule.add_terms_data(terms) assert rule.matches == [] # Key3 causes an alert for field a terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]} rule.add_terms_data(terms) assert len(rule.matches) == 1 assert rule.matches[0]['new_field'] == 'a' assert rule.matches[0]['a'] == 'key3' rule.matches = [] # Key3 doesn't cause another alert terms = {ts_now(): [{'key': 'key3', 'doc_count': 1}]} rule.add_terms_data(terms) assert rule.matches == [] def test_new_term_with_composite_fields(): rules = {'fields': [['a', 'b', 'c'], ['d', 'e.f']], 'timestamp_field': '@timestamp', 'es_host': 'example.com', 'es_port': 10, 'index': 'logstash', 'ts_to_dt': ts_to_dt, 'dt_to_ts': dt_to_ts} mock_res = { 'aggregations': { 'filtered': { 'values': { 'buckets': [ { 'key': 'key1', 'doc_count': 5, 'values': { 'buckets': [ { 'key': 'key2', 'doc_count': 5, 'values': { 'buckets': [ { 'key': 'key3', 'doc_count': 3, }, { 'key': 'key4', 'doc_count': 2, }, ] } } ] } } ] } } } } with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} rule = NewTermsRule(rules) assert rule.es.search.call_count == 60 # key3 already exists, and thus shouldn't cause a match rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key3'}]) assert rule.matches == [] # key5 causes an alert for composite field [a, b, c] rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key5'}]) assert len(rule.matches) == 1 assert rule.matches[0]['new_field'] == ('a', 'b', 'c') assert rule.matches[0]['a'] == 'key1' assert rule.matches[0]['b'] == 'key2' assert rule.matches[0]['c'] == 'key5' rule.matches = [] # New values in other fields that are not part of the composite key should not cause an alert rule.add_data([{'@timestamp': ts_now(), 'a': 'key1', 'b': 'key2', 'c': 'key4', 'd': 'unrelated_value'}]) assert len(rule.matches) == 0 rule.matches = [] # Verify nested fields work properly # Key6 causes an alert for nested field e.f rule.add_data([{'@timestamp': ts_now(), 'd': 'key4', 'e': {'f': 'key6'}}]) assert len(rule.matches) == 1 assert rule.matches[0]['new_field'] == ('d', 'e.f') assert rule.matches[0]['d'] == 'key4' assert rule.matches[0]['e']['f'] == 'key6' rule.matches = [] # Missing_fields rules['alert_on_missing_field'] = True with mock.patch('elastalert.ruletypes.elasticsearch_client') as mock_es: mock_es.return_value = mock.Mock() mock_es.return_value.search.return_value = mock_res mock_es.return_value.info.return_value = {'version': {'number': '2.x.x'}} rule = NewTermsRule(rules) rule.add_data([{'@timestamp': ts_now(), 'a': 'key2'}]) assert len(rule.matches) == 2 # This means that any one of the three n composite fields were not present assert rule.matches[0]['missing_field'] == ('a', 'b', 'c') assert rule.matches[1]['missing_field'] == ('d', 'e.f') def test_flatline(): events = hits(40) rules = { 'timeframe': datetime.timedelta(seconds=30), 'threshold': 2, 'timestamp_field': '@timestamp', } rule = FlatlineRule(rules) # 1 hit should cause an alert until after at least 30 seconds pass rule.add_data(hits(1)) assert rule.matches == [] # Add hits with timestamps 2014-09-26T12:00:00 --> 2014-09-26T12:00:09 rule.add_data(events[0:10]) # This will be run at the end of the hits rule.garbage_collect(ts_to_dt('2014-09-26T12:00:11Z')) assert rule.matches == [] # This would be run if the query returned nothing 
for a future timestamp rule.garbage_collect(ts_to_dt('2014-09-26T12:00:45Z')) assert len(rule.matches) == 1 # After another garbage collection, since there are still no events, a new match is added rule.garbage_collect(ts_to_dt('2014-09-26T12:00:50Z')) assert len(rule.matches) == 2 # Add hits with timestamps 2014-09-26T12:00:30 --> 2014-09-26T12:00:39 rule.add_data(events[30:]) # Now that there is data in the last 30 seconds, no more matches should be added rule.garbage_collect(ts_to_dt('2014-09-26T12:00:55Z')) assert len(rule.matches) == 2 # After that window passes with no more data, a new match is added rule.garbage_collect(ts_to_dt('2014-09-26T12:01:11Z')) assert len(rule.matches) == 3 def test_flatline_no_data(): rules = { 'timeframe': datetime.timedelta(seconds=30), 'threshold': 2, 'timestamp_field': '@timestamp', } rule = FlatlineRule(rules) # Initial lack of data rule.garbage_collect(ts_to_dt('2014-09-26T12:00:00Z')) assert len(rule.matches) == 0 # Passed the timeframe, still no events rule.garbage_collect(ts_to_dt('2014-09-26T12:35:00Z')) assert len(rule.matches) == 1 def test_flatline_count(): rules = {'timeframe': datetime.timedelta(seconds=30), 'threshold': 1, 'timestamp_field': '@timestamp'} rule = FlatlineRule(rules) rule.add_count_data({ts_to_dt('2014-10-11T00:00:00'): 1}) rule.garbage_collect(ts_to_dt('2014-10-11T00:00:10')) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-11T00:00:15'): 0}) rule.garbage_collect(ts_to_dt('2014-10-11T00:00:20')) assert len(rule.matches) == 0 rule.add_count_data({ts_to_dt('2014-10-11T00:00:35'): 0}) assert len(rule.matches) == 1 def test_flatline_query_key(): rules = {'timeframe': datetime.timedelta(seconds=30), 'threshold': 1, 'use_query_key': True, 'query_key': 'qk', 'timestamp_field': '@timestamp'} rule = FlatlineRule(rules) # Adding three separate query keys; the flatline rule should eventually trigger for each of them rule.add_data(hits(1, qk='key1')) rule.add_data(hits(1, qk='key2')) rule.add_data(hits(1, qk='key3')) assert rule.matches == [] # This will be run at the end of the hits rule.garbage_collect(ts_to_dt('2014-09-26T12:00:11Z')) assert rule.matches == [] # Add new data from key3.
It will not immediately cause an alert rule.add_data([create_event(ts_to_dt('2014-09-26T12:00:20Z'), qk='key3')]) # key1 and key2 have not had any new data, so they will trigger the flatline alert timestamp = '2014-09-26T12:00:45Z' rule.garbage_collect(ts_to_dt(timestamp)) assert len(rule.matches) == 2 assert set(['key1', 'key2']) == set([m['key'] for m in rule.matches if m['@timestamp'] == timestamp]) # Next time the rule runs, all 3 keys still have no data, so all three will cause an alert timestamp = '2014-09-26T12:01:20Z' rule.garbage_collect(ts_to_dt(timestamp)) assert len(rule.matches) == 5 assert set(['key1', 'key2', 'key3']) == set([m['key'] for m in rule.matches if m['@timestamp'] == timestamp]) def test_flatline_forget_query_key(): rules = {'timeframe': datetime.timedelta(seconds=30), 'threshold': 1, 'query_key': 'qk', 'forget_keys': True, 'timestamp_field': '@timestamp'} rule = FlatlineRule(rules) # Adding a single query key; the flatline rule should trigger for it once the timeframe elapses with no data rule.add_data(hits(1, qk='key1')) assert rule.matches == [] # This will be run at the end of the hits rule.garbage_collect(ts_to_dt('2014-09-26T12:00:11Z')) assert rule.matches == [] # Key1 should alert once the timeframe has passed with no new data timestamp = '2014-09-26T12:00:45Z' rule.garbage_collect(ts_to_dt(timestamp)) assert len(rule.matches) == 1 rule.matches = [] # key1 was forgotten, so no more matches rule.garbage_collect(ts_to_dt('2014-09-26T12:01:11Z')) assert rule.matches == [] def test_cardinality_max(): rules = {'max_cardinality': 4, 'timeframe': datetime.timedelta(minutes=10), 'cardinality_field': 'user', 'timestamp_field': '@timestamp'} rule = CardinalityRule(rules) # Add 4 different usernames users = ['bill', 'coach', 'zoey', 'louis'] for user in users: event = {'@timestamp': datetime.datetime.now(), 'user': user} rule.add_data([event]) assert len(rule.matches) == 0 rule.garbage_collect(datetime.datetime.now()) # Add a duplicate, stay at 4 cardinality event = {'@timestamp': datetime.datetime.now(), 'user': 'coach'} rule.add_data([event]) rule.garbage_collect(datetime.datetime.now()) assert len(rule.matches) == 0 # Next unique will trigger event = {'@timestamp': datetime.datetime.now(), 'user': 'francis'} rule.add_data([event]) rule.garbage_collect(datetime.datetime.now()) assert len(rule.matches) == 1 rule.matches = [] # 15 minutes later, adding more will not trigger an alert users = ['nick', 'rochelle', 'ellis'] for user in users: event = {'@timestamp': datetime.datetime.now() + datetime.timedelta(minutes=15), 'user': user} rule.add_data([event]) assert len(rule.matches) == 0 def test_cardinality_min(): rules = {'min_cardinality': 4, 'timeframe': datetime.timedelta(minutes=10), 'cardinality_field': 'user', 'timestamp_field': '@timestamp'} rule = CardinalityRule(rules) # Add 2 different usernames, no alert because time hasn't elapsed users = ['foo', 'bar'] for user in users: event = {'@timestamp': datetime.datetime.now(), 'user': user} rule.add_data([event]) assert len(rule.matches) == 0 rule.garbage_collect(datetime.datetime.now()) # Add 3 more unique at t+5 mins users = ['faz', 'fuz', 'fiz'] for user in users: event = {'@timestamp': datetime.datetime.now() + datetime.timedelta(minutes=5), 'user': user} rule.add_data([event]) rule.garbage_collect(datetime.datetime.now() + datetime.timedelta(minutes=5)) assert len(rule.matches) == 0 # Adding the same one again at T+15 causes an alert user = 'faz' event = {'@timestamp': datetime.datetime.now() + datetime.timedelta(minutes=15), 'user': user} rule.add_data([event])
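# By t+15 the earlier values should have aged out of the 10-minute timeframe, presumably dropping the observed cardinality below min_cardinality, so the next garbage_collect is expected to record a match.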
def test_cardinality_qk():
    rules = {'max_cardinality': 2,
             'timeframe': datetime.timedelta(minutes=10),
             'cardinality_field': 'foo',
             'timestamp_field': '@timestamp',
             'query_key': 'user'}

    rule = CardinalityRule(rules)

    # Add 3 different usernames, one value each
    users = ['foo', 'bar', 'baz']
    for user in users:
        event = {'@timestamp': datetime.datetime.now(), 'user': user, 'foo': 'foo' + user}
        rule.add_data([event])
        assert len(rule.matches) == 0
    rule.garbage_collect(datetime.datetime.now())

    # Add 3 more unique values for 'baz'; the 3rd and 4th unique values each trigger an alert
    values = ['faz', 'fuz', 'fiz']
    for value in values:
        event = {'@timestamp': datetime.datetime.now() + datetime.timedelta(minutes=5), 'user': 'baz', 'foo': value}
        rule.add_data([event])
    rule.garbage_collect(datetime.datetime.now() + datetime.timedelta(minutes=5))
    assert len(rule.matches) == 2
    assert rule.matches[0]['user'] == 'baz'
    assert rule.matches[1]['user'] == 'baz'
    assert rule.matches[0]['foo'] == 'fuz'
    assert rule.matches[1]['foo'] == 'fiz'


def test_cardinality_nested_cardinality_field():
    rules = {'max_cardinality': 4,
             'timeframe': datetime.timedelta(minutes=10),
             'cardinality_field': 'd.ip',
             'timestamp_field': '@timestamp'}

    rule = CardinalityRule(rules)

    # Add 4 different IPs
    ips = ['10.0.0.1', '10.0.0.2', '10.0.0.3', '10.0.0.4']
    for ip in ips:
        event = {'@timestamp': datetime.datetime.now(), 'd': {'ip': ip}}
        rule.add_data([event])
        assert len(rule.matches) == 0
    rule.garbage_collect(datetime.datetime.now())

    # Add a duplicate, stay at 4 cardinality
    event = {'@timestamp': datetime.datetime.now(), 'd': {'ip': '10.0.0.4'}}
    rule.add_data([event])
    rule.garbage_collect(datetime.datetime.now())
    assert len(rule.matches) == 0

    # Add an event with no IP, stay at 4 cardinality
    event = {'@timestamp': datetime.datetime.now()}
    rule.add_data([event])
    rule.garbage_collect(datetime.datetime.now())
    assert len(rule.matches) == 0

    # Next unique will trigger
    event = {'@timestamp': datetime.datetime.now(), 'd': {'ip': '10.0.0.5'}}
    rule.add_data([event])
    rule.garbage_collect(datetime.datetime.now())
    assert len(rule.matches) == 1
    rule.matches = []

    # 15 minutes later, adding more will not trigger an alert
    ips = ['10.0.0.6', '10.0.0.7', '10.0.0.8']
    for ip in ips:
        event = {'@timestamp': datetime.datetime.now() + datetime.timedelta(minutes=15), 'd': {'ip': ip}}
        rule.add_data([event])
        assert len(rule.matches) == 0


def test_base_aggregation_constructor():
    rules = {'bucket_interval_timedelta': datetime.timedelta(seconds=10),
             'buffer_time': datetime.timedelta(minutes=1),
             'timestamp_field': '@timestamp'}

    # Test time period constructor logic
    rules['bucket_interval'] = {'seconds': 10}
    rule = BaseAggregationRule(rules)
    assert rule.rules['bucket_interval_period'] == '10s'
    rules['bucket_interval'] = {'minutes': 5}
    rule = BaseAggregationRule(rules)
    assert rule.rules['bucket_interval_period'] == '5m'
    rules['bucket_interval'] = {'hours': 4}
    rule = BaseAggregationRule(rules)
    assert rule.rules['bucket_interval_period'] == '4h'
    rules['bucket_interval'] = {'days': 2}
    rule = BaseAggregationRule(rules)
    assert rule.rules['bucket_interval_period'] == '2d'
    rules['bucket_interval'] = {'weeks': 1}
    rule = BaseAggregationRule(rules)
    assert rule.rules['bucket_interval_period'] == '1w'

    # buffer_time must be evenly divisible by bucket_interval
    with pytest.raises(EAException):
        rules['bucket_interval_timedelta'] = datetime.timedelta(seconds=13)
        rule = BaseAggregationRule(rules)

    # run_every must be evenly divisible by bucket_interval
    rules['use_run_every_query_size'] = True
    rules['run_every'] = datetime.timedelta(minutes=2)
    rules['bucket_interval_timedelta'] = datetime.timedelta(seconds=10)
    rule = BaseAggregationRule(rules)

    with pytest.raises(EAException):
        rules['bucket_interval_timedelta'] = datetime.timedelta(seconds=13)
        rule = BaseAggregationRule(rules)
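# Illustrative sketch only (not ElastAlert code): the constructor test above exercises two
# ideas, translating a single-unit spec dict into an Elasticsearch period string and
# requiring the buffer to be an exact multiple of the bucket interval. The helper names
# `to_es_interval` and `evenly_divides` are invented for this sketch.
import datetime


def to_es_interval(spec):
    """Convert e.g. {'minutes': 5} into the Elasticsearch period string '5m'."""
    suffix = {'seconds': 's', 'minutes': 'm', 'hours': 'h', 'days': 'd', 'weeks': 'w'}
    unit, value = next(iter(spec.items()))
    return '%d%s' % (value, suffix[unit])


def evenly_divides(buffer_time, bucket_interval):
    """True when buffer_time is an exact multiple of bucket_interval."""
    return buffer_time % bucket_interval == datetime.timedelta(0)


assert to_es_interval({'minutes': 5}) == '5m'
assert evenly_divides(datetime.timedelta(minutes=1), datetime.timedelta(seconds=10)) is True
assert evenly_divides(datetime.timedelta(minutes=1), datetime.timedelta(seconds=13)) is False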
def test_base_aggregation_payloads():
    with mock.patch.object(BaseAggregationRule, 'check_matches', return_value=None) as mock_check_matches:
        rules = {'bucket_interval': {'seconds': 10},
                 'bucket_interval_timedelta': datetime.timedelta(seconds=10),
                 'buffer_time': datetime.timedelta(minutes=5),
                 'timestamp_field': '@timestamp'}

        timestamp = datetime.datetime.now()
        interval_agg = create_bucket_aggregation('interval_aggs', [{'key_as_string': '2014-01-01T00:00:00Z'}])
        rule = BaseAggregationRule(rules)

        # Payload not wrapped
        rule.add_aggregation_data({timestamp: {}})
        mock_check_matches.assert_called_once_with(timestamp, None, {})
        mock_check_matches.reset_mock()

        # Payload wrapped by date_histogram
        interval_agg_data = {timestamp: interval_agg}
        rule.add_aggregation_data(interval_agg_data)
        mock_check_matches.assert_called_once_with(ts_to_dt('2014-01-01T00:00:00Z'), None, {'key_as_string': '2014-01-01T00:00:00Z'})
        mock_check_matches.reset_mock()

        # Payload wrapped by terms
        bucket_agg_data = {timestamp: create_bucket_aggregation('bucket_aggs', [{'key': 'qk'}])}
        rule.add_aggregation_data(bucket_agg_data)
        mock_check_matches.assert_called_once_with(timestamp, 'qk', {'key': 'qk'})
        mock_check_matches.reset_mock()

        # Payload wrapped by terms and date_histogram
        bucket_interval_agg_data = {
            timestamp: create_bucket_aggregation('bucket_aggs', [{'key': 'qk', 'interval_aggs': interval_agg['interval_aggs']}])
        }
        rule.add_aggregation_data(bucket_interval_agg_data)
        mock_check_matches.assert_called_once_with(ts_to_dt('2014-01-01T00:00:00Z'), 'qk', {'key_as_string': '2014-01-01T00:00:00Z'})
        mock_check_matches.reset_mock()


def test_metric_aggregation():
    rules = {'buffer_time': datetime.timedelta(minutes=5),
             'timestamp_field': '@timestamp',
             'metric_agg_type': 'avg',
             'metric_agg_key': 'cpu_pct'}

    # Check threshold logic
    with pytest.raises(EAException):
        rule = MetricAggregationRule(rules)

    rules['min_threshold'] = 0.1
    rules['max_threshold'] = 0.8
    rule = MetricAggregationRule(rules)

    assert rule.rules['aggregation_query_element'] == {'metric_cpu_pct_avg': {'avg': {'field': 'cpu_pct'}}}

    assert rule.crossed_thresholds(None) is False
    assert rule.crossed_thresholds(0.09) is True
    assert rule.crossed_thresholds(0.10) is False
    assert rule.crossed_thresholds(0.79) is False
    assert rule.crossed_thresholds(0.81) is True

    rule.check_matches(datetime.datetime.now(), None, {'metric_cpu_pct_avg': {'value': None}})
    rule.check_matches(datetime.datetime.now(), None, {'metric_cpu_pct_avg': {'value': 0.5}})
    assert len(rule.matches) == 0

    rule.check_matches(datetime.datetime.now(), None, {'metric_cpu_pct_avg': {'value': 0.05}})
    rule.check_matches(datetime.datetime.now(), None, {'metric_cpu_pct_avg': {'value': 0.95}})
    assert len(rule.matches) == 2

    rules['query_key'] = 'qk'
    rule = MetricAggregationRule(rules)
    rule.check_matches(datetime.datetime.now(), 'qk_val', {'metric_cpu_pct_avg': {'value': 0.95}})
    assert rule.matches[0]['qk'] == 'qk_val'
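# Illustrative sketch only: the min/max threshold predicate exercised above, rewritten as
# a plain function. `crossed` is an invented name for this sketch, not
# MetricAggregationRule.crossed_thresholds itself; it is assumed to mirror its behaviour
# (alert only when the value lies strictly outside the configured band).
def crossed(value, min_threshold=None, max_threshold=None):
    if value is None:
        return False
    if min_threshold is not None and value < min_threshold:
        return True
    if max_threshold is not None and value > max_threshold:
        return True
    return False


assert crossed(0.09, 0.1, 0.8) is True
assert crossed(0.5, 0.1, 0.8) is False
assert crossed(None, 0.1, 0.8) is False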
def test_metric_aggregation_complex_query_key():
    rules = {'buffer_time': datetime.timedelta(minutes=5),
             'timestamp_field': '@timestamp',
             'metric_agg_type': 'avg',
             'metric_agg_key': 'cpu_pct',
             'compound_query_key': ['qk', 'sub_qk'],
             'query_key': 'qk,sub_qk',
             'max_threshold': 0.8}

    # Three sub-buckets under one top-level key, all above max_threshold
    query = {"bucket_aggs": {"buckets": [
        {"metric_cpu_pct_avg": {"value": 0.91}, "key": "sub_qk_val1"},
        {"metric_cpu_pct_avg": {"value": 0.95}, "key": "sub_qk_val2"},
        {"metric_cpu_pct_avg": {"value": 0.89}, "key": "sub_qk_val3"}]
    }, "key": "qk_val"}

    rule = MetricAggregationRule(rules)
    rule.check_matches(datetime.datetime.now(), 'qk_val', query)

    # Each sub-bucket crosses the threshold, so each produces a match carrying both key parts
    assert len(rule.matches) == 3
    assert rule.matches[0]['qk'] == 'qk_val'
    assert rule.matches[1]['qk'] == 'qk_val'
    assert rule.matches[0]['sub_qk'] == 'sub_qk_val1'
    assert rule.matches[1]['sub_qk'] == 'sub_qk_val2'


def test_percentage_match():
    rules = {'match_bucket_filter': {'term': 'term_val'},
             'buffer_time': datetime.timedelta(minutes=5),
             'timestamp_field': '@timestamp'}

    # Check threshold logic
    with pytest.raises(EAException):
        rule = PercentageMatchRule(rules)

    rules['min_percentage'] = 25
    rules['max_percentage'] = 75
    rule = PercentageMatchRule(rules)

    assert rule.rules['aggregation_query_element'] == {
        'percentage_match_aggs': {
            'filters': {
                'other_bucket': True,
                'filters': {
                    'match_bucket': {
                        'bool': {
                            'must': {
                                'term': 'term_val'
                            }
                        }
                    }
                }
            }
        }
    }

    assert rule.percentage_violation(25) is False
    assert rule.percentage_violation(50) is False
    assert rule.percentage_violation(75) is False
    assert rule.percentage_violation(24.9) is True
    assert rule.percentage_violation(75.1) is True

    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(0, 0))
    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(None, 100))
    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(26, 74))
    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(74, 26))
    assert len(rule.matches) == 0

    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(24, 76))
    rule.check_matches(datetime.datetime.now(), None, create_percentage_match_agg(76, 24))
    assert len(rule.matches) == 2

    rules['query_key'] = 'qk'
    rule = PercentageMatchRule(rules)
    rule.check_matches(datetime.datetime.now(), 'qk_val', create_percentage_match_agg(76.666666667, 24))
    assert rule.matches[0]['qk'] == 'qk_val'
    assert '76.1589403974' in rule.get_match_str(rule.matches[0])
    rules['percentage_format_string'] = '%.2f'
    assert '76.16' in rule.get_match_str(rule.matches[0])
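# Illustrative sketch only: the percentage that PercentageMatchRule is assumed to derive
# from the two filter buckets above (the match bucket versus everything else).
# `match_percentage` is an invented helper for this sketch, not ElastAlert code; under
# this formula the 76.666666667 / 24 case above works out to roughly 76.159 percent.
def match_percentage(match_count, other_count):
    return 100.0 * match_count / (match_count + other_count)


assert match_percentage(24, 76) == 24.0
assert match_percentage(76, 24) == 76.0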
elastalert-0.2.4/tests/util_test.py
# -*- coding: utf-8 -*-
from datetime import datetime
from datetime import timedelta

import mock
import pytest
from dateutil.parser import parse as dt

from elastalert.util import add_raw_postfix
from elastalert.util import format_index
from elastalert.util import lookup_es_key
from elastalert.util import parse_deadline
from elastalert.util import parse_duration
from elastalert.util import replace_dots_in_field_names
from elastalert.util import resolve_string
from elastalert.util import set_es_key
from elastalert.util import should_scrolling_continue


@pytest.mark.parametrize('spec, expected_delta', [
    ('hours=2', timedelta(hours=2)),
    ('minutes=30', timedelta(minutes=30)),
    ('seconds=45', timedelta(seconds=45)),
])
def test_parse_duration(spec, expected_delta):
    """``unit=num`` specs can be translated into ``timedelta`` instances."""
    assert parse_duration(spec) == expected_delta


@pytest.mark.parametrize('spec, expected_deadline', [
    ('hours=2', dt('2017-07-07T12:00:00.000Z')),
    ('minutes=30', dt('2017-07-07T10:30:00.000Z')),
    ('seconds=45', dt('2017-07-07T10:00:45.000Z')),
])
def test_parse_deadline(spec, expected_deadline):
    """``unit=num`` specs can be translated into ``datetime`` instances."""
    # Note: Can't mock ``utcnow`` directly because ``datetime`` is a built-in.
    class MockDatetime(datetime):
        @staticmethod
        def utcnow():
            return dt('2017-07-07T10:00:00.000Z')

    with mock.patch('datetime.datetime', MockDatetime):
        assert parse_deadline(spec) == expected_deadline


def test_setting_keys(ea):
    expected = 12467267
    record = {
        'Message': '12345',
        'Fields': {
            'ts': 'fail',
            'severity': 'large',
            'user': 'jimmay'
        }
    }

    # Set the value
    assert set_es_key(record, 'Fields.ts', expected)

    # Get the value again
    assert lookup_es_key(record, 'Fields.ts') == expected


def test_looking_up_missing_keys(ea):
    record = {
        'Message': '12345',
        'Fields': {
            'severity': 'large',
            'user': 'jimmay',
            'null': None
        }
    }

    assert lookup_es_key(record, 'Fields.ts') is None
    assert lookup_es_key(record, 'Fields.null.foo') is None


def test_looking_up_nested_keys(ea):
    expected = 12467267
    record = {
        'Message': '12345',
        'Fields': {
            'ts': expected,
            'severity': 'large',
            'user': 'jimmay'
        }
    }

    assert lookup_es_key(record, 'Fields.ts') == expected


def test_looking_up_nested_composite_keys(ea):
    expected = 12467267
    record = {
        'Message': '12345',
        'Fields': {
            'ts.value': expected,
            'severity': 'large',
            'user': 'jimmay'
        }
    }

    assert lookup_es_key(record, 'Fields.ts.value') == expected


def test_looking_up_arrays(ea):
    record = {
        'flags': [1, 2, 3],
        'objects': [
            {'foo': 'bar'},
            {'foo': [{'bar': 'baz'}]},
            {'foo': {'bar': 'baz'}}
        ]
    }
    assert lookup_es_key(record, 'flags[0]') == 1
    assert lookup_es_key(record, 'flags[1]') == 2
    assert lookup_es_key(record, 'objects[0]foo') == 'bar'
    assert lookup_es_key(record, 'objects[1]foo[0]bar') == 'baz'
    assert lookup_es_key(record, 'objects[2]foo.bar') == 'baz'
    assert lookup_es_key(record, 'objects[1]foo[1]bar') is None
    assert lookup_es_key(record, 'objects[1]foo[0]baz') is None


def test_add_raw_postfix(ea):
    expected = 'foo.raw'
    assert add_raw_postfix('foo', False) == expected
    assert add_raw_postfix('foo.raw', False) == expected
    expected = 'foo.keyword'
    assert add_raw_postfix('foo', True) == expected
    assert add_raw_postfix('foo.keyword', True) == expected


def test_replace_dots_in_field_names(ea):
    actual = {
        'a': {
            'b.c': 'd',
            'e': {
                'f': {
                    'g.h': 0
                }
            }
        },
        'i.j.k': 1,
        'l': {
            'm': 2
        }
    }
    expected = {
        'a': {
            'b_c': 'd',
            'e': {
                'f': {
                    'g_h': 0
                }
            }
        },
        'i_j_k': 1,
        'l': {
            'm': 2
        }
    }
    assert replace_dots_in_field_names(actual) == expected
    assert replace_dots_in_field_names({'a': 0, 1: 2}) == {'a': 0, 1: 2}


def test_resolve_string(ea):
    match = {
        'name': 'mySystem',
        'temperature': 45,
        'humidity': 80.56,
        'sensors': ['outsideSensor', 'insideSensor'],
        'foo': {'bar': 'baz'}
    }

    expected_outputs = [
        "mySystem is online <MISSING VALUE>",
        "Sensors ['outsideSensor', 'insideSensor'] in the <MISSING VALUE> have temp 45 and 80.56 humidity",
        "Actuator <MISSING VALUE> in the <MISSING VALUE> has temp <MISSING VALUE>",
        'Something baz']
    old_style_strings = [
        "%(name)s is online %(noKey)s",
        "Sensors %(sensors)s in the %(noPlace)s have temp %(temperature)s and %(humidity)s humidity",
        "Actuator %(noKey)s in the %(noPlace)s has temp %(noKey)s",
        'Something %(foo.bar)s']

    assert resolve_string(old_style_strings[0], match) == expected_outputs[0]
    assert resolve_string(old_style_strings[1], match) == expected_outputs[1]
    assert resolve_string(old_style_strings[2], match) == expected_outputs[2]
    assert resolve_string(old_style_strings[3], match) == expected_outputs[3]

    new_style_strings = [
        "{name} is online {noKey}",
        "Sensors {sensors} in the {noPlace} have temp {temperature} and {humidity} humidity",
        "Actuator {noKey} in the {noPlace} has temp {noKey}",
        "Something {foo[bar]}"]

    assert resolve_string(new_style_strings[0], match) == expected_outputs[0]
    assert resolve_string(new_style_strings[1], match) == expected_outputs[1]
    assert resolve_string(new_style_strings[2], match) == expected_outputs[2]
    assert resolve_string(new_style_strings[3], match) == expected_outputs[3]


def test_format_index():
    pattern = 'logstash-%Y.%m.%d'
    pattern2 = 'logstash-%Y.%W'
    date = dt('2018-06-25T12:00:00Z')
    date2 = dt('2018-06-26T12:00:00Z')
    assert sorted(format_index(pattern, date, date).split(',')) == ['logstash-2018.06.25']
    assert sorted(format_index(pattern, date, date2).split(',')) == ['logstash-2018.06.25', 'logstash-2018.06.26']
    assert sorted(format_index(pattern, date, date2, True).split(',')) == ['logstash-2018.06.24', 'logstash-2018.06.25', 'logstash-2018.06.26']
    assert sorted(format_index(pattern2, date, date2, True).split(',')) == ['logstash-2018.25', 'logstash-2018.26']
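# Illustrative sketch only: format_index above is assumed to expand a strftime pattern over
# every day between the start and end timestamps (with an optional extra look-back day) and
# join the distinct index names with commas. `expand_index_pattern` is an invented helper
# for this sketch, not ElastAlert's format_index.
from datetime import datetime
from datetime import timedelta


def expand_index_pattern(pattern, start, end):
    indices = []
    day = start
    while day <= end:
        name = day.strftime(pattern)
        if name not in indices:
            indices.append(name)
        day += timedelta(days=1)
    return ','.join(indices)


assert expand_index_pattern('logstash-%Y.%m.%d', datetime(2018, 6, 25), datetime(2018, 6, 26)) == 'logstash-2018.06.25,logstash-2018.06.26'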
def test_should_scrolling_continue():
    rule_no_max_scrolling = {'max_scrolling_count': 0, 'scrolling_cycle': 1}
    rule_reached_max_scrolling = {'max_scrolling_count': 2, 'scrolling_cycle': 2}
    rule_before_first_run = {'max_scrolling_count': 0, 'scrolling_cycle': 0}
    rule_before_max_scrolling = {'max_scrolling_count': 2, 'scrolling_cycle': 1}
    rule_over_max_scrolling = {'max_scrolling_count': 2, 'scrolling_cycle': 3}

    assert should_scrolling_continue(rule_no_max_scrolling) is True
    assert should_scrolling_continue(rule_reached_max_scrolling) is False
    assert should_scrolling_continue(rule_before_first_run) is True
    assert should_scrolling_continue(rule_before_max_scrolling) is True
    assert should_scrolling_continue(rule_over_max_scrolling) is False
elastalert-0.2.4/tox.ini
[tox]
project = elastalert
envlist = py36,docs

[testenv]
deps = -rrequirements-dev.txt
commands =
    coverage run --source=elastalert/,tests/ -m pytest --strict {posargs}
    coverage report -m
    flake8 .

[testenv:lint]
deps =
    {[testenv]deps}
    pylint
commands =
    pylint --rcfile=.pylintrc elastalert
    pylint --rcfile=.pylintrc tests

[testenv:devenv]
envdir = virtualenv_run
commands =

[pytest]
norecursedirs = .* virtualenv_run docs build venv env

[testenv:docs]
deps =
    {[testenv]deps}
    sphinx==1.6.6
changedir = docs
commands = sphinx-build -b html -d build/doctrees -W source build/html